IndraneelKumar committed
Commit 266d7bc
0 Parent(s)

Initial search engine commit

This view is limited to 50 files because the commit contains too many changes.

Files changed (50)
  1. .dockerignore +64 -0
  2. .env.example +58 -0
  3. .github/workflows/cd.yml +32 -0
  4. .github/workflows/ci.yml +78 -0
  5. .gitignore +219 -0
  6. .pre-commit-config.yaml +67 -0
  7. .prefectignore +41 -0
  8. .python-version +1 -0
  9. .vscode/settings.json +7 -0
  10. Dockerfile +65 -0
  11. Makefile +200 -0
  12. README.md +74 -0
  13. cloudbuild_fastapi.yaml +12 -0
  14. deploy_fastapi.sh +105 -0
  15. frontend/__init__.py +0 -0
  16. frontend/app.py +560 -0
  17. prefect-cloud.yaml +52 -0
  18. prefect-local.yaml +53 -0
  19. pyproject.toml +174 -0
  20. requirements.txt +23 -0
  21. src/__init__.py +0 -0
  22. src/api/__init__.py +0 -0
  23. src/api/exceptions/__init__.py +0 -0
  24. src/api/exceptions/exception_handlers.py +97 -0
  25. src/api/main.py +142 -0
  26. src/api/middleware/__init__.py +0 -0
  27. src/api/middleware/logging_middleware.py +73 -0
  28. src/api/models/__init__.py +0 -0
  29. src/api/models/api_models.py +85 -0
  30. src/api/models/provider_models.py +77 -0
  31. src/api/routes/__init__.py +0 -0
  32. src/api/routes/health_routes.py +52 -0
  33. src/api/routes/search_routes.py +123 -0
  34. src/api/services/__init__.py +0 -0
  35. src/api/services/generation_service.py +137 -0
  36. src/api/services/providers/__init__.py +0 -0
  37. src/api/services/providers/huggingface_service.py +64 -0
  38. src/api/services/providers/openai_service.py +181 -0
  39. src/api/services/providers/openrouter_service.py +254 -0
  40. src/api/services/providers/utils/__init__.py +0 -0
  41. src/api/services/providers/utils/evaluation_metrics.py +110 -0
  42. src/api/services/providers/utils/messages.py +18 -0
  43. src/api/services/providers/utils/prompts.py +77 -0
  44. src/api/services/search_service.py +188 -0
  45. src/config.py +202 -0
  46. src/configs/feeds_rss.yaml +91 -0
  47. src/infrastructure/__init__.py +0 -0
  48. src/infrastructure/qdrant/__init__.py +0 -0
  49. src/infrastructure/qdrant/create_collection.py +47 -0
  50. src/infrastructure/qdrant/create_indexes.py +44 -0
.dockerignore ADDED
@@ -0,0 +1,64 @@
+ # Git
+ .git
+ .gitignore
+
+ # Python
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ .pytest_cache/
+ .coverage
+ htmlcov/
+ .tox/
+ .ruff_cache/
+ .mypy_cache/
+
+ # Virtual environments
+ venv/
+ .venv/
+ env/
+ ENV/
+
+ # IDE files
+ .idea/
+ .vscode/
+ *.swp
+ *.swo
+
+ # Build directories
+ dist/
+ build/
+ *.egg-info/
+
+ # Docker
+ .dockerignore
+ docker-compose*.yml
+
+ # Logs
+ logs/
+ *.log
+
+ # Temporary files
+ .tmp/
+ tmp/
+
+ # Documentation
+ docs/
+ # README.md
+ CHANGELOG.md
+ LICENSE
+
+ # Test data
+ images/
+
+ # Project directories
+ tests/
+ src/pipelines/
+ src/infrastructure/supabase/
+ # uv.lock
+ pre-commit-config.yaml
+ # pyproject.toml
+ .python-version
+ MEMORY.md
.env.example ADDED
@@ -0,0 +1,58 @@
+ SUPABASE_DB__TABLE_NAME=substack_articles
+ SUPABASE_DB__HOST=your_supabase_db_host_here
+ SUPABASE_DB__NAME=postgres
+ SUPABASE_DB__USER=your_supabase_db_user_here
+ SUPABASE_DB__PASSWORD=your_supabase_db_password_here
+ SUPABASE_DB__PORT=6543
+
+ # RSS
+ RSS__DEFAULT_START_DATE=2025-07-01
+ RSS__BATCH_SIZE=30
+
+ # Qdrant configuration
+ QDRANT__API_KEY=your_qdrant_api_key_here
+ QDRANT__URL=your_qdrant_url_here
+ QDRANT__COLLECTION_NAME=substack_collection
+ QDRANT__DENSE_MODEL_NAME=BAAI/bge-base-en-v1.5 # BAAI/bge-large-en-v1.5 (1024), BAAI/bge-base-en-v1.5 (HF, 768). BAAI/bge-base-en (Fastembed, 768)
+ QDRANT__SPARSE_MODEL_NAME=Qdrant/bm25 # prithivida/Splade_PP_en_v1, Qdrant/bm25
+ QDRANT__VECTOR_DIM=768 # 768, 1024
+ QDRANT__ARTICLE_BATCH_SIZE=5
+ QDRANT__SPARSE_BATCH_SIZE=32
+ QDRANT__EMBED_BATCH_SIZE=50 # 50
+ QDRANT__UPSERT_BATCH_SIZE=100 # 50
+ QDRANT__MAX_CONCURRENT=3
+
+ # Text splitting
+ TS__CHUNK_SIZE=4000
+ TS__CHUNK_OVERLAP=200
+
+ # PREFECT
+ PREFECT__API_KEY=your_prefect_api_key_here
+ PREFECT__WORKSPACE=your_prefect_workspace_here
+ PREFECT__API_URL=your_prefect_api_url_here
+
+ # JINA
+ JINA__API_KEY=your_jina_api_key_here
+ JINA__URL=https://api.jina.ai/v1/embeddings
+ JINA__MODEL=jina-embeddings-v3
+
+ # HUGGING FACE
+ HUGGING_FACE__API_KEY=your_hugging_face_api_key_here
+ HUGGING_FACE__MODEL=BAAI/bge-base-en-v1.5
+
+ # OPENAI
+ OPENAI__API_KEY=your_openai_api_key_here
+
+ # OPENROUTER
+ OPENROUTER__API_KEY=your_openrouter_api_key_here
+ OPENROUTER__API_URL=https://openrouter.ai/api/v1
+
+ # OPIK OBSERVABILITY
+ OPIK__API_KEY=your_opik_api_key_here
+ OPIK__PROJECT_NAME=substack-pipeline
+
+ # FastAPI Endpoint
+ BACKEND_URL=your_fastapi_backend_url_here
+
+ # Default (8501)
+ ALLOWED_ORIGINS=your_allowed_origins_here_as_comma_separated_values
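The double-underscore names above map onto nested Pydantic Settings groups (the README notes that config is handled via Pydantic Settings). A minimal sketch of how such variables could be loaded, assuming `pydantic-settings` with a `__` nested delimiter; the class and field names are illustrative, not necessarily those in the project's actual `src/config.py`:

```python
# Hypothetical sketch -- class/field names are illustrative, not the project's actual src/config.py.
from pydantic import BaseModel
from pydantic_settings import BaseSettings, SettingsConfigDict


class QdrantSettings(BaseModel):
    url: str
    api_key: str
    collection_name: str = "substack_collection"
    dense_model_name: str = "BAAI/bge-base-en-v1.5"
    vector_dim: int = 768


class Settings(BaseSettings):
    # With env_nested_delimiter="__", QDRANT__URL populates settings.qdrant.url,
    # QDRANT__API_KEY populates settings.qdrant.api_key, and so on.
    model_config = SettingsConfigDict(env_file=".env", env_nested_delimiter="__", extra="ignore")

    qdrant: QdrantSettings


settings = Settings()  # reads .env / environment variables at instantiation
```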
.github/workflows/cd.yml ADDED
@@ -0,0 +1,32 @@
+ name: CD
+
+ on:
+   workflow_dispatch:
+   push:
+     branches:
+       # - main
+       - develop
+
+ jobs:
+   deploy:
+     runs-on: ubuntu-latest
+
+     steps:
+       - name: Checkout code
+         uses: actions/checkout@v4
+
+       - name: Set up Python
+         uses: actions/setup-python@v5
+         with:
+           python-version-file: .python-version
+
+       - name: Run Prefect Deploy
+         uses: PrefectHQ/actions-prefect-deploy@v4
+         with:
+           all-deployments: "true" # deploy all deployments in prefect.yaml
+           requirements-file-paths: ./requirements.txt
+           deployment-file-path: ./prefect-cloud.yaml
+         env:
+           PREFECT_API_KEY: ${{ secrets.PREFECT__API_KEY }}
+           PREFECT_WORKSPACE: ${{ secrets.PREFECT__WORKSPACE }}
+           PREFECT_API_URL: ${{ secrets.PREFECT__API_URL }}
.github/workflows/ci.yml ADDED
@@ -0,0 +1,78 @@
+ name: CI
+
+ on:
+   workflow_dispatch:
+
+   push:
+
+     branches:
+       # - main
+       - develop
+
+ jobs:
+   lint-and-test:
+     runs-on: ubuntu-latest
+
+     env:
+       # Supabase secrets
+       SUPABASE_DB__TABLE_NAME: ${{ secrets.SUPABASE_DB__TABLE_NAME }}
+       SUPABASE_DB__HOST: ${{ secrets.SUPABASE_DB__HOST }}
+       SUPABASE_DB__NAME: ${{ secrets.SUPABASE_DB__NAME }}
+       SUPABASE_DB__USER: ${{ secrets.SUPABASE_DB__USER }}
+       SUPABASE_DB__PASSWORD: ${{ secrets.SUPABASE_DB__PASSWORD }}
+       SUPABASE_DB__PORT: ${{ secrets.SUPABASE_DB__PORT }}
+
+       # Qdrant secrets
+       QDRANT__API_KEY: ${{ secrets.QDRANT__API_KEY }}
+       QDRANT__URL: ${{ secrets.QDRANT__URL }}
+       QDRANT__COLLECTION_NAME: ${{ secrets.QDRANT__COLLECTION_NAME }}
+
+       # OpenRouter secrets
+       OPENROUTER__API_KEY: ${{ secrets.OPENROUTER__API_KEY }}
+       OPENROUTER__API_URL: ${{ secrets.OPENROUTER__API_URL }}
+
+       # OPIK secrets
+       OPIK__API_KEY: ${{ secrets.OPIK__API_KEY }}
+       OPIK__PROJECT_NAME: ${{ secrets.OPIK__PROJECT_NAME }}
+
+       # FastAPI secrets
+       ALLOWED_ORIGINS: ${{ secrets.ALLOWED_ORIGINS }}
+
+     steps:
+       - name: Checkout code
+         uses: actions/checkout@v4
+
+       - name: Install uv
+         uses: astral-sh/setup-uv@v5
+
+       - name: Set up Python
+         uses: actions/setup-python@v5
+         with:
+           python-version-file: .python-version
+
+       - name: Install dependencies
+         run: uv sync --all-groups
+
+       - name: Run pre-commit hooks
+         run: |
+           source .venv/bin/activate
+           pre-commit install
+           pre-commit run --all-files
+
+       # 🔹 Debug step: check that DB env vars are set
+       - name: Check DB environment variables
+         run: |
+           for var in SUPABASE_DB__HOST SUPABASE_DB__NAME SUPABASE_DB__USER SUPABASE_DB__PORT SUPABASE_DB__TABLE_NAME \
+                      QDRANT__API_KEY QDRANT__URL QDRANT__COLLECTION_NAME \
+                      OPENROUTER__API_KEY OPENROUTER__API_URL \
+                      ALLOWED_ORIGINS; do
+             if [ -z "${!var}" ]; then
+               echo "ERROR: $var is empty!"
+               exit 1
+             else
+               echo "$var is set"
+             fi
+           done
+
+       - name: Run tests
+         run: uv run pytest
.gitignore ADDED
@@ -0,0 +1,219 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # UV
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ #uv.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+ .pdm.toml
+ .pdm-python
+ .pdm-build/
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+
+ # Abstra
+ # Abstra is an AI-powered process automation framework.
+ # Ignore directories containing user credentials, local state, and settings.
+ # Learn more at https://abstra.io/docs
+ .abstra/
+
+ # Visual Studio Code
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
+ # you could uncomment the following to ignore the entire vscode folder
+ # .vscode/
+
+ # Ruff stuff:
+ .ruff_cache/
+
+ # PyPI configuration file
+ .pypirc
+
+ # Cursor
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+ # refer to https://docs.cursor.com/context/ignore-files
+ .cursorignore
+ .cursorindexingignore
+
+ # FILES
+ MEMORY.md
+ DOCKER.md
+ INSTRUCTIONS.md
+ create_prefect_secrets.py
+ OLD_README.md
+ delete_deployment.sh
+ update_deploy_fastapi.sh
+ src/infrastructure/qdrant/query_scroll.py
+ src/infrastructure/qdrant/query_search.py
+ src/pipelines/flows/rss_ingestion_flow_old.py
+ src/pipelines/tasks/fetch_rss_old.py
+ src/pipelines/tasks/parse_articles_new.py
+ src/pipelines/tasks/batch_parse_ingest_articles.py
+ experiments/
+ src/configs/all_feeds.yaml
+ src/pipelines/flows/backfilling_archive_flow.py
+ src/pipelines/tasks/fetch_archive.py
+ src/pipelines/tasks/ingest_archive.py
+ src/pipelines/tasks/parse_archive.py
+ src/configs/feeds_archive.yaml
+ deploy_gradio.sh
+ frontend/Dockerfile
+ frontend/requirements.txt
.pre-commit-config.yaml ADDED
@@ -0,0 +1,67 @@
+ repos:
+   - repo: https://github.com/astral-sh/uv-pre-commit
+     # uv version.
+     rev: 0.8.17
+     hooks:
+       # Update the uv lockfile
+       - id: uv-lock
+
+   - repo: https://github.com/pre-commit/pre-commit-hooks
+     rev: v6.0.0
+     hooks:
+       - id: check-added-large-files
+         args: ['--maxkb=20000']
+       - id: check-toml
+       - id: check-yaml
+         args: [--allow-multiple-documents]
+       - id: end-of-file-fixer
+       - id: trailing-whitespace
+       - id: check-json
+       - id: detect-private-key
+
+   - repo: https://github.com/pre-commit/mirrors-mypy
+     rev: v1.18.1
+     hooks:
+       - id: mypy
+         additional_dependencies:
+           - types-pyyaml>=6.0.12.20250822
+           - types-requests>=2.32.4.20250809
+           - types-python-dateutil>=2.9.0.20250822
+           - types-markdown>=3.9.0.20250906
+         args: ["--config-file=pyproject.toml"]
+
+   - repo: https://github.com/astral-sh/ruff-pre-commit
+     rev: v0.13.0
+     hooks:
+       - id: ruff-check
+         args:
+           [
+             --fix,
+             --exit-non-zero-on-fix,
+             --show-fixes
+           ]
+       - id: ruff-format
+
+   - repo: https://github.com/hukkin/mdformat
+     rev: 0.7.22
+     hooks:
+       - id: mdformat
+         additional_dependencies:
+           - mdformat-gfm
+         exclude: ^team_data/
+
+
+   - repo: https://github.com/gitleaks/gitleaks
+     rev: v8.28.0
+     hooks:
+       - id: gitleaks
+
+
+   # - repo: local
+   #   hooks:
+   #     - id: pytest
+   #       name: pytest
+   #       entry: pytest
+   #       language: system
+   #       types: [python]
+   #       pass_filenames: false
.prefectignore ADDED
@@ -0,0 +1,41 @@
+ # prefect artifacts
+ .prefectignore
+
+ # python artifacts
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.egg-info/
+ *.egg
+
+ # Type checking artifacts
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+ .pyre/
+
+ # IPython
+ profile_default/
+ ipython_config.py
+ *.ipynb_checkpoints/*
+
+ # Environments
+ .python-version
+ .env
+ .venv
+ env/
+ venv/
+
+ # MacOS
+ .DS_Store
+
+ # Dask
+ dask-worker-space/
+
+ # Editors
+ .idea/
+ .vscode/
+
+ # VCS
+ .git/
+ .hg/
.python-version ADDED
@@ -0,0 +1 @@
+ 3.12
.vscode/settings.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "terminal.integrated.defaultProfile.linux": "zsh",
+   "terminal.integrated.defaultProfile.windows": "",
+   "cSpell.words": [
+     "fastapi"
+   ]
+ }
Dockerfile ADDED
@@ -0,0 +1,65 @@
+ # ---------- Build Stage ----------
+ FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder
+
+ WORKDIR /app
+
+ # System deps required for building some Python wheels (e.g., madoka)
+ RUN apt-get update && apt-get install -y --no-install-recommends \
+     build-essential g++ \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Configure UV for optimal performance
+ ENV UV_COMPILE_BYTECODE=1
+ ENV UV_LINK_MODE=copy
+ ENV UV_PYTHON_DOWNLOADS=never
+
+ # Copy dependency files and sync dependencies
+ RUN --mount=type=cache,target=/root/.cache/uv \
+     --mount=type=bind,source=uv.lock,target=uv.lock \
+     --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+     uv sync --locked --no-install-project --no-dev
+
+ # Copy source code selectively
+ COPY src/api ./src/api
+ COPY src/config.py ./src/config.py
+ COPY src/infrastructure/qdrant ./src/infrastructure/qdrant
+ COPY src/models ./src/models
+ COPY src/utils ./src/utils
+
+ # Also copy README.md, pyproject.toml and uv.lock for the final sync
+ COPY pyproject.toml uv.lock README.md ./
+
+ # Install project dependencies into virtualenv
+ RUN --mount=type=cache,target=/root/.cache/uv \
+     uv sync --locked --no-dev
+
+
+ # ---------- Runtime Stage ----------
+ FROM python:3.12-slim-bookworm
+
+ # Copy built application and virtualenv from builder
+ COPY --from=builder /app /app
+
+ # Install runtime tools used by HEALTHCHECK
+ RUN apt-get update && apt-get install -y --no-install-recommends \
+     curl \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Set Python path and environment variables
+ ENV PATH="/app/.venv/bin:$PATH"
+ ENV PYTHONPATH=/app
+ ENV HF_HOME=/tmp/huggingface
+ ENV FASTEMBED_CACHE=/tmp/fastembed_cache
+ ENV PORT=8080
+
+ # Create cache directories
+ RUN mkdir -p $HF_HOME $FASTEMBED_CACHE && chmod -R 755 $HF_HOME $FASTEMBED_CACHE
+
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+     CMD curl -f http://localhost:$PORT/health || exit 1
+
+ # Expose Cloud Run port
+ EXPOSE $PORT
+
+ # Run FastAPI with uvicorn
+ CMD ["uvicorn", "src.api.main:app", "--host", "0.0.0.0", "--port", "8080", "--workers", "1", "--loop", "uvloop"]
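For local testing of this image, a typical invocation might look like the following; the image tag is illustrative and `.env` is assumed to contain the variables from `.env.example`:

```bash
# Build the image (tag name is illustrative)
docker build -t substack-search-api .

# Run it locally, exposing the Cloud Run-style port 8080 configured above
docker run --rm -p 8080:8080 --env-file .env substack-search-api

# The container's HEALTHCHECK probes this endpoint
curl -f http://localhost:8080/health
```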
Makefile ADDED
@@ -0,0 +1,200 @@
+ # Makefile
+
+ # Check if .env exists
+ ifeq (,$(wildcard .env))
+ $(error .env file is missing at .env. Please create one based on .env.example)
+ endif
+
+ # Load environment variables from .env
+ include .env
+
+ .PHONY: tests mypy clean help ruff-check ruff-check-fix ruff-format ruff-format-fix all-check all-fix
+
+ #################################################################################
+ ## Supabase Commands
+ #################################################################################
+
+ supabase-create: ## Create Supabase database
+ 	@echo "Creating Supabase database..."
+ 	uv run python src/infrastructure/supabase/create_db.py
+
+ supabase-delete: ## Delete Supabase database
+ 	@echo "Deleting Supabase database..."
+ 	uv run python src/infrastructure/supabase/delete_db.py
+
+ #################################################################################
+ ## Qdrant Commands
+ #################################################################################
+
+ qdrant-create-collection: ## Create Qdrant collection
+ 	@echo "Creating Qdrant collection..."
+ 	uv run python src/infrastructure/qdrant/create_collection.py
+
+ qdrant-delete-collection: ## Delete Qdrant collection
+ 	@echo "Deleting Qdrant collection..."
+ 	uv run python src/infrastructure/qdrant/delete_collection.py
+
+ qdrant-create-index: ## Create Qdrant index
+ 	@echo "Updating HNSW and creating Qdrant indexes..."
+ 	uv run python src/infrastructure/qdrant/create_indexes.py
+
+ qdrant-ingest-from-sql: ## Ingest data from SQL to Qdrant
+ 	@echo "Ingesting data from SQL to Qdrant..."
+ 	uv run python src/infrastructure/qdrant/ingest_from_sql.py
+ 	@echo "Data ingestion complete."
+
+ #################################################################################
+ ## Prefect Flow Commands
+ #################################################################################
+
+ ingest-rss-articles-flow: ## Ingest RSS articles flow
+ 	@echo "Running ingest RSS articles flow..."
+ 	uv run python src/pipelines/flows/rss_ingestion_flow.py
+ 	@echo "Ingest RSS articles flow completed."
+
+ ingest-embeddings-flow: ## Ingest embeddings flow
+ 	@echo "Running ingest embeddings flow..."
+ 	$(if $(FROM_DATE), \
+ 		uv run python src/pipelines/flows/embeddings_ingestion_flow.py --from-date $(FROM_DATE), \
+ 		uv run python src/pipelines/flows/embeddings_ingestion_flow.py)
+ 	@echo "Ingest embeddings flow completed."
+
+ #################################################################################
+ ## Prefect Deployment Commands
+ #################################################################################
+ deploy-cloud-flows: ## Deploy Prefect flows to Prefect Cloud
+ 	@echo "Deploying Prefect flows to Prefect Cloud..."
+ 	prefect deploy --prefect-file prefect-cloud.yaml
+ 	@echo "Prefect Cloud deployment complete."
+
+ deploy-local-flows: ## Deploy Prefect flows to Prefect Local Server
+ 	@echo "Deploying Prefect flows to Prefect Local Server..."
+ 	prefect deploy --prefect-file prefect-local.yaml
+ 	@echo "Prefect Local deployment complete."
+
+ #################################################################################
+ ## Recreate Commands
+ #################################################################################
+
+ recreate-supabase: supabase-delete supabase-create ## Recreate Supabase resources
+
+ recreate-qdrant: qdrant-delete-collection qdrant-create-collection ## Recreate Qdrant resources
+
+ recreate-all: supabase-delete qdrant-delete-collection supabase-create qdrant-create-collection ## Recreate Qdrant and Supabase resources
+
+ #################################################################################
+ ## FastAPI Commands
+ #################################################################################
+
+ run-api: ## Run FastAPI application
+ 	@echo "Starting FastAPI application..."
+ 	uv run src/api/main.py
+ 	@echo "FastAPI application stopped."
+
+ #################################################################################
+ ## Gradio Commands
+ #################################################################################
+
+ run-gradio: ## Run Gradio application
+ 	@echo "Starting Gradio application..."
+ 	uv run frontend/app.py
+ 	@echo "Gradio application stopped."
+
+ #################################################################################
+ ## Testing Commands
+ #################################################################################
+
+ unit-tests: ## Run all unit tests
+ 	@echo "Running all unit tests..."
+ 	uv run pytest tests/unit
+ 	@echo "All unit tests completed."
+
+ integration-tests: ## Run all integration tests
+ 	@echo "Running all integration tests..."
+ 	uv run pytest tests/integration
+ 	@echo "All integration tests completed."
+
+ all-tests: ## Run all tests
+ 	@echo "Running all tests..."
+ 	uv run pytest
+ 	@echo "All tests completed."
+
+ ################################################################################
+ ## Pre-commit Commands
+ ################################################################################
+
+ pre-commit-run: ## Run pre-commit hooks
+ 	@echo "Running pre-commit hooks..."
+ 	pre-commit run --all-files
+ 	@echo "Pre-commit checks complete."
+
+ ################################################################################
+ ## Linting
+ ################################################################################
+
+ # Linting (just checks)
+ ruff-check: ## Check code lint violations (--diff to show possible changes)
+ 	@echo "Checking Ruff lint violations..."
+ 	uv run ruff check .
+ 	@echo "Ruff lint checks complete."
+
+ ruff-check-fix: ## Auto-fix lint violations using Ruff
+ 	@echo "Fixing lint violations with Ruff..."
+ 	uv run ruff check . --fix --exit-non-zero-on-fix
+ 	@echo "Lint fixes complete."
+
+ ################################################################################
+ ## Formatting
+ ################################################################################
+
+ # Formatting (just checks)
+ ruff-format: ## Check code format violations (--diff to show possible changes)
+ 	@echo "Checking Ruff formatting..."
+ 	uv run ruff format . --check
+ 	@echo "Ruff format checks complete."
+
+ ruff-format-fix: ## Auto-format code using Ruff
+ 	@echo "Formatting code with Ruff..."
+ 	uv run ruff format .
+ 	@echo "Formatting complete."
+
+ #################################################################################
+ ## Static Type Checking
+ #################################################################################
+
+ mypy: ## Run MyPy static type checker
+ 	@echo "Running MyPy static type checker..."
+ 	uv run mypy
+ 	@echo "MyPy static type checker complete."
+
+ ################################################################################
+ ## Cleanup
+ ################################################################################
+
+ clean: ## Clean up cached generated files
+ 	@echo "Cleaning up generated files..."
+ 	find . -type d -name "__pycache__" -exec rm -rf {} +
+ 	find . -type d -name ".pytest_cache" -exec rm -rf {} +
+ 	find . -type d -name ".ruff_cache" -exec rm -rf {} +
+ 	find . -type d -name ".mypy_cache" -exec rm -rf {} +
+ 	find . -type f -name "*.pyc" -delete
+ 	@echo "Cleanup complete."
+
+ ################################################################################
+ ## Composite Commands
+ ################################################################################
+
+ all-check: ruff-format ruff-check clean ## Run all checks: formatting and linting
+
+ all-fix: ruff-format-fix ruff-check-fix mypy clean ## Run all fixes: auto-formatting, lint fixes and type checking
+
+ ################################################################################
+ ## Help
+ ################################################################################
+
+ help: ## Display this help message
+ 	@echo "Default target: $(.DEFAULT_GOAL)"
+ 	@echo "Available targets:"
+ 	@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
+
+ .DEFAULT_GOAL := help
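Assuming a populated `.env`, a typical local sequence with these targets might be:

```bash
make help                       # list all targets
make qdrant-create-collection   # set up the Qdrant collection
make ingest-rss-articles-flow   # pull RSS articles into Supabase
make ingest-embeddings-flow     # chunk, embed and upsert into Qdrant
make run-api                    # start the FastAPI backend
```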
README.md ADDED
@@ -0,0 +1,74 @@
+ # Articles Search Engine
+
+ A compact, production-style RAG pipeline. It ingests Substack RSS articles, stores them in Postgres (Supabase), creates dense/sparse embeddings in Qdrant, and exposes search and answer endpoints via FastAPI with a simple Gradio UI.
+
+ ## How it works (brief)
+ - Ingest RSS → Supabase:
+   - Prefect flow (`src/pipelines/flows/rss_ingestion_flow.py`) reads feeds from `src/configs/feeds_rss.yaml`, parses articles, and writes them to Postgres using SQLAlchemy models.
+ - Embed + index in Qdrant:
+   - Content is chunked, embedded (e.g., BAAI bge models), and upserted to a Qdrant collection with payload indexes for filtering and hybrid search.
+   - Collection and indexes are created via utilities in `src/infrastructure/qdrant/`.
+ - Search + generate:
+   - FastAPI (`src/api/main.py`) exposes search endpoints (keyword, semantic, hybrid) and assembles answers with citations.
+   - LLM providers are pluggable with fallback (OpenRouter, OpenAI, Hugging Face).
+ - UI + deploy:
+   - Gradio app for quick local search (`frontend/app.py`).
+   - Containerization with Docker and optional deploy to Google Cloud Run.
+
+ ## Tech stack
+ - Python 3.12, FastAPI, Prefect, SQLAlchemy
+ - Supabase (Postgres) for articles
+ - Qdrant for vector search (dense + sparse/hybrid)
+ - OpenRouter / OpenAI / Hugging Face for LLM completion
+ - Gradio UI, Docker, Google Cloud Run
+ - Config via Pydantic Settings, `uv` or `pip` for deps
+
+ ## Run locally (minimal)
+ 1) Configure environment (either `.env` or shell). Key variables (Pydantic nested with `__`):
+ - Supabase: `SUPABASE_DB__HOST`, `SUPABASE_DB__PORT`, `SUPABASE_DB__NAME`, `SUPABASE_DB__USER`, `SUPABASE_DB__PASSWORD`
+ - Qdrant: `QDRANT__URL`, `QDRANT__API_KEY`
+ - LLM (choose one): `OPENROUTER__API_KEY` or `OPENAI__API_KEY` or `HUGGING_FACE__API_KEY`
+ - Optional CORS: `ALLOWED_ORIGINS`
+
+ 2) Install dependencies:
+ ```bash
+ # with uv
+ uv venv && source .venv/bin/activate
+ uv pip install -r requirements.txt
+
+ # or with pip
+ python -m venv .venv && source .venv/bin/activate
+ pip install -r requirements.txt
+ ```
+
+ 3) Initialize storage:
+ ```bash
+ python src/infrastructure/supabase/create_db.py
+ python src/infrastructure/qdrant/create_collection.py
+ python src/infrastructure/qdrant/create_indexes.py
+ ```
+
+ 4) Ingest and embed:
+ ```bash
+ python src/pipelines/flows/rss_ingestion_flow.py
+ python src/pipelines/flows/embeddings_ingestion_flow.py
+ ```
+
+ 5) Start services:
+ ```bash
+ # REST API
+ uvicorn src.api.main:app --reload
+
+ # Gradio UI (optional)
+ python frontend/app.py
+ ```
+
+ ## Project structure (high-level)
+ - `src/api/` — FastAPI app, routes, middleware, exceptions
+ - `src/infrastructure/supabase/` — DB init and sessions
+ - `src/infrastructure/qdrant/` — Vector store and collection utilities
+ - `src/pipelines/` — Prefect flows and tasks for ingestion/embeddings
+ - `src/models/` — SQL and vector models
+ - `frontend/` — Gradio UI
+ - `src/configs/` — RSS feeds config
cloudbuild_fastapi.yaml ADDED
@@ -0,0 +1,12 @@
+ steps:
+   - name: 'gcr.io/cloud-builders/docker'
+     entrypoint: 'bash'
+     args:
+       - '-c'
+       - |
+         export DOCKER_BUILDKIT=1
+         docker build -t gcr.io/${PROJECT_ID}/${_SERVICE_NAME} -f Dockerfile .
+ substitutions:
+   _SERVICE_NAME: "substack-pipeline-fastapi"
+ images:
+   - "gcr.io/${PROJECT_ID}/${_SERVICE_NAME}"
deploy_fastapi.sh ADDED
@@ -0,0 +1,105 @@
+ #!/bin/bash
+ # -----------------------
+ # FastAPI Backend Deployment to Cloud Run
+ # -----------------------
+
+ # Exit immediately if a command exits with a non-zero status
+ set -e
+
+ #-----------------------
+ # Load environment variables
+ #-----------------------
+
+ if [ ! -f .env ]; then
+     echo "❌ .env file not found!"
+     exit 1
+ fi
+
+ # Load environment variables from .env file
+ set -o allexport
+ source .env
+ set +o allexport
+
+ echo "✅ Environment variables loaded."
+
+ # -----------------------
+ # Configuration
+ # -----------------------
+ PROJECT_ID="personal-projects-477710"
+ SERVICE_NAME="substack-pipeline-fastapi"
+ REGION="asia-south2" #europe-west1 "europe-west6"
+ IMAGE_NAME="gcr.io/$PROJECT_ID/$SERVICE_NAME"
+
+ # -----------------------
+ # Set project
+ # -----------------------
+ echo "🔧 Setting GCP project to $PROJECT_ID..."
+ gcloud config set project "$PROJECT_ID"
+
+
+ # -----------------------
+ # Enable required APIs
+ # -----------------------
+ echo "🔧 Enabling required GCP services..."
+ gcloud services enable \
+     cloudbuild.googleapis.com \
+     run.googleapis.com \
+     containerregistry.googleapis.com
+
+ # -----------------------
+ # Build and push Docker image
+ # -----------------------
+ echo "🐳 Building and pushing Docker image..."
+ gcloud builds submit --config cloudbuild_fastapi.yaml \
+     --substitutions=_SERVICE_NAME=$SERVICE_NAME
+
+ # -----------------------
+ # Deploy to Cloud Run
+ # -----------------------
+ echo "🚀 Deploying $SERVICE_NAME to Cloud Run..."
+ gcloud run deploy "$SERVICE_NAME" \
+     --image "$IMAGE_NAME" \
+     --platform managed \
+     --region "$REGION" \
+     --allow-unauthenticated \
+     --memory 2.5Gi \
+     --cpu 1 \
+     --timeout 180 \
+     --concurrency 2 \
+     --min-instances 0 \
+     --max-instances 2 \
+     --execution-environment gen2 \
+     --cpu-boost \
+     --set-env-vars HF_HOME=/tmp/huggingface \
+     --set-env-vars HUGGING_FACE__API_KEY=$HUGGING_FACE__API_KEY \
+     --set-env-vars QDRANT__API_KEY=$QDRANT__API_KEY \
+     --set-env-vars QDRANT__URL=$QDRANT__URL \
+     --set-env-vars QDRANT__COLLECTION_NAME=$QDRANT__COLLECTION_NAME \
+     --set-env-vars QDRANT__DENSE_MODEL_NAME=$QDRANT__DENSE_MODEL_NAME \
+     --set-env-vars QDRANT__SPARSE_MODEL_NAME=$QDRANT__SPARSE_MODEL_NAME \
+     --set-env-vars OPENROUTER__API_KEY=$OPENROUTER__API_KEY \
+     --set-env-vars OPIK__API_KEY=$OPIK__API_KEY \
+     --set-env-vars OPIK__PROJECT_NAME=$OPIK__PROJECT_NAME \
+     --set-env-vars "^@^ALLOWED_ORIGINS=$ALLOWED_ORIGINS@"
+
+ # Log the allowed origins
+ echo "✅ Allowed origins set to: $ALLOWED_ORIGINS"
+
+ # -----------------------
+ # Capture the deployed service URL and update BACKEND_URL
+ #-----------------------
+ SERVICE_URL=$(gcloud run services describe $SERVICE_NAME --region=$REGION --format='value(status.url)')
+ echo "Deployment complete!"
+ echo "Service URL: $SERVICE_URL"
+
+
+
+ # # -----------------------
+ # # Update BACKEND_URL dynamically
+ # # -----------------------
+ # echo "🔄 Updating BACKEND_URL to $SERVICE_URL..."
+ # gcloud run services update "$SERVICE_NAME" \
+ #     --region "$REGION" \
+ #     --update-env-vars BACKEND_URL="$SERVICE_URL"
+
+ # echo "✅ BACKEND_URL updated successfully."
frontend/__init__.py ADDED
File without changes
frontend/app.py ADDED
@@ -0,0 +1,560 @@
+ import os
+
+ import gradio as gr
+ import markdown
+ import requests
+ import yaml
+ from dotenv import load_dotenv
+
+ try:
+     from src.api.models.provider_models import MODEL_REGISTRY
+ except ImportError as e:
+     raise ImportError(
+         "Could not import MODEL_REGISTRY from src.api.models.provider_models. "
+         "Check the path and file existence."
+     ) from e
+
+ # Initialize environment variables
+ load_dotenv()
+
+ BACKEND_URL = os.getenv("BACKEND_URL", "http://localhost:8080")
+ API_BASE_URL = f"{BACKEND_URL}/search"
+
+
+ # Load feeds from YAML
+ def load_feeds():
+     """Load feeds from the YAML configuration file.
+     Returns:
+         list: List of feeds with their details.
+     """
+     feeds_path = os.path.join(os.path.dirname(__file__), "../src/configs/feeds_rss.yaml")
+     with open(feeds_path) as f:
+         feeds_yaml = yaml.safe_load(f)
+     return feeds_yaml.get("feeds", [])
+
+
+ feeds = load_feeds()
+ feed_names = [f["name"] for f in feeds]
+ feed_authors = [f["author"] for f in feeds]
+
+
+ # -----------------------
+ # API helpers
+ # -----------------------
+ def fetch_unique_titles(payload):
+     """
+     Fetch unique article titles based on the search criteria.
+
+     Args:
+         payload (dict): The search criteria including query_text, feed_author,
+             feed_name, limit, and optional title_keywords.
+     Returns:
+         list: A list of articles matching the criteria.
+     Raises:
+         Exception: If the API request fails.
+     """
+     try:
+         resp = requests.post(f"{API_BASE_URL}/unique-titles", json=payload)
+         resp.raise_for_status()
+         return resp.json().get("results", [])
+     except Exception as e:
+         raise Exception(f"Failed to fetch titles: {str(e)}") from e
+
+
+ def call_ai(payload, streaming=True):
+     """
+     Call the AI endpoint with the given payload.
+     Args:
+         payload (dict): The payload to send to the AI endpoint.
+         streaming (bool): Whether to use streaming or non-streaming endpoint.
+     Yields:
+         tuple: A tuple containing the type of response and the response text.
+     """
+     endpoint = f"{API_BASE_URL}/ask/stream" if streaming else f"{API_BASE_URL}/ask"
+     answer_text = ""
+     try:
+         if streaming:
+             with requests.post(endpoint, json=payload, stream=True) as r:
+                 r.raise_for_status()
+                 for chunk in r.iter_content(chunk_size=None, decode_unicode=True):
+                     if not chunk:
+                         continue
+                     if chunk.startswith("__model_used__:"):
+                         yield "model", chunk.replace("__model_used__:", "").strip()
+                     elif chunk.startswith("__error__"):
+                         yield "error", "Request failed. Please try again later."
+                         break
+                     elif chunk.startswith("__truncated__"):
+                         yield "truncated", "AI response truncated due to token limit."
+                     else:
+                         answer_text += chunk
+                         yield "text", answer_text
+         else:
+             resp = requests.post(endpoint, json=payload)
+             resp.raise_for_status()
+             data = resp.json()
+             answer_text = data.get("answer", "")
+             yield "text", answer_text
+             if data.get("finish_reason") == "length":
+                 yield "truncated", "AI response truncated due to token limit."
+     except Exception as e:
+         yield "error", f"Request failed: {str(e)}"
+
+
+ def get_models_for_provider(provider):
+     """
+     Get available models for a provider
+
+     Args:
+         provider (str): The name of the provider (e.g., "openrouter", "openai")
+     Returns:
+         list: List of model names available for the provider
+     """
+     provider_key = provider.lower()
+     try:
+         config = MODEL_REGISTRY.get_config(provider_key)
+         return (
+             ["Automatic Model Selection (Model Routing)"]
+             + ([config.primary_model] if config.primary_model else [])
+             + list(config.candidate_models)
+         )
+     except Exception:
+         return ["Automatic Model Selection (Model Routing)"]
+
+
+ # -----------------------
+ # Gradio interface functions
+ # -----------------------
+ def handle_search_articles(query_text, feed_name, feed_author, title_keywords, limit):
+     """
+     Handle article search
+
+     Args:
+         query_text (str): The text to search for in article titles.
+         feed_name (str): The name of the feed to filter articles by.
+         feed_author (str): The author of the feed to filter articles by.
+         title_keywords (str): Keywords to search for in article titles.
+         limit (int): The maximum number of articles to return.
+     Returns:
+         str: HTML formatted string of search results or error message.
+     Raises:
+         Exception: If the API request fails.
+     """
+     if not query_text.strip():
+         return "Please enter a query text."
+
+     payload = {
+         "query_text": query_text.strip().lower(),
+         "feed_author": feed_author.strip() if feed_author else "",
+         "feed_name": feed_name.strip() if feed_name else "",
+         "limit": limit,
+         "title_keywords": title_keywords.strip().lower() if title_keywords else None,
+     }
+
+     try:
+         results = fetch_unique_titles(payload)
+         if not results:
+             return "No results found."
+
+         html_output = ""
+         for item in results:
+             html_output += (
+                 f"<div style='background-color:#F0F8FF; padding:20px; "
+                 f"border-radius:10px; font-size:18px; margin-bottom:15px;'>\n"
+                 f" <h2 style='font-size:22px; color:#1f4e79; margin-top:0;'>"
+                 f"{item.get('title', 'No title')}</h2>\n"
+                 f" <p style='margin:5px 0;'>"
+                 f"<b>Newsletter:</b> {item.get('feed_name', 'N/A')}"
+                 f"</p>\n"
+                 f" <p style='margin:5px 0;'>"
+                 f"<b>Author:</b> {item.get('feed_author', 'N/A')}"
+                 f"</p>\n"
+                 f" <p style='margin:5px 0;'><b>Article Authors:</b> "
+                 f"{', '.join(item.get('article_author') or ['N/A'])}</p>\n"
+                 f" <p style='margin:5px 0;'><b>URL:</b> "
+                 f"<a href='{item.get('url', '#')}' target='_blank' style='color:#0066cc;'>"
+                 f"{item.get('url', 'No URL')}</a></p>\n"
+                 f"</div>\n"
+             )
+         return html_output
+
+     except Exception as e:
+         return f"<div style='color:red; padding:10px;'>Error: {str(e)}</div>"
+
+
+ def handle_ai_question_streaming(
+     query_text,
+     feed_name,
+     feed_author,
+     limit,
+     provider,
+     model,
+ ):
+     """
+     Handle AI question with streaming
+
+     Args:
+         query_text (str): The question to ask the AI.
+         feed_name (str): The name of the feed to filter articles by.
+         feed_author (str): The author of the feed to filter articles by.
+         limit (int): The maximum number of articles to consider.
+         provider (str): The LLM provider to use.
+         model (str): The specific model to use from the provider.
+     Yields:
+         tuple: (HTML formatted answer string, model info string)
+     """
+     if not query_text.strip():
+         yield "Please enter a query text.", ""
+         return
+
+     if not provider or not model:
+         yield "Please select provider and model.", ""
+         return
+
+     payload = {
+         "query_text": query_text.strip().lower(),
+         "feed_author": feed_author.strip() if feed_author else "",
+         "feed_name": feed_name.strip() if feed_name else "",
+         "limit": limit,
+         "provider": provider.lower(),
+     }
+
+     if model != "Automatic Model Selection (Model Routing)":
+         payload["model"] = model
+
+     try:
+         answer_html = ""
+         model_info = f"Provider: {provider}"
+
+         for _, (event_type, content) in enumerate(call_ai(payload, streaming=True)):
+             if event_type == "text":
+                 # Convert markdown to HTML
+                 html_content = markdown.markdown(content, extensions=["tables"])
+                 answer_html = (
+                     f"\n"
+                     f"<div style='background-color:#E8F0FE; "
+                     f"padding:15px; border-radius:10px; font-size:16px;'>\n"
+                     f" {html_content}\n"
+                     f"</div>\n"
+                 )
+                 yield answer_html, model_info
+
+             elif event_type == "model":
+                 model_info = f"Provider: {provider} | Model: {content}"
+                 yield answer_html, model_info
+
+             elif event_type == "truncated":
+                 answer_html += (
+                     f"<div style='color:#ff6600; padding:10px; font-weight:bold;'>⚠️ {content}</div>"
+                 )
+                 yield answer_html, model_info
+
+             elif event_type == "error":
+                 error_html = (
+                     f"<div style='color:red; padding:10px; font-weight:bold;'>❌ {content}</div>"
+                 )
+                 yield error_html, model_info
+                 break
+
+     except Exception as e:
+         error_html = f"<div style='color:red; padding:10px;'>Error: {str(e)}</div>"
+         yield error_html, model_info
+
+
+ def handle_ai_question_non_streaming(query_text, feed_name, feed_author, limit, provider, model):
+     """
+     Handle AI question without streaming
+
+     Args:
+         query_text (str): The question to ask the AI.
+         feed_name (str): The name of the feed to filter articles by.
+         feed_author (str): The author of the feed to filter articles by.
+         limit (int): The maximum number of articles to consider.
+         provider (str): The LLM provider to use.
+         model (str): The specific model to use from the provider.
+
+     Returns:
+         tuple: (HTML formatted answer string, model info string)
+     """
+     if not query_text.strip():
+         return "Please enter a query text.", ""
+
+     if not provider or not model:
+         return "Please select provider and model.", ""
+
+     payload = {
+         "query_text": query_text.strip().lower(),
+         "feed_author": feed_author.strip() if feed_author else "",
+         "feed_name": feed_name.strip() if feed_name else "",
+         "limit": limit,
+         "provider": provider.lower(),
+     }
+
+     if model != "Automatic Model Selection (Model Routing)":
+         payload["model"] = model
+
+     try:
+         answer_html = ""
+         model_info = f"Provider: {provider}"
+
+         for event_type, content in call_ai(payload, streaming=False):
+             if event_type == "text":
+                 html_content = markdown.markdown(content, extensions=["tables"])
+                 answer_html = (
+                     "<div style='background-color:#E8F0FE; "
+                     "padding:15px; border-radius:10px; font-size:16px;'>\n"
+                     f"{html_content}\n"
+                     "</div>\n"
+                 )
+             elif event_type == "model":
+                 model_info = f"Provider: {provider} | Model: {content}"
+             elif event_type == "truncated":
+                 answer_html += (
+                     f"<div style='color:#ff6600; padding:10px; font-weight:bold;'>⚠️ {content}</div>"
+                 )
+             elif event_type == "error":
+                 return (
+                     f"<div style='color:red; padding:10px; font-weight:bold;'>❌ {content}</div>",
+                     model_info,
+                 )
+
+         return answer_html, model_info
+
+     except Exception as e:
+         return (
+             f"<div style='color:red; padding:10px;'>Error: {str(e)}</div>",
+             f"Provider: {provider}",
+         )
+
+
+ def update_model_choices(provider):
+     """
+     Update model choices based on selected provider
+     Args:
+         provider (str): The selected LLM provider
+     Returns:
+         gr.Dropdown: Updated model dropdown component
+     """
+     models = get_models_for_provider(provider)
+     return gr.Dropdown(choices=models, value=models[0] if models else "")
+
+
+ # -----------------------
+ # Gradio UI
+ # -----------------------
+ with gr.Blocks(title="Substack Articles LLM Engine", theme=gr.themes.Soft()) as demo:
+     # Header
+     gr.HTML(
+         "<div style='background-color:#ff6719; padding:20px; border-radius:12px; "
+         "text-align:center; margin-bottom:20px;'>\n"
+         " <h1 style='color:white; font-size:42px; font-family:serif; margin:0;'>\n"
+         " 📰 Substack Articles LLM Engine\n"
+         " </h1>\n"
+         "</div>\n"
+     )
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             # Search Mode Selection
+             gr.Markdown("## 🔍 Select Search Mode")
+             search_type = gr.Radio(
+                 choices=["Search Articles", "Ask the AI"],
+                 value="Search Articles",
+                 label="Search Mode",
+                 info="Choose between searching for articles or asking AI questions",
+             )
+
+             # Common filters
+             gr.Markdown("### Filters")
+             query_text = gr.Textbox(label="Query", placeholder="Type your query here...", lines=3)
+             feed_author = gr.Dropdown(
+                 choices=[""] + feed_authors, label="Author (optional)", value=""
+             )
+             feed_name = gr.Dropdown(
+                 choices=[""] + feed_names, label="Newsletter (optional)", value=""
+             )
+
+             # Conditional fields based on search type
+             title_keywords = gr.Textbox(
+                 label="Title Keywords (optional)",
+                 placeholder="Filter by words in the title",
+                 visible=True,
+             )
+
+             limit = gr.Slider(
+                 minimum=1, maximum=20, step=1, label="Number of results", value=5, visible=True
+             )
+
+             # LLM Options (only visible for AI mode)
+             with gr.Group(visible=False) as llm_options:
+                 gr.Markdown("### ⚙️ LLM Options")
+                 provider = gr.Dropdown(
+                     choices=["OpenRouter", "HuggingFace", "OpenAI"],
+                     label="Select LLM Provider",
+                     value="OpenRouter",
+                 )
+                 model = gr.Dropdown(
+                     choices=get_models_for_provider("OpenRouter"),
+                     label="Select Model",
+                     value="Automatic Model Selection (Model Routing)",
+                 )
+                 streaming_mode = gr.Radio(
+                     choices=["Streaming", "Non-Streaming"],
+                     value="Streaming",
+                     label="Answer Mode",
+                     info="Streaming shows results as they're generated",
+                 )
+
+             # Submit button
+             submit_btn = gr.Button("🔎 Search / Ask AI", variant="primary", size="lg")
+
+         with gr.Column(scale=2):
+             # Output area
+             output_html = gr.HTML(label="Results")
+             model_info = gr.HTML(visible=False)
+
+     # Event handlers
+     def toggle_visibility(search_type):
+         """
+         Toggle visibility of components based on search type
+
+         Args:
+             search_type (str): The selected search type
+         Returns:
+             tuple: Visibility states for (llm_options, title_keywords, model_info)
+         """
+
+         show_title_keywords = search_type == "Search Articles"
+         show_llm_options = search_type == "Ask the AI"
+         show_model_info = search_type == "Ask the AI"
+         show_limit_slider = search_type == "Search Articles"
+
+         return (
+             gr.Group(visible=show_llm_options),  # llm_options
+             gr.Textbox(visible=show_title_keywords),  # title_keywords
+             gr.HTML(visible=show_model_info),  # model_info
+             gr.Slider(visible=show_limit_slider),  # limit
+         )
+
+     search_type.change(
+         fn=toggle_visibility,
+         inputs=[search_type],
+         outputs=[llm_options, title_keywords, model_info, limit],
+     )
+
+     # Update model dropdown when provider changes
+     provider.change(fn=update_model_choices, inputs=[provider], outputs=[model])
+
+     # Unified submission handler
+     def handle_submission(
+         search_type,
+         streaming_mode,
+         query_text,
+         feed_name,
+         feed_author,
+         title_keywords,
+         limit,
+         provider,
+         model,
+     ):
+         """
+         Handle submission based on search type and streaming mode
+         Args:
+             search_type (str): The selected search type
+             streaming_mode (str): The selected streaming mode
+             query_text (str): The query text
+             feed_name (str): The selected feed name
+             feed_author (str): The selected feed author
+             title_keywords (str): The title keywords (if applicable)
+             limit (int): The number of results to return
+             provider (str): The selected LLM provider (if applicable)
+             model (str): The selected model (if applicable)
+         Returns:
+             tuple: (HTML formatted answer string, model info string)
+         """
+         if search_type == "Search Articles":
+             result = handle_search_articles(
+                 query_text, feed_name, feed_author, title_keywords, limit
+             )
+             return result, ""  # Always return two values
+         else:  # Ask the AI
+             if streaming_mode == "Non-Streaming":
+                 return handle_ai_question_non_streaming(
+                     query_text, feed_name, feed_author, limit, provider, model
+                 )
+             else:
+                 # For streaming, we'll use a separate handler
+                 return "", ""
+
+     # Streaming handler
+     def handle_streaming_submission(
+         search_type,
+         streaming_mode,
+         query_text,
+         feed_name,
+         feed_author,
+         title_keywords,
+         limit,
+         provider,
+         model,
+     ):
+         """
+         Handle submission with streaming support
+         Args:
+             search_type (str): The selected search type
+             streaming_mode (str): The selected streaming mode
+             query_text (str): The query text
+             feed_name (str): The selected feed name
+             feed_author (str): The selected feed author
+             title_keywords (str): The title keywords (if applicable)
+             limit (int): The number of results to return
+             provider (str): The selected LLM provider (if applicable)
+             model (str): The selected model (if applicable)
+         Yields:
+             tuple: (HTML formatted answer string, model info string)
+         """
+         if search_type == "Ask the AI" and streaming_mode == "Streaming":
+             yield from handle_ai_question_streaming(
+                 query_text, feed_name, feed_author, limit, provider, model
+             )
+         else:
+             # For non-streaming cases, just return the regular result
+             if search_type == "Search Articles":
+                 result = handle_search_articles(
+                     query_text, feed_name, feed_author, title_keywords, limit
+                 )
+                 yield result, ""
+             else:
+                 result_html, model_info_text = handle_ai_question_non_streaming(
+                     query_text, feed_name, feed_author, limit, provider, model
+                 )
+                 yield result_html, model_info_text
+
+     # Single click handler that routes based on mode
+     submit_btn.click(
+         fn=handle_streaming_submission,
+         inputs=[
+             search_type,
+             streaming_mode,
+             query_text,
+             feed_name,
+             feed_author,
+             title_keywords,
+             limit,
+             provider,
+             model,
+         ],
+         outputs=[output_html, model_info],
+         show_progress=True,
+     )
+
+ # For local testing
+ if __name__ == "__main__":
+     demo.launch()
+
+ # # For Google Cloud Run deployment
+ # if __name__ == "__main__":
+ #     demo.launch(
+ #         server_name="0.0.0.0",
+ #         server_port=int(os.environ.get("PORT", 8080))
+ #     )
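For reference, the payloads built in `frontend/app.py` can also be sent straight to the backend. A hypothetical local call, assuming the API is serving on port 8080 as configured in the Dockerfile and that the routes accept exactly the fields shown above:

```bash
# Search for matching article titles (payload fields mirror those built in frontend/app.py)
curl -X POST http://localhost:8080/search/unique-titles \
  -H "Content-Type: application/json" \
  -d '{"query_text": "vector databases", "feed_author": "", "feed_name": "", "limit": 5, "title_keywords": null}'

# Ask the AI (non-streaming endpoint)
curl -X POST http://localhost:8080/search/ask \
  -H "Content-Type: application/json" \
  -d '{"query_text": "what is hybrid search?", "feed_author": "", "feed_name": "", "limit": 5, "provider": "openrouter"}'
```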
prefect-cloud.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pull:
2
+ - prefect.deployments.steps.git_clone:
3
+ id: clone-step
4
+ repository: https://github.com/Indraneel99/substack-newsletters-search-course
5
+ credentials: "{{ prefect.blocks.github-credentials.my-gh-creds }}"
6
+
7
+ - prefect.deployments.steps.run_shell_script:
8
+ id: install-build-tools
9
+ script: |
10
+ apt-get update -y
11
+ apt-get install -y --no-install-recommends build-essential g++
12
+
13
+ - prefect.deployments.steps.pip_install_requirements:
14
+ directory: "{{ clone-step.directory }}"
15
+ requirements_file: requirements.txt
16
+ stream_output: true
17
+
18
+ deployments:
19
+ - name: rss-ingest
20
+ entrypoint: src/pipelines/flows/rss_ingestion_flow.py:rss_ingest_flow
21
+ work_pool:
22
+ name: default-work-pool
23
+ job_variables:
24
+ env:
25
+ SUPABASE_DB__TABLE_NAME: "{{ prefect.blocks.secret.supabase-db--table-name }}"
26
+ SUPABASE_DB__HOST: "{{ prefect.blocks.secret.supabase-db--host }}"
27
+ SUPABASE_DB__NAME: "{{ prefect.blocks.secret.supabase-db--name }}"
28
+ SUPABASE_DB__USER: "{{ prefect.blocks.secret.supabase-db--user }}"
29
+ SUPABASE_DB__PASSWORD: "{{ prefect.blocks.secret.supabase-db--password }}"
30
+ SUPABASE_DB__PORT: "{{ prefect.blocks.secret.supabase-db--port }}"
31
+
32
+ schedule:
33
+ cron: "0 0 * * 7"
34
+
35
+ - name: qdrant-embeddings
36
+ entrypoint: src/pipelines/flows/embeddings_ingestion_flow.py:qdrant_ingest_flow
37
+ work_pool:
38
+ name: default-work-pool
39
+ job_variables:
40
+ env:
41
+ SUPABASE_DB__TABLE_NAME: "{{ prefect.blocks.secret.supabase-db--table-name }}"
42
+ SUPABASE_DB__HOST: "{{ prefect.blocks.secret.supabase-db--host }}"
43
+ SUPABASE_DB__NAME: "{{ prefect.blocks.secret.supabase-db--name }}"
44
+ SUPABASE_DB__USER: "{{ prefect.blocks.secret.supabase-db--user }}"
45
+ SUPABASE_DB__PASSWORD: "{{ prefect.blocks.secret.supabase-db--password }}"
46
+ SUPABASE_DB__PORT: "{{ prefect.blocks.secret.supabase-db--port }}"
47
+ QDRANT__API_KEY: "{{ prefect.blocks.secret.qdrant--api-key }}"
48
+ QDRANT__URL: "{{ prefect.blocks.secret.qdrant--url }}"
49
+ QDRANT__COLLECTION_NAME: "{{ prefect.blocks.secret.qdrant--collection-name }}"
50
+
51
+ schedule:
52
+ cron: "0 0 * * 7"
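The "{{ prefect.blocks.secret.* }}" placeholders above only resolve if Secret blocks with those slugs already exist in the Prefect workspace. A hedged sketch of creating them from Python (block names mirror the YAML; the values are placeholders, not real credentials):

from prefect.blocks.system import Secret

# Block slugs must match the names referenced in the deployment YAML.
secrets = {
    "supabase-db--table-name": "<table-name>",
    "supabase-db--host": "<db-host>",
    "supabase-db--password": "<db-password>",
    "qdrant--api-key": "<qdrant-api-key>",
    "qdrant--url": "<qdrant-url>",
}

for name, value in secrets.items():
    Secret(value=value).save(name=name, overwrite=True)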
prefect-local.yaml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pull:
2
+ - prefect.deployments.steps.git_clone:
3
+ id: clone-step
4
+ repository: https://github.com/Indraneel99/substack-newsletters-search-course
5
+ credentials: "{{ prefect.blocks.github-credentials.my-gh-creds }}"
6
+
7
+ # This function ensures pip is installed in the environment (Only needed for Prefect Server)
8
+ - prefect.deployments.steps.run_shell_script:
9
+ id: install-pip
10
+ directory: "{{ clone-step.directory }}"
11
+ script: |
12
+ python -m ensurepip --upgrade
13
+
14
+ - prefect.deployments.steps.pip_install_requirements:
15
+ directory: "{{ clone-step.directory }}"
16
+ requirements_file: requirements.txt
17
+ stream_output: true
18
+
19
+ deployments:
20
+ - name: rss-ingest
21
+ entrypoint: src/pipelines/flows/rss_ingestion_flow.py:rss_ingest_flow
22
+ work_pool:
23
+ name: default-work-pool
24
+ job_variables:
25
+ env:
26
+ SUPABASE_DB__TABLE_NAME: "{{ prefect.blocks.secret.supabase-db--table-name }}"
27
+ SUPABASE_DB__HOST: "{{ prefect.blocks.secret.supabase-db--host }}"
28
+ SUPABASE_DB__NAME: "{{ prefect.blocks.secret.supabase-db--name }}"
29
+ SUPABASE_DB__USER: "{{ prefect.blocks.secret.supabase-db--user }}"
30
+ SUPABASE_DB__PASSWORD: "{{ prefect.blocks.secret.supabase-db--password }}"
31
+ SUPABASE_DB__PORT: "{{ prefect.blocks.secret.supabase-db--port }}"
32
+
33
+ schedule:
34
+ cron: "0 0 * * 7"
35
+
36
+ - name: qdrant-embeddings
37
+ entrypoint: src/pipelines/flows/embeddings_ingestion_flow.py:qdrant_ingest_flow
38
+ work_pool:
39
+ name: default-work-pool
40
+ job_variables:
41
+ env:
42
+ SUPABASE_DB__TABLE_NAME: "{{ prefect.blocks.secret.supabase-db--table-name }}"
43
+ SUPABASE_DB__HOST: "{{ prefect.blocks.secret.supabase-db--host }}"
44
+ SUPABASE_DB__NAME: "{{ prefect.blocks.secret.supabase-db--name }}"
45
+ SUPABASE_DB__USER: "{{ prefect.blocks.secret.supabase-db--user }}"
46
+ SUPABASE_DB__PASSWORD: "{{ prefect.blocks.secret.supabase-db--password }}"
47
+ SUPABASE_DB__PORT: "{{ prefect.blocks.secret.supabase-db--port }}"
48
+ QDRANT__API_KEY: "{{ prefect.blocks.secret.qdrant--api-key }}"
49
+ QDRANT__URL: "{{ prefect.blocks.secret.qdrant--url }}"
50
+ QDRANT__COLLECTION_NAME: "{{ prefect.blocks.secret.qdrant--collection-name }}"
51
+
52
+ schedule:
53
+ cron: "0 0 * * 7"
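Both files register the same two deployments; once applied to a work pool with a running worker, a run can also be triggered ad hoc from Python. A minimal sketch, assuming Prefect's default flow-name derivation from the rss_ingest_flow function (the "rss-ingest-flow" name is an assumption):

from prefect.deployments import run_deployment

# Kick off the weekly RSS ingestion outside its cron schedule.
flow_run = run_deployment(name="rss-ingest-flow/rss-ingest")
print(flow_run.id, flow_run.state_name)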
pyproject.toml ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "substack-newsletters-search-course"
3
+ version = "1.0.0"
4
+ description = "A pipeline to retrieve Newsletters from Substack"
5
+ readme = "README.md"
6
+ authors = [
7
+ {name = "Benito Martin"}
8
+ ]
9
+ license = {text = "MIT License"}
10
+ requires-python = ">=3.12"
11
+
12
+ dependencies = [
13
+ "aiohttp>=3.12.15",
14
+ "beautifulsoup4>=4.13.5",
15
+ "fastapi[standard]>=0.116.1",
16
+ "fastembed>=0.7.2",
17
+ "langchain>=0.3.27",
18
+ "langchain-text-splitters>=0.3.9",
19
+ "loguru>=0.7.3",
20
+ "lxml>=5.4.0",
21
+ "openai>=1.103.0",
22
+ "opik>=1.8.29",
23
+ "prefect>=3.4.14",
24
+ "psutil>=7.0.0",
25
+ "psycopg2-binary>=2.9.10",
26
+ "pydantic>=2.11.7",
27
+ "pydantic-settings>=2.10.1",
28
+ "qdrant-client>=1.15.1",
29
+ "sqlalchemy>=2.0.43",
30
+ "supabase>=2.18.1",
31
+ "uvloop>=0.21.0",
32
+ "gradio>=5.45.0",
33
+ "markdown>=3.9",
34
+ "python-dotenv>=1.1.1",
35
+ "markdownify>=1.2.0",
36
+ "prefect-github>=0.3.1",
37
+ "requests>=2.32.5",
38
+ ]
39
+
40
+ # [[tool.uv.index]]
41
+ # name = "pytorch-cpu"
42
+ # url = "https://download.pytorch.org/whl/cpu"
43
+ # explicit = true
44
+
45
+
46
+ [dependency-groups]
47
+ dev = [
48
+ "pre-commit>=4.3.0",
49
+ "types-python-dateutil>=2.9.0.20250822",
50
+ "types-pyyaml>=6.0.12.20250822",
51
+ "types-requests>=2.32.4.20250809",
52
+ ]
53
+ lint = [
54
+ "mypy>=1.17.1",
55
+ "ruff>=0.12.10",
56
+ "types-markdown>=3.9.0.20250906",
57
+ "types-python-dateutil>=2.9.0.20250822",
58
+ "types-pyyaml>=6.0.12.20250822",
59
+ "types-requests>=2.32.4.20250809",
60
+ ]
61
+ test = [
62
+ "pytest>=8.4.1",
63
+ "pytest-asyncio>=1.1.0",
64
+ "responses>=0.25.8",
65
+ ]
66
+
67
+ [build-system]
68
+ requires = ["hatchling"]
69
+ build-backend = "hatchling.build"
70
+
71
+ [tool.hatch.build]
72
+ packages = ["src"]
73
+
74
+ ######################################
75
+ # --- Linting & Formatting Tools --- #
76
+ ######################################
77
+
78
+ [tool.ruff]
79
+ # Assume Python 3.12
80
+ target-version = "py312"
81
+
82
+ # Same as Black.
83
+ line-length = 100
84
+ indent-width = 4
85
+
86
+ # Exclude a variety of commonly ignored directories.
87
+ exclude = [
88
+ ".bzr",
89
+ ".direnv",
90
+ ".eggs",
91
+ ".git",
92
+ ".git-rewrite",
93
+ ".hg",
94
+ ".mypy_cache",
95
+ ".nox",
96
+ ".pants.d",
97
+ ".pytype",
98
+ ".ruff_cache",
99
+ ".svn",
100
+ ".tox",
101
+ ".venv",
102
+ "__pypackages__",
103
+ "_build",
104
+ "buck-out",
105
+ "build",
106
+ "dist",
107
+ "node_modules",
108
+ "venv",
109
+ ]
110
+
111
+ # Whether to show an enumeration of all fixed lint violations
112
+ show-fixes = true
113
+
114
+ # Enable common lint rules.
115
+ lint.select = [
116
+ "B", # flake8-bugbear
117
+ "E", # pycodestyle
118
+ "F", # Pyflakes1
119
+ "I", # isort
120
+ "SIM", # similarity
121
+ "UP", # pyupgrade
122
+ "D102", # docstring method
123
+ "D103", # docstring function
124
+ "D414", # docstring missing section
125
+ "D419", # empty docstring
126
+ # "D101", # docstring missing class
127
+ ]
128
+
129
+ lint.ignore = []
130
+
131
+ # Allow autofix for all enabled rules (when `--fix`) is provided.
132
+ lint.fixable = ["ALL"]
133
+ lint.unfixable = []
134
+
135
+ [tool.ruff.lint.mccabe]
136
+ # Maximum allowed McCabe complexity.
137
+ max-complexity = 10
138
+
139
+
140
+ #########################
141
+ # --- Static Typing --- #
142
+ #########################
143
+
144
+ [tool.mypy]
145
+ # Use `packages` to specify the package root
146
+ packages = ["src"]
147
+ explicit_package_bases = true
148
+
149
+ # All other configurations
150
+ ignore_missing_imports = true
151
+ disallow_untyped_defs = false
152
+ check_untyped_defs = true
153
+ # warn_redundant_casts = true
154
+ warn_unused_ignores = false
155
+ warn_return_any = false
156
+ strict_optional = true
157
+
158
+ # [tool.mypy]
159
+ # # Only check src directory, with src as the package root
160
+ # files = ["src"] # Check from project root instead of just src
161
+ # mypy_path = ["src"] # Set mypy path to project root
162
+
163
+ #########################
164
+ # --- Testing Tools --- #
165
+ #########################
166
+
167
+ [tool.pytest.ini_options]
168
+ testpaths = [ "tests" ]
169
+ python_files = [ "test_*.py" ]
170
+ addopts = "-ra -v -s"
171
+ filterwarnings = [
172
+ "ignore::DeprecationWarning",
173
+ "ignore::UserWarning"
174
+ ]
requirements.txt ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp
2
+ beautifulsoup4
3
+ fastapi[standard]
4
+ fastembed
5
+ langchain
6
+ langchain-text-splitters
7
+ loguru
8
+ lxml
9
+ openai
10
+ opik
11
+ prefect
12
+ psutil
13
+ psycopg2-binary
14
+ pydantic
15
+ pydantic-settings
16
+ qdrant-client
17
+ sqlalchemy
18
+ supabase
19
+ uvloop
20
+ gradio
21
+ markdown
22
+ python-dotenv
23
+ markdownify
src/__init__.py ADDED
File without changes
src/api/__init__.py ADDED
File without changes
src/api/exceptions/__init__.py ADDED
File without changes
src/api/exceptions/exception_handlers.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import Request
2
+ from fastapi.exceptions import RequestValidationError
3
+ from fastapi.responses import JSONResponse
4
+ from qdrant_client.http.exceptions import UnexpectedResponse
5
+
6
+ from src.utils.logger_util import setup_logging
7
+
8
+ logger = setup_logging()
9
+
10
+
11
+ async def validation_exception_handler(request: Request, exc: Exception) -> JSONResponse:
12
+ """Handle FastAPI request validation errors.
13
+
14
+ Args:
15
+ request (Request): The incoming request that caused the validation error.
16
+ exc (Exception): The exception instance.
17
+
18
+ Returns:
19
+ JSONResponse: A JSON response with status code 422 and error details.
20
+
21
+ """
22
+ if isinstance(exc, RequestValidationError):
23
+ logger.warning(f"Validation error on {request.url}: {exc.errors()}")
24
+ return JSONResponse(
25
+ status_code=422,
26
+ content={
27
+ "type": "validation_error",
28
+ "message": "Invalid request",
29
+ "details": exc.errors(),
30
+ },
31
+ )
32
+
33
+ logger.exception(f"Unexpected exception on {request.url}: {exc}")
34
+ return JSONResponse(
35
+ status_code=500,
36
+ content={
37
+ "type": "internal_error",
38
+ "message": "Internal server error",
39
+ "details": str(exc),
40
+ },
41
+ )
42
+
43
+
44
+ async def qdrant_exception_handler(request: Request, exc: Exception) -> JSONResponse:
45
+ """Handle unexpected responses from Qdrant.
46
+
47
+ Args:
48
+ request (Request): The incoming request that caused the error.
49
+ exc (Exception): The exception instance.
50
+
51
+ Returns:
52
+ JSONResponse: A JSON response with status code 500 and error details.
53
+
54
+ """
55
+ if isinstance(exc, UnexpectedResponse):
56
+ logger.error(f"Qdrant error on {request.url}: {exc}")
57
+ return JSONResponse(
58
+ status_code=500,
59
+ content={
60
+ "type": "qdrant_error",
61
+ "message": "Vector store error",
62
+ "details": str(exc),
63
+ },
64
+ )
65
+
66
+ # Fallback to general internal error if exception is not UnexpectedResponse
67
+ logger.exception(f"Unexpected exception on {request.url}: {exc}")
68
+ return JSONResponse(
69
+ status_code=500,
70
+ content={
71
+ "type": "internal_error",
72
+ "message": "Internal server error",
73
+ "details": str(exc),
74
+ },
75
+ )
76
+
77
+
78
+ async def general_exception_handler(request: Request, exc: Exception) -> JSONResponse:
79
+ """Handle all uncaught exceptions in FastAPI.
80
+
81
+ Args:
82
+ request (Request): The incoming request that caused the error.
83
+ exc (Exception): The exception instance.
84
+
85
+ Returns:
86
+ JSONResponse: A JSON response with status code 500 and error details.
87
+
88
+ """
89
+ logger.exception(f"Unhandled exception on {request.url}: {exc}")
90
+ return JSONResponse(
91
+ status_code=500,
92
+ content={
93
+ "type": "internal_error",
94
+ "message": "Internal server error",
95
+ "details": str(exc),
96
+ },
97
+ )
src/api/main.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from contextlib import asynccontextmanager
3
+
4
+ import dotenv
5
+ from fastapi import FastAPI
6
+ from fastapi.exceptions import RequestValidationError
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+ from qdrant_client.http.exceptions import UnexpectedResponse
9
+
10
+ from src.api.exceptions.exception_handlers import (
11
+ general_exception_handler,
12
+ qdrant_exception_handler,
13
+ validation_exception_handler,
14
+ )
15
+ from src.api.middleware.logging_middleware import LoggingMiddleware
16
+ from src.api.routes.health_routes import router as health_router
17
+ from src.api.routes.search_routes import router as search_router
18
+ from src.infrastructure.qdrant.qdrant_vectorstore import AsyncQdrantVectorStore
19
+ from src.utils.logger_util import setup_logging
20
+
21
+ # Load environment variables from .env file
22
+ dotenv.load_dotenv()
23
+
24
+ # -----------------------
25
+ # Logger setup
26
+ # -----------------------
27
+ logger = setup_logging()
28
+
29
+
30
+ # -----------------------
31
+ # Lifespan
32
+ # -----------------------
33
+ @asynccontextmanager
34
+ async def lifespan(app: FastAPI):
35
+ """
36
+ Lifespan context manager to handle startup and shutdown events.
37
+ Initializes the Qdrant vector store on startup and ensures proper cleanup on shutdown.
38
+
39
+ Args:
40
+ app (FastAPI): The FastAPI application instance.
41
+ Yields:
42
+ None
43
+
44
+ Exceptions:
45
+ Raises exceptions if initialization or cleanup fails.
46
+ """
47
+ ## Ensure the cache directory exists and is writable (HF downloads the models here)
48
+ cache_dir = "/tmp/fastembed_cache"
49
+ os.makedirs(cache_dir, exist_ok=True) # Ensure directory exists
50
+ # Force the Hugging Face cache into /tmp in Google Cloud so that it is writable.
51
+ # Hugging Face defaults to ~/.cache/huggingface,
52
+ # which is read-only in Google Cloud,
53
+ # so model downloads would fail without this override.
54
+ logger.info(f"HF_HOME: {os.environ.get('HF_HOME', 'Not set')}")
55
+ logger.info(f"Cache dir: {cache_dir}, Writable: {os.access(cache_dir, os.W_OK)}")
56
+ cache_contents = os.listdir(cache_dir) if os.path.exists(cache_dir) else "Empty"
57
+ logger.info(f"Cache contents before: {cache_contents}")
58
+ try:
59
+ # creates Qdrant client internally
60
+ app.state.vectorstore = AsyncQdrantVectorStore(cache_dir=cache_dir)
61
+ except Exception as e:
62
+ logger.exception("Failed to initialize QdrantVectorStore")
63
+ raise e
64
+ yield
65
+ try:
66
+ await app.state.vectorstore.client.close()
67
+ except Exception:
68
+ logger.exception("Failed to close Qdrant client")
69
+
70
+
71
+ # -----------------------
72
+ # FastAPI application
73
+ # -----------------------
74
+
75
+ app = FastAPI(
76
+ title="Substack RAG API",
77
+ version="1.0",
78
+ description="API for Substack Retrieval-Augmented Generation (RAG) system",
79
+ lifespan=lifespan,
80
+ # root_path=root_path,
81
+ )
82
+
83
+
84
+ # -----------------------
85
+ # Middleware
86
+ # -----------------------
87
+
88
+
89
+ # Log the allowed origins
90
+ allowed_origins = os.getenv("ALLOWED_ORIGINS", "").split(",")
91
+ logger.info(f"CORS allowed origins: {allowed_origins}")
92
+
93
+ app.add_middleware(
94
+ CORSMiddleware,
95
+ allow_origins=allowed_origins, # ["*"], # allowed_origins,
96
+ allow_credentials=True,
97
+ allow_methods=["GET", "POST", "OPTIONS"], # only the methods the app uses
98
+ allow_headers=["Authorization", "Content-Type"], # only headers needed
99
+ )
100
+
101
+ app.add_middleware(LoggingMiddleware)
102
+
103
+
104
+ # -----------------------
105
+ # Exception Handlers
106
+ # -----------------------
107
+ app.add_exception_handler(RequestValidationError, validation_exception_handler)
108
+ app.add_exception_handler(UnexpectedResponse, qdrant_exception_handler)
109
+ app.add_exception_handler(Exception, general_exception_handler)
110
+
111
+
112
+ # -----------------------
113
+ # Routers
114
+ # -----------------------
115
+ app.include_router(search_router, prefix="/search", tags=["search"])
116
+ app.include_router(health_router, tags=["health"])
117
+
118
+ # For Cloud Run, run the app directly
119
+ if __name__ == "__main__":
120
+ import uvicorn
121
+
122
+ port = int(os.environ.get("PORT", 8080)) # Cloud Run provides PORT env var
123
+
124
+ uvicorn.run(
125
+ "src.api.main:app",
126
+ host="0.0.0.0",
127
+ port=port,
128
+ log_level="info",
129
+ reload=True, # Enable auto-reload for development
130
+ )
131
+
132
+ # config = uvicorn.Config(
133
+ # app,
134
+ # port=port,
135
+ # log_level="info",
136
+ # # loop="uvloop",
137
+ # # workers=1,
138
+ # reload=True
139
+ # )
140
+ # server = uvicorn.Server(config)
141
+
142
+ # server.run()
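With the service running locally (for example via uvicorn src.api.main:app --port 8080), the health routes registered above can be smoke-tested with the requests dependency already declared in pyproject.toml; the base URL and port below are assumptions:

import requests

BASE_URL = "http://localhost:8080"  # assumed local port

# Liveness and readiness checks exposed by health_routes.py
print(requests.get(f"{BASE_URL}/health").json())  # {"status": "ok", "uptime_seconds": ...}
print(requests.get(f"{BASE_URL}/ready").json())   # {"status": "ready"} once Qdrant is reachable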
src/api/middleware/__init__.py ADDED
File without changes
src/api/middleware/logging_middleware.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+
3
+ from fastapi import Request
4
+ from starlette.middleware.base import BaseHTTPMiddleware
5
+
6
+ from src.utils.logger_util import setup_logging
7
+
8
+ logger = setup_logging()
9
+
10
+
11
+ class LoggingMiddleware(BaseHTTPMiddleware):
12
+ """Middleware for logging incoming HTTP requests and their responses.
13
+
14
+ Logs the request method, URL, client IP, and headers
15
+ (excluding sensitive headers such as Authorization and Cookie),
16
+ as well as the response status code and request duration in milliseconds.
17
+ Exceptions raised during request processing are logged with the full traceback.
18
+
19
+ Usage:
20
+ Add this middleware to your FastAPI app:
21
+ app.add_middleware(LoggingMiddleware)
22
+
23
+ Attributes:
24
+ logger: Configured logger from `setup_logging`.
25
+
26
+ """
27
+
28
+ async def dispatch(self, request: Request, call_next):
29
+ """Process the incoming request, log its details, and measure execution time.
30
+
31
+ Args:
32
+ request (Request): The incoming FastAPI request.
33
+ call_next: Callable to invoke the next middleware or route handler.
34
+
35
+ Returns:
36
+ Response: The HTTP response returned by the next middleware or route handler.
37
+
38
+ Raises:
39
+ Exception: Propagates any exceptions raised by downstream handlers after logging them.
40
+
41
+ """
42
+ start_time = time.time()
43
+ client_host = request.client.host if request.client else "unknown"
44
+
45
+ # logger.debug(f"Request headers: {request.headers}")
46
+ # logger.debug(f"Request cookies: {request.cookies}")
47
+
48
+ # Exclude sensitive headers from logging
49
+ safe_headers = {
50
+ k: v for k, v in request.headers.items() if k.lower() not in {"authorization", "cookie"}
51
+ }
52
+
53
+ logger.info(
54
+ f"Incoming request: {request.method} {request.url} from {client_host} "
55
+ f"headers={safe_headers}"
56
+ )
57
+
58
+ try:
59
+ response = await call_next(request)
60
+ except Exception:
61
+ duration = (time.time() - start_time) * 1000
62
+ logger.exception(
63
+ f"Request failed: {request.method} {request.url} from {client_host} "
64
+ f"duration={duration:.2f}ms"
65
+ )
66
+ raise
67
+
68
+ duration = (time.time() - start_time) * 1000
69
+ logger.info(
70
+ f"Completed request: {request.method} {request.url} from {client_host} "
71
+ f"status_code={response.status_code} duration={duration:.2f}ms"
72
+ )
73
+ return response
src/api/models/__init__.py ADDED
File without changes
src/api/models/api_models.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+
3
+
4
+ # -----------------------
5
+ # Core search result model
6
+ # -----------------------
7
+ class SearchResult(BaseModel):
8
+ title: str = Field(default="", description="Title of the article")
9
+ feed_author: str | None = Field(default=None, description="Author of the article")
10
+ feed_name: str | None = Field(default=None, description="Name of the feed/newsletter")
11
+ article_author: list[str] | None = Field(default=None, description="List of article authors")
12
+ url: str | None = Field(default=None, description="URL of the article")
13
+ chunk_text: str | None = Field(default=None, description="Text content of the article chunk")
14
+ score: float = Field(default=0.0, description="Relevance score of the article")
15
+
16
+
17
+ # -----------------------
18
+ # Unique titles request/response
19
+ # -----------------------
20
+ class UniqueTitleRequest(BaseModel):
21
+ query_text: str = Field(default="", description="The user query text")
22
+ feed_author: str | None = Field(default=None, description="Filter by author name")
23
+ feed_name: str | None = Field(default=None, description="Filter by feed/newsletter name")
24
+ article_author: list[str] | None = Field(default=None, description="List of article authors")
25
+ title_keywords: str | None = Field(
26
+ default=None, description="Keywords or phrase to match in title"
27
+ )
28
+ limit: int = Field(default=5, description="Number of results to return")
29
+
30
+
31
+ class UniqueTitleResponse(BaseModel):
32
+ results: list[SearchResult] = Field(
33
+ default_factory=list, description="List of unique title search results"
34
+ )
35
+
36
+
37
+ # -----------------------
38
+ # Ask request model
39
+ # -----------------------
40
+ class AskRequest(BaseModel):
41
+ query_text: str = Field(default="", description="The user query text")
42
+ feed_author: str | None = Field(default=None, description="Filter by author name")
43
+ feed_name: str | None = Field(default=None, description="Filter by feed/newsletter name")
44
+ article_author: list[str] | None = Field(default=None, description="List of article authors")
45
+ title_keywords: str | None = Field(
46
+ default=None, description="Keywords or phrase to match in title"
47
+ )
48
+ limit: int = Field(default=5, description="Number of results to return")
49
+ provider: str = Field(default="OpenRouter", description="The provider to use for the query")
50
+ model: str | None = Field(
51
+ default=None, description="The specific model to use for the provider, if applicable"
52
+ )
53
+
54
+
55
+ # -----------------------
56
+ # Ask response model
57
+ # -----------------------
58
+ class AskResponse(BaseModel):
59
+ query: str = Field(default="", description="The original query text")
60
+ provider: str = Field(default="", description="The LLM provider used for generation")
61
+ answer: str = Field(default="", description="Generated answer from the LLM")
62
+ sources: list[SearchResult] = Field(
63
+ default_factory=list, description="List of source documents used in generation"
64
+ )
65
+ model: str | None = Field(
66
+ default=None, description="The specific model used by the provider, if available"
67
+ )
68
+ finish_reason: str | None = Field(
69
+ default=None, description="The reason why the generation finished, if available"
70
+ )
71
+
72
+
73
+ # -----------------------
74
+ # Streaming "response" documentation
75
+ # -----------------------
76
+ class AskStreamingChunk(BaseModel):
77
+ delta: str = Field(default="", description="Partial text generated by the LLM")
78
+
79
+
80
+ class AskStreamingResponse(BaseModel):
81
+ query: str = Field(default="", description="The original query text")
82
+ provider: str = Field(default="", description="The LLM provider used for generation")
83
+ chunks: list[AskStreamingChunk] = Field(
84
+ default_factory=list, description="Streamed chunks of generated text"
85
+ )
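For illustration, the JSON body that /search/ask and /search/ask/stream expect can be built from AskRequest directly; the field values below are placeholders:

from src.api.models.api_models import AskRequest

payload = AskRequest(
    query_text="What are common evaluation metrics for RAG systems?",
    feed_name="Example Newsletter",  # optional filter
    limit=5,
    provider="OpenRouter",
)

# Serialize to the JSON body of a POST request.
print(payload.model_dump_json(indent=2))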
src/api/models/provider_models.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from enum import Enum
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+
6
+ # OpenRouter priority sort options
7
+ class ProviderSort(str, Enum):
8
+ latency = "latency"
9
+
10
+
11
+ class ModelConfig(BaseModel):
12
+ # The "entry point" model — required by OpenRouter API
13
+ primary_model: str = Field(default="", description="The initial model requested")
14
+ # Optional fallback / routing models
15
+ candidate_models: list[str] = Field(
16
+ default_factory=list, description="List of candidate models for fallback or routing"
17
+ )
18
+ provider_sort: ProviderSort = Field(
19
+ default=ProviderSort.latency, description="How OpenRouter should sort providers when routing"
20
+ )
21
+ stream: bool = Field(default=False, description="Whether to stream responses")
22
+ max_completion_tokens: int = Field(
23
+ default=5000, description="Maximum number of tokens for completion"
24
+ )
25
+ temperature: float = Field(default=0.0, description="Sampling temperature")
26
+
27
+
28
+ class ModelRegistry(BaseModel):
29
+ models: dict[str, ModelConfig] = Field(default_factory=dict)
30
+
31
+ def get_config(self, provider: str) -> ModelConfig:
32
+ """Retrieve the ModelConfig for the specified provider.
33
+
34
+ Args:
35
+ provider (str): The name of the provider.
36
+
37
+ Returns:
38
+ ModelConfig: The ModelConfig instance for the specified provider.
39
+
40
+ Raises:
41
+ ValueError: If the provider is not found in the registry.
42
+ """
43
+ provider_lower = provider.lower()
44
+ if provider_lower not in self.models:
45
+ raise ValueError(f"ModelConfig not found for provider: {provider}")
46
+ return self.models[provider_lower]
47
+
48
+
49
+ # -----------------------
50
+ # Default registry
51
+ # -----------------------
52
+
53
+ # Default ModelConfigs for models
54
+ # OpenRouter models show low latency and are highly ranked by OpenRouter
55
+
56
+ MODEL_REGISTRY = ModelRegistry(
57
+ models={
58
+ "openrouter": ModelConfig(
59
+ primary_model="openai/gpt-oss-20b:free",
60
+ candidate_models=[
61
+ # "meta-llama/llama-4-scout:free",
62
+ "cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
63
+ # "meta-llama/llama-3.3-8b-instruct:free",
64
+ # "openai/gpt-oss-20b:free",
65
+ # "openai/gpt-oss-120b:free",
66
+ "nvidia/nemotron-nano-9b-v2:free",
67
+ ],
68
+ ),
69
+ # "openai": ModelConfig(primary_model="gpt-4o-mini"),
70
+ "huggingface": ModelConfig(primary_model="deepseek-ai/DeepSeek-R1"),
71
+ }
72
+ )
73
+
74
+ # MODELS WITH LOGPROBS SUPPORT
75
+
76
+ # deepseek/deepseek-r1-0528-qwen3-8b:free
77
+ # mistralai/mistral-small-3.2-24b-instruct:free
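Lookups on the registry are case-insensitive and fail loudly for unregistered providers; a short usage sketch:

from src.api.models.provider_models import MODEL_REGISTRY

config = MODEL_REGISTRY.get_config("OpenRouter")  # provider name is lower-cased internally
print(config.primary_model, config.provider_sort.value, config.max_completion_tokens)

try:
    MODEL_REGISTRY.get_config("anthropic")  # not registered above
except ValueError as err:
    print(err)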
src/api/routes/__init__.py ADDED
File without changes
src/api/routes/health_routes.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+
3
+ from fastapi import APIRouter, Request
4
+ from qdrant_client.http.exceptions import UnexpectedResponse
5
+
6
+ router = APIRouter()
7
+
8
+ start_time = time.time()
9
+
10
+
11
+ @router.get("/")
12
+ async def root():
13
+ """Root endpoint.
14
+
15
+ Returns a simple JSON response indicating that the API is running.
16
+
17
+ Returns:
18
+ dict: {"message": "Hello! API is running."}
19
+
20
+ """
21
+ return {"message": "Hello! API is running."}
22
+
23
+
24
+ @router.get("/health")
25
+ async def health_check():
26
+ """Liveness check endpoint.
27
+
28
+ Returns basic service status and uptime in seconds.
29
+ """
30
+ uptime = int(time.time() - start_time)
31
+ return {
32
+ "status": "ok",
33
+ "uptime_seconds": uptime,
34
+ }
35
+
36
+
37
+ @router.get("/ready")
38
+ async def readiness_check(request: Request):
39
+ """Readiness check endpoint.
40
+
41
+ Verifies whether the service is ready to handle requests by
42
+ checking connectivity to Qdrant.
43
+ """
44
+ try:
45
+ vectorstore = request.app.state.vectorstore
46
+ # a lightweight check: get_collections is cheap
47
+ await vectorstore.client.get_collections()
48
+ return {"status": "ready"}
49
+ except UnexpectedResponse:
50
+ return {"status": "not ready", "reason": "Qdrant unexpected response"}
51
+ except Exception as e:
52
+ return {"status": "not ready", "reason": str(e)}
src/api/routes/search_routes.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+
3
+ from fastapi import APIRouter, Request
4
+ from fastapi.responses import StreamingResponse
5
+
6
+ from src.api.models.api_models import (
7
+ AskRequest,
8
+ AskResponse,
9
+ AskStreamingResponse,
10
+ SearchResult,
11
+ UniqueTitleRequest,
12
+ UniqueTitleResponse,
13
+ )
14
+ from src.api.services.generation_service import generate_answer, get_streaming_function
15
+ from src.api.services.search_service import query_unique_titles, query_with_filters
16
+
17
+ router = APIRouter()
18
+
19
+
20
+ @router.post("/unique-titles", response_model=UniqueTitleResponse)
21
+ async def search_unique(request: Request, params: UniqueTitleRequest):
22
+ """Returns unique article titles based on a query and optional filters.
23
+
24
+ Deduplicates results by article title.
25
+
26
+ Args:
27
+ request: FastAPI request object.
28
+ params: UniqueTitleRequest with search parameters.
29
+
30
+ Returns:
31
+ UniqueTitleResponse: List of unique titles.
32
+
33
+ """
34
+ results = await query_unique_titles(
35
+ request=request,
36
+ query_text=params.query_text,
37
+ feed_author=params.feed_author,
38
+ feed_name=params.feed_name,
39
+ title_keywords=params.title_keywords,
40
+ limit=params.limit,
41
+ )
42
+ return {"results": results}
43
+
44
+
45
+ @router.post("/ask", response_model=AskResponse)
46
+ async def ask_with_generation(request: Request, ask: AskRequest):
47
+ """Non-streaming question-answering endpoint using vector search and LLM.
48
+
49
+ Workflow:
50
+ 1. Retrieve relevant documents (possibly duplicate titles for richer context).
51
+ 2. Generate an answer with the selected LLM provider.
52
+
53
+ Args:
54
+ request: FastAPI request object.
55
+ ask: AskRequest with query, provider, and limit.
56
+
57
+ Returns:
58
+ AskResponse: Generated answer and source documents.
59
+
60
+ """
61
+ # Step 1: Retrieve relevant documents with filters
62
+ results: list[SearchResult] = await query_with_filters(
63
+ request,
64
+ query_text=ask.query_text,
65
+ feed_author=ask.feed_author,
66
+ feed_name=ask.feed_name,
67
+ title_keywords=ask.title_keywords,
68
+ limit=ask.limit,
69
+ )
70
+
71
+ # Step 2: Generate an answer
72
+ answer_data = await generate_answer(
73
+ query=ask.query_text, contexts=results, provider=ask.provider, selected_model=ask.model
74
+ )
75
+
76
+ return AskResponse(
77
+ query=ask.query_text,
78
+ provider=ask.provider,
79
+ answer=answer_data["answer"],
80
+ sources=results,
81
+ model=answer_data.get("model", None),
82
+ finish_reason=answer_data.get("finish_reason", None),
83
+ )
84
+
85
+
86
+ @router.post("/ask/stream", response_model=AskStreamingResponse)
87
+ async def ask_with_generation_stream(request: Request, ask: AskRequest):
88
+ """Streaming question-answering endpoint using vector search and LLM.
89
+
90
+ Workflow:
91
+ 1. Retrieve relevant documents (possibly duplicate titles for richer context).
92
+ 2. Stream generated answer with the selected LLM provider.
93
+
94
+ Args:
95
+ request: FastAPI request object.
96
+ ask: AskRequest with query, provider, and limit.
97
+
98
+ Returns:
99
+ StreamingResponse: Yields text chunks as plain text.
100
+
101
+ """
102
+ # Step 1: Retrieve relevant documents with filters
103
+ results: list[SearchResult] = await query_with_filters(
104
+ request,
105
+ query_text=ask.query_text,
106
+ feed_author=ask.feed_author,
107
+ feed_name=ask.feed_name,
108
+ title_keywords=ask.title_keywords,
109
+ limit=ask.limit,
110
+ )
111
+
112
+ # Step 2: Get the streaming generator
113
+ stream_func = get_streaming_function(
114
+ provider=ask.provider, query=ask.query_text, contexts=results, selected_model=ask.model
115
+ )
116
+
117
+ # Step 3: Wrap streaming generator
118
+ async def stream_generator():
119
+ async for delta in stream_func():
120
+ yield delta
121
+ await asyncio.sleep(0) # allow event loop to handle other tasks
122
+
123
+ return StreamingResponse(stream_generator(), media_type="text/plain")
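Because /search/ask/stream returns plain-text chunks, a client can consume the answer incrementally; a minimal sketch with requests (URL, port, and payload values are assumptions):

import requests

resp = requests.post(
    "http://localhost:8080/search/ask/stream",  # assumed local port
    json={
        "query_text": "Summarize recent posts about vector databases",
        "limit": 5,
        "provider": "OpenRouter",
    },
    stream=True,
)

# Print each streamed chunk as it arrives.
for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
    if chunk:
        print(chunk, end="", flush=True)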
src/api/services/__init__.py ADDED
File without changes
src/api/services/generation_service.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections.abc import AsyncGenerator, Callable
2
+
3
+ import opik
4
+
5
+ from src.api.models.api_models import SearchResult
6
+ from src.api.models.provider_models import MODEL_REGISTRY
7
+ from src.api.services.providers.huggingface_service import generate_huggingface, stream_huggingface
8
+ from src.api.services.providers.openai_service import generate_openai, stream_openai
9
+ from src.api.services.providers.openrouter_service import generate_openrouter, stream_openrouter
10
+ from src.api.services.providers.utils.evaluation_metrics import evaluate_metrics
11
+ from src.api.services.providers.utils.prompts import build_research_prompt
12
+ from src.utils.logger_util import setup_logging
13
+
14
+ logger = setup_logging()
15
+
16
+
17
+ # -----------------------
18
+ # Non-streaming answer generator
19
+ # -----------------------
20
+ @opik.track(name="generate_answer")
21
+ async def generate_answer(
22
+ query: str,
23
+ contexts: list[SearchResult],
24
+ provider: str = "openrouter",
25
+ selected_model: str | None = None,
26
+ ) -> dict:
27
+ """Generate a non-streaming answer using the specified LLM provider.
28
+
29
+ Args:
30
+ query (str): The user's research query.
31
+ contexts (list[SearchResult]): List of context documents with metadata.
32
+ provider (str): The LLM provider to use ("openai", "openrouter", "huggingface").
33
+
34
+ Returns:
35
+ dict: {"answer": str, "sources": list[str], "model": Optional[str]}
36
+
37
+ """
38
+ prompt = build_research_prompt(contexts, query=query)
39
+ model_used: str | None = None
40
+ finish_reason: str | None = None
41
+
42
+ provider_lower = provider.lower()
43
+
44
+ config = MODEL_REGISTRY.get_config(provider_lower)
45
+
46
+ if provider_lower == "openai":
47
+ answer, model_used = await generate_openai(prompt, config=config)
48
+ elif provider_lower == "openrouter":
49
+ try:
50
+ answer, model_used, finish_reason = await generate_openrouter(
51
+ prompt, config=config, selected_model=selected_model
52
+ )
53
+ metrics_results = await evaluate_metrics(answer, prompt)
54
+ logger.info(f"G-Eval Faithfulness → {metrics_results}")
55
+ except Exception as e:
56
+ logger.error(f"Error occurred while generating answer from {provider_lower}: {e}")
57
+ raise
58
+
59
+ elif provider_lower == "huggingface":
60
+ answer, model_used = await generate_huggingface(prompt, config=config)
61
+ else:
62
+ raise ValueError(f"Unknown provider: {provider}")
63
+
64
+ return {
65
+ "answer": answer,
66
+ "sources": [r.url for r in contexts],
67
+ "model": model_used,
68
+ "finish_reason": finish_reason,
69
+ }
70
+
71
+
72
+ # -----------------------
73
+ # Streaming answer generator
74
+ # -----------------------
75
+ @opik.track(name="get_streaming_function")
76
+ def get_streaming_function(
77
+ provider: str,
78
+ query: str,
79
+ contexts: list[SearchResult],
80
+ selected_model: str | None = None,
81
+ ) -> Callable[[], AsyncGenerator[str, None]]:
82
+ """Get a streaming function for the specified LLM provider.
83
+
84
+ Args:
85
+ provider (str): The LLM provider to use ("openai", "openrouter", "huggingface").
86
+ query (str): The user's research query.
87
+ contexts (list[SearchResult]): List of context documents with metadata.
88
+
89
+ Returns:
90
+ Callable[[], AsyncGenerator[str, None]]: A function that returns an async generator yielding
91
+ response chunks.
92
+
93
+ """
94
+ prompt = build_research_prompt(contexts, query=query)
95
+ provider_lower = provider.lower()
96
+ config = MODEL_REGISTRY.get_config(provider_lower)
97
+ logger.info(f"Using model config: {config}")
98
+
99
+ async def stream_gen() -> AsyncGenerator[str, None]:
100
+ """Asynchronous generator that streams response chunks from the specified provider.
101
+
102
+ Yields:
103
+ str: The next chunk of the response.
104
+
105
+ """
106
+ buffer = [] # collect all chunks here
107
+
108
+ if provider_lower == "openai":
109
+ async for chunk in stream_openai(prompt, config=config):
110
+ buffer.append(chunk)
111
+ yield chunk
112
+
113
+ elif provider_lower == "openrouter":
114
+ try:
115
+ async for chunk in stream_openrouter(
116
+ prompt, config=config, selected_model=selected_model
117
+ ):
118
+ buffer.append(chunk)
119
+ yield chunk
120
+
121
+ full_output = "".join(buffer)
122
+ metrics_results = await evaluate_metrics(full_output, prompt)
123
+ logger.info(f"Metrics results: {metrics_results}")
124
+
125
+ except Exception as e:
126
+ logger.error(f"Error occurred while streaming from {provider}: {e}")
127
+ yield "__error__"
128
+
129
+ elif provider_lower == "huggingface":
130
+ async for chunk in stream_huggingface(prompt, config=config):
131
+ buffer.append(chunk)
132
+ yield chunk
133
+
134
+ else:
135
+ raise ValueError(f"Unknown provider: {provider}")
136
+
137
+ return stream_gen
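A hedged sketch of calling generate_answer outside the API with a hand-built context; it still requires the provider API keys configured in src/config.py (and Opik credentials, since the call is tracked):

import asyncio

from src.api.models.api_models import SearchResult
from src.api.services.generation_service import generate_answer

contexts = [
    SearchResult(
        title="Example article",
        url="https://example.com/post",
        chunk_text="Vector databases index embeddings for similarity search.",
        score=0.87,
    )
]


async def main():
    result = await generate_answer(
        query="What do vector databases do?",
        contexts=contexts,
        provider="openrouter",
    )
    print(result["model"], result["finish_reason"])
    print(result["answer"])


asyncio.run(main())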
src/api/services/providers/__init__.py ADDED
File without changes
src/api/services/providers/huggingface_service.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections.abc import AsyncGenerator
2
+
3
+ from huggingface_hub import AsyncInferenceClient
4
+
5
+ from src.api.models.provider_models import ModelConfig
6
+ from src.api.services.providers.utils.messages import build_messages
7
+ from src.config import settings
8
+ from src.utils.logger_util import setup_logging
9
+
10
+ logger = setup_logging()
11
+
12
+ # -----------------------
13
+ # Hugging Face client
14
+ # -----------------------
15
+ hf_key = settings.hugging_face.api_key
16
+ hf_client = AsyncInferenceClient(provider="auto", api_key=hf_key)
17
+
18
+
19
+ async def generate_huggingface(prompt: str, config: ModelConfig) -> tuple[str, None]:
20
+ """Generate a response from Hugging Face for a given prompt and model configuration.
21
+
22
+ Args:
23
+ prompt (str): The input prompt.
24
+ config (ModelConfig): The model configuration.
25
+
26
+ Returns:
27
+ tuple[str, None]: The generated response and None for model and finish reason.
28
+
29
+ """
30
+ resp = await hf_client.chat.completions.create(
31
+ model=config.primary_model,
32
+ messages=build_messages(prompt),
33
+ temperature=config.temperature,
34
+ max_tokens=config.max_completion_tokens,
35
+ )
36
+ return resp.choices[0].message.content or "", None
37
+
38
+
39
+ def stream_huggingface(prompt: str, config: ModelConfig) -> AsyncGenerator[str, None]:
40
+ """Stream a response from Hugging Face for a given prompt and model configuration.
41
+
42
+ Args:
43
+ prompt (str): The input prompt.
44
+ config (ModelConfig): The model configuration.
45
+
46
+ Returns:
47
+ AsyncGenerator[str, None]: An asynchronous generator yielding response chunks.
48
+
49
+ """
50
+
51
+ async def gen() -> AsyncGenerator[str, None]:
52
+ stream = await hf_client.chat.completions.create(
53
+ model=config.primary_model,
54
+ messages=build_messages(prompt),
55
+ temperature=config.temperature,
56
+ max_tokens=config.max_completion_tokens,
57
+ stream=True,
58
+ )
59
+ async for chunk in stream:
60
+ delta_text = getattr(chunk.choices[0].delta, "content", None)
61
+ if delta_text:
62
+ yield delta_text
63
+
64
+ return gen()
src/api/services/providers/openai_service.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from collections.abc import AsyncGenerator
3
+
4
+ from openai import AsyncOpenAI
5
+ from opik.integrations.openai import track_openai
6
+
7
+ from src.api.models.provider_models import ModelConfig
8
+ from src.api.services.providers.utils.messages import build_messages
9
+ from src.config import settings
10
+ from src.utils.logger_util import setup_logging
11
+
12
+ logger = setup_logging()
13
+
14
+ # -----------------------
15
+ # OpenAI client
16
+ # -----------------------
17
+ openai_key = settings.openai.api_key
18
+ async_openai_client = AsyncOpenAI(api_key=openai_key)
19
+
20
+ # -----------------------
21
+ # Opik Observability
22
+ # -----------------------
23
+
24
+ os.environ["OPIK_API_KEY"] = settings.opik.api_key
25
+ os.environ["OPIK_PROJECT_NAME"] = settings.opik.project_name
26
+
27
+ async_openai_client = track_openai(async_openai_client)
28
+
29
+
30
+ async def generate_openai(prompt: str, config: ModelConfig) -> tuple[str, None]:
31
+ """Generate a response from OpenAI for a given prompt and model configuration.
32
+
33
+ Args:
34
+ prompt (str): The input prompt.
35
+ config (ModelConfig): The model configuration.
36
+
37
+ Returns:
38
+ tuple[str, None]: The generated response and None for model and finish reason.
39
+
40
+ """
41
+ ### NOTES ON PARAMETERS
42
+ # logprobs: Include the log probabilities on the logprobs most likely tokens,
43
+ # as well the chosen tokens.
44
+ # temperature: 0.0 (more deterministic) to 1.0 (more creative)
45
+ # top_p: 0.0 to 1.0, nucleus sampling, 1.0 means no nucleus sampling
46
+ # 0.1 means only the tokens comprising the top 10% probability mass are considered.
47
+ # presence_penalty: -2.0 to 2.0, positive values penalize new tokens based
48
+ # on whether they appear in the text so far
49
+ # (Encourages model to use more context from other chunks)
50
+ # frequency_penalty: -2.0 to 2.0, positive values penalize new tokens based
51
+ # on their existing frequency in the text so far (helpful if context chunks overlap.)
52
+
53
+ resp = await async_openai_client.chat.completions.create(
54
+ model="gpt-4o-mini",
55
+ messages=build_messages(prompt),
56
+ temperature=config.temperature,
57
+ max_completion_tokens=config.max_completion_tokens,
58
+ # logprobs=True,
59
+ # top_logprobs=3,
60
+ # top_p=1.0,
61
+ # presence_penalty=0.3,
62
+ # frequency_penalty=0.3,
63
+ )
64
+
65
+ return resp.choices[0].message.content or "", None
66
+
67
+
68
+ def stream_openai(prompt: str, config: ModelConfig) -> AsyncGenerator[str, None]:
69
+ """Stream a response from OpenAI for a given prompt and model configuration.
70
+
71
+ Args:
72
+ prompt (str): The input prompt.
73
+ config (ModelConfig): The model configuration.
74
+
75
+ Returns:
76
+ AsyncGenerator[str, None]: An asynchronous generator yielding response chunks.
77
+
78
+ """
79
+
80
+ async def gen() -> AsyncGenerator[str, None]:
81
+ stream = await async_openai_client.chat.completions.create(
82
+ model=config.primary_model,
83
+ messages=build_messages(prompt),
84
+ temperature=config.temperature,
85
+ max_completion_tokens=config.max_completion_tokens,
86
+ stream=True,
87
+ )
88
+
89
+ last_finish_reason = None
90
+ async for chunk in stream:
91
+ delta_text = getattr(chunk.choices[0].delta, "content", None)
92
+ if delta_text:
93
+ yield delta_text
94
+
95
+ # Reasons: tool_calls, stop, length, content_filter, error
96
+ finish_reason = getattr(chunk.choices[0], "finish_reason", None)
97
+
98
+ if finish_reason:
99
+ last_finish_reason = finish_reason
100
+
101
+ logger.warning(f"Final finish_reason: {last_finish_reason}")
102
+
103
+ # Yield a chunk to trigger truncation warning in UI
104
+ if last_finish_reason == "length":
105
+ yield "__truncated__"
106
+
107
+ return gen()
108
+
109
+
110
+ # -----------------------
111
+ # Log Probs Parameter Experiment
112
+ # -----------------------
113
+
114
+ # import math
115
+
116
+ # async def generate_openai(prompt: str, config: ModelConfig) -> str:
117
+ # """
118
+ # Generate a response from OpenAI for a given prompt and model configuration,
119
+ # and calculate the average log probability of the generated tokens.
120
+
121
+ # Returns:
122
+ # tuple[str, float | None]: Generated response and average log probability
123
+ # """
124
+ # resp = await async_openai_client.chat.completions.create(
125
+ # model="gpt-4o-mini",
126
+ # messages=build_messages(prompt),
127
+ # temperature=config.temperature,
128
+ # max_completion_tokens=config.max_completion_tokens,
129
+ # logprobs=True, # include token log probabilities
130
+ # top_logprobs=3, # top 3 alternatives for each token
131
+ # top_p=1.0,
132
+ # presence_penalty=0.3,
133
+ # frequency_penalty=0.3,
134
+ # )
135
+
136
+
137
+ # content = resp.choices[0].message.content or ""
138
+ # token_logprobs_list = resp.choices[0].logprobs
139
+
140
+ # tokens_logprobs = []
141
+ # token_probs = []
142
+
143
+ # if (
144
+ # token_logprobs_list is not None
145
+ # and hasattr(token_logprobs_list, "content")
146
+ # and isinstance(token_logprobs_list.content, list)
147
+ # and len(token_logprobs_list.content) > 0
148
+ # ):
149
+ # for token_info in token_logprobs_list.content:
150
+ # if token_info is not None and hasattr(token_info, "logprob") \
151
+ # and hasattr(token_info, "token"):
152
+ # tokens_logprobs.append(token_info.logprob)
153
+ # token_probs.append((token_info.token, math.exp(token_info.logprob)))
154
+
155
+ # if tokens_logprobs:
156
+ # avg_logprob = sum(tokens_logprobs) / len(tokens_logprobs)
157
+ # avg_prob = math.exp(avg_logprob)
158
+
159
+ # # Sort by probability
160
+ # most_confident = sorted(token_probs, key=lambda x: x[1], reverse=True)[:5]
161
+ # least_confident = sorted(token_probs, key=lambda x: x[1])[:5]
162
+
163
+ # logger.info(f"Temperature: {config.temperature}")
164
+ # logger.info(f"Max completion tokens: {config.max_completion_tokens}")
165
+ # logger.info(f"Average log probability: {avg_logprob:.4f} "
166
+ # f"(≈ {avg_prob:.2%} avg token prob)")
167
+
168
+ # logger.info("Top 5 most confident tokens:")
169
+ # for tok, prob in most_confident:
170
+ # logger.info(f" '{tok}' → {prob:.2%}")
171
+
172
+ # logger.info("Top 5 least confident tokens:")
173
+ # for tok, prob in least_confident:
174
+ # logger.info(f" '{tok}' → {prob:.2%}")
175
+
176
+ # else:
177
+ # logger.warning("No logprob information found in response.")
178
+
179
+ # breakpoint()
180
+
181
+ # return content
src/api/services/providers/openrouter_service.py ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from collections.abc import AsyncGenerator
3
+ from typing import Any
4
+
5
+ import opik
6
+ from openai import AsyncOpenAI
7
+ from opik.integrations.openai import track_openai
8
+
9
+ from src.api.models.provider_models import ModelConfig
10
+ from src.api.services.providers.utils.messages import build_messages
11
+ from src.config import settings
12
+ from src.utils.logger_util import setup_logging
13
+
14
+ logger = setup_logging()
15
+
16
+
17
+ # -----------------------
18
+ # OpenRouter client
19
+ # -----------------------
20
+
21
+ openrouter_key = settings.openrouter.api_key
22
+ openrouter_url = settings.openrouter.api_url
23
+ async_openrouter_client = AsyncOpenAI(base_url=openrouter_url, api_key=openrouter_key)
24
+
25
+ # -----------------------
26
+ # Opik Observability
27
+ # -----------------------
28
+
29
+ os.environ["OPIK_API_KEY"] = settings.opik.api_key
30
+ os.environ["OPIK_PROJECT_NAME"] = settings.opik.project_name
31
+
32
+ async_openrouter_client = track_openai(async_openrouter_client)
33
+
34
+ # -----------------------
35
+ # Helper to build extra body for OpenRouter
36
+ # -----------------------
37
+
38
+
39
+ @opik.track(name="build_openrouter_extra")
40
+ def build_openrouter_extra(config: ModelConfig) -> dict[str, Any]:
41
+ """Build the extra body for OpenRouter API requests based on the ModelConfig.
42
+
43
+ Args:
44
+ config (ModelConfig): The model configuration.
45
+
46
+ Returns:
47
+ dict[str, Any]: The extra body for OpenRouter API requests.
48
+
49
+ """
50
+ body = {"provider": {"sort": config.provider_sort.value}}
51
+ if config.candidate_models:
52
+ body["models"] = list(config.candidate_models) # type: ignore
53
+ return body
54
+
55
+
56
+ # -----------------------
57
+ # Core OpenRouter functions
58
+ # -----------------------
59
+
60
+
61
+ @opik.track(name="generate_openrouter")
62
+ async def generate_openrouter(
63
+ prompt: str,
64
+ config: ModelConfig,
65
+ selected_model: str | None = None,
66
+ ) -> tuple[str, str | None, str | None]:
67
+ """Generate a response from OpenRouter for a given prompt and model configuration.
68
+
69
+ Args:
70
+ prompt (str): The input prompt.
71
+ config (ModelConfig): The model configuration.
72
+ selected_model (str | None): Optional specific model to use.
73
+
74
+ Returns:
75
+ tuple[str, str | None, str | None]: The generated response, model used, and finish reason.
76
+
77
+ """
78
+
79
+ model_to_use = selected_model or config.primary_model
80
+
81
+ resp = await async_openrouter_client.chat.completions.create(
82
+ model=model_to_use,
83
+ messages=build_messages(prompt),
84
+ temperature=config.temperature,
85
+ max_completion_tokens=config.max_completion_tokens,
86
+ extra_body=build_openrouter_extra(config),
87
+ )
88
+ answer = resp.choices[0].message.content or ""
89
+
90
+ # Reasons: tool_calls, stop, length, content_filter, error
91
+ finish_reason = getattr(resp.choices[0], "native_finish_reason", None)
92
+ model_used = getattr(resp.choices[0], "model", None) or getattr(resp, "model", None)
93
+
94
+ logger.info(f"OpenRouter non-stream finish_reason: {finish_reason}")
95
+ if finish_reason == "length":
96
+ logger.warning("Response was truncated by token limit.")
97
+
98
+ model_used = getattr(resp.choices[0], "model", None) or getattr(resp, "model", None)
99
+ logger.info(f"OpenRouter non-stream finished. Model used: {model_used}")
100
+
101
+ return answer, model_used, finish_reason
102
+
103
+
104
+ @opik.track(name="stream_openrouter")
105
+ def stream_openrouter(
106
+ prompt: str,
107
+ config: ModelConfig,
108
+ selected_model: str | None = None,
109
+ ) -> AsyncGenerator[str, None]:
110
+ """Stream a response from OpenRouter for a given prompt and model configuration.
111
+
112
+ Args:
113
+ prompt (str): The input prompt.
114
+ config (ModelConfig): The model configuration.
115
+ selected_model (str | None): Optional specific model to use.
116
+
117
+ Returns:
118
+ AsyncGenerator[str, None]: An asynchronous generator yielding response chunks.
119
+
120
+ """
121
+
122
+ async def gen() -> AsyncGenerator[str, None]:
123
+ """Generate response chunks from OpenRouter.
124
+
125
+ Yields:
126
+ AsyncGenerator[str, None]: Response chunks.
127
+
128
+ """
129
+
130
+ model_to_use = selected_model or config.primary_model
131
+
132
+ stream = await async_openrouter_client.chat.completions.create(
133
+ model=model_to_use,
134
+ messages=build_messages(prompt),
135
+ temperature=config.temperature,
136
+ max_completion_tokens=config.max_completion_tokens,
137
+ extra_body=build_openrouter_extra(config),
138
+ stream=True,
139
+ )
140
+ try:
141
+ first_chunk = await stream.__anext__()
142
+ model_used = getattr(first_chunk, "model", None)
143
+ if model_used:
144
+ yield f"__model_used__:{model_used}"
145
+ delta_text = getattr(first_chunk.choices[0].delta, "content", None)
146
+ if delta_text:
147
+ yield delta_text
148
+ except StopAsyncIteration:
149
+ return
150
+
151
+ last_finish_reason = None
152
+ async for chunk in stream:
153
+ delta_text = getattr(chunk.choices[0].delta, "content", None)
154
+ if delta_text:
155
+ yield delta_text
156
+
157
+ # Reasons: tool_calls, stop, length, content_filter, error
158
+ finish_reason = getattr(chunk.choices[0], "finish_reason", None)
159
+
160
+ if finish_reason:
161
+ last_finish_reason = finish_reason
162
+
163
+ logger.info(f"OpenRouter stream finished. Model used: {model_used}")
164
+ logger.warning(f"Final finish_reason: {last_finish_reason}")
165
+
166
+ # Yield a chunk to trigger truncation warning in UI
167
+ if last_finish_reason == "length":
168
+ yield "__truncated__"
169
+
170
+ return gen()
171
+
172
+
173
+ # ---------------------------------------
174
+ # Test Log Probs and Confidence Visualization
175
+ # ---------------------------------------
176
+
177
+
178
+ # import math
179
+
180
+
181
+ # def visualize_token_confidence(token_probs: list[tuple[str, float]]):
182
+ # """Print token probabilities as ASCII bars in the terminal."""
183
+ # for tok, prob in token_probs:
184
+ # bar_length = int(prob * 40) # scale bar to 40 chars max
185
+ # bar = "#" * bar_length
186
+ # print(f"{tok:>12}: [{bar:<40}] {prob:.2%}")
187
+
188
+ # async def generate_openrouter(
189
+ # prompt: str,
190
+ # config: ModelConfig,
191
+ # max_tokens: int | None = None) -> tuple[str, str | None, str | None]:
192
+ # """Generate a response from OpenRouter
193
+ # and log token-level statistics with confidence evolution."""
194
+
195
+ # resp = await async_openrouter_client.chat.completions.create(
196
+ # model=config.primary_model,
197
+ # messages=build_messages(prompt),
198
+ # temperature=config.temperature,
199
+ # max_completion_tokens=max_tokens or config.max_completion_tokens,
200
+ # extra_body={**build_openrouter_extra(config), "logprobs": True, "top_logprobs": 3},
201
+ # )
202
+
203
+ # choice = resp.choices[0]
204
+ # content = choice.message.content or ""
205
+ # finish_reason = getattr(choice, "native_finish_reason", None)
206
+ # model_used = getattr(choice, "model", None) or getattr(resp, "model", None)
207
+
208
+ # logger.info(f"OpenRouter non-stream finish_reason: {finish_reason}")
209
+ # if finish_reason == "length":
210
+ # logger.warning("Response was truncated by token limit.")
211
+
212
+ # # Extract logprobs
213
+ # token_logprobs_list = choice.logprobs
214
+ # tokens_logprobs = []
215
+ # token_probs = []
216
+
217
+ # if token_logprobs_list and hasattr(token_logprobs_list, "content"):
218
+ # for token_info in token_logprobs_list.content:
219
+ # tok = token_info.token
220
+ # logprob = token_info.logprob
221
+ # prob = math.exp(logprob)
222
+
223
+ # tokens_logprobs.append(logprob)
224
+ # token_probs.append((tok, prob))
225
+
226
+
227
+ # if tokens_logprobs:
228
+ # avg_logprob = sum(tokens_logprobs) / len(tokens_logprobs)
229
+ # avg_prob = math.exp(avg_logprob)
230
+
231
+ # most_confident = sorted(token_probs, key=lambda x: x[1], reverse=True)[:5]
232
+ # least_confident = sorted(token_probs, key=lambda x: x[1])[:5]
233
+
234
+ # logger.info(f"Temperature: {config.temperature}")
235
+ # logger.info(f"Max completion tokens: {config.max_completion_tokens}")
236
+ # logger.info(f"Average log probability: {avg_logprob:.4f} "
237
+ # f"(≈ {avg_prob:.2%} avg token prob)")"
238
+
239
+ # logger.info("Top 5 most confident tokens:")
240
+ # for tok, prob in most_confident:
241
+ # logger.info(f" '{tok}' → {prob:.2%}")
242
+
243
+ # logger.info("Top 5 least confident tokens:")
244
+ # for tok, prob in least_confident:
245
+ # logger.info(f" '{tok}' → {prob:.2%}")
246
+
247
+ # # Terminal visualization
248
+ # print("\nToken confidence evolution:")
249
+ # visualize_token_confidence(token_probs)
250
+
251
+ # else:
252
+ # logger.warning("No logprob information found in response.")
253
+
254
+ # return content, model_used, finish_reason
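A minimal consumer sketch for the streaming helper above (illustrative only, not part of the committed files): `stream_answer` is a placeholder name for the provider function whose tail is shown, and the handling mirrors the `__model_used__:` and `__truncated__` sentinels yielded by `gen()`.

async def render_stream(stream_answer, prompt, config):
    """Collect streamed chunks, splitting out the sentinel markers."""
    parts: list[str] = []
    model_used: str | None = None
    truncated = False
    gen = await stream_answer(prompt, config)  # the provider returns gen() as shown above
    async for chunk in gen:
        if chunk.startswith("__model_used__:"):
            model_used = chunk.removeprefix("__model_used__:")
        elif chunk == "__truncated__":
            truncated = True  # lets the UI render a truncation warning
        else:
            parts.append(chunk)
    return "".join(parts), model_used, truncated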
src/api/services/providers/utils/__init__.py ADDED
File without changes
src/api/services/providers/utils/evaluation_metrics.py ADDED
@@ -0,0 +1,110 @@
1
+ from opik.evaluation import models
2
+ from opik.evaluation.metrics import GEval
3
+
4
+ from src.config import settings
5
+ from src.utils.logger_util import setup_logging
6
+
7
+ logger = setup_logging()
8
+
9
+ # -----------------------
10
+ # Evaluation helper
11
+ # -----------------------
12
+
13
+
14
+ async def evaluate_metrics(output: str, context: str) -> dict:
15
+ """Evaluate multiple metrics for a given LLM output.
16
+ Metrics included: faithfulness, coherence, completeness.
17
+
18
+ Args:
19
+ output (str): The LLM-generated output to evaluate.
20
+ context (str): The context used to generate the output.
21
+
22
+ Returns:
23
+ dict: A dictionary with metric names as keys and their evaluation results as values.
24
+
25
+ """
26
+ # NOTE: clearing the key here forces the "no API key" branch below, so the G-Eval
+ # metrics are skipped.
+ settings.openai.api_key = None
27
+ logger.info(f"OpenAI key is not set: {settings.openai.api_key is None}")
28
+
29
+ if not output.strip():
30
+ logger.warning("Output is empty. Skipping evaluation.")
31
+ return {
32
+ "faithfulness": {"score": 0.0, "reason": "Empty output", "failed": True},
33
+ "coherence": {"score": 0.0, "reason": "Empty output", "failed": True},
34
+ "completeness": {"score": 0.0, "reason": "Empty output", "failed": True},
35
+ }
36
+
37
+ if not getattr(settings.openai, "api_key", None):
38
+ logger.info("OpenAI API key not set. Skipping metrics evaluation.")
39
+ return {
40
+ "faithfulness": {"score": None, "reason": "Skipped – no API key", "failed": True},
41
+ "coherence": {"score": None, "reason": "Skipped – no API key", "failed": True},
42
+ "completeness": {"score": None, "reason": "Skipped – no API key", "failed": True},
43
+ }
44
+
45
+ judge_model = models.LiteLLMChatModel(
46
+ model_name="gpt-4o", # gpt-4o, gpt-5-mini
47
+ api_key=settings.openai.api_key,
48
+ )
49
+
50
+ metric_configs = {
51
+ "faithfulness": (
52
+ (
53
+ "You are an expert judge tasked with evaluating whether an AI-generated answer is "
54
+ "faithful to the provided Substack excerpts."
55
+ ),
56
+ (
57
+ "The OUTPUT must not introduce new information and beyond "
58
+ "what is contained in the CONTEXT. "
59
+ "All claims in the OUTPUT should be directly supported by the CONTEXT."
60
+ ),
61
+ ),
62
+ "coherence": (
63
+ (
64
+ "You are an expert judge tasked with evaluating whether an AI-generated answer is "
65
+ "logically coherent."
66
+ ),
67
+ "The answer should be well-structured, readable, and maintain consistent reasoning.",
68
+ ),
69
+ "completeness": (
70
+ (
71
+ "You are an expert judge tasked with evaluating whether an AI-generated answer "
72
+ "covers all relevant aspects of the query."
73
+ ),
74
+ (
75
+ "The answer should include all major points from the CONTEXT "
76
+ "and address the user's "
77
+ "query "
78
+ "fully."
79
+ ),
80
+ ),
81
+ }
82
+
83
+ results = {}
84
+ for name, (task_intro, eval_criteria) in metric_configs.items():
85
+ try:
86
+ metric = GEval(
87
+ task_introduction=task_intro,
88
+ evaluation_criteria=eval_criteria,
89
+ model=judge_model,
90
+ name=f"G-Eval {name.capitalize()}",
91
+ )
92
+
93
+ eval_input = f"""
94
+ OUTPUT: {output}
95
+ CONTEXT: {context}
96
+ """
97
+
98
+ score_result = await metric.ascore(eval_input)
99
+
100
+ results[name] = {
101
+ "score": score_result.value,
102
+ "reason": score_result.reason,
103
+ "failed": score_result.scoring_failed,
104
+ }
105
+
106
+ except Exception as e:
107
+ logger.warning(f"G-Eval {name} failed: {e}")
108
+ results[name] = {"score": 0.0, "reason": str(e), "failed": True}
109
+
110
+ return results
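A hedged usage sketch for `evaluate_metrics` (not part of the commit; the output and context strings are placeholders for the generated answer and the retrieved snippets):

import asyncio

async def demo_evaluation() -> None:
    # Placeholder strings; in the API these come from the LLM answer and the context.
    scores = await evaluate_metrics(
        output="RAG combines retrieval with generation ...",
        context="- Snippet: RAG combines retrieval with generation ...",
    )
    for metric, result in scores.items():
        label = "failed" if result["failed"] else f"{result['score']:.2f}"
        print(f"{metric}: {label} - {result['reason']}")

# asyncio.run(demo_evaluation())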
src/api/services/providers/utils/messages.py ADDED
@@ -0,0 +1,18 @@
1
+ from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
2
+
3
+
4
+ def build_messages(
5
+ prompt: str,
6
+ ) -> list[ChatCompletionSystemMessageParam | ChatCompletionUserMessageParam]:
7
+ """Build a list of messages for the OpenAI chat API.
8
+
9
+ Args:
10
+ prompt (str): The user prompt.
11
+
12
+ Returns:
13
+ list[ChatCompletionSystemMessageParam | ChatCompletionUserMessageParam]: A list of messages.
14
+
15
+ """
16
+ return [
17
+ ChatCompletionUserMessageParam(role="user", content=prompt),
18
+ ]
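The helper imports `ChatCompletionSystemMessageParam` but currently emits only a user message. A hypothetical variant (not in the commit) that prepends an optional system prompt could look like this:

def build_messages_with_system(
    prompt: str,
    system_prompt: str | None = None,
) -> list[ChatCompletionSystemMessageParam | ChatCompletionUserMessageParam]:
    """Like build_messages, but optionally prepends a system message."""
    messages: list[ChatCompletionSystemMessageParam | ChatCompletionUserMessageParam] = []
    if system_prompt:
        messages.append(ChatCompletionSystemMessageParam(role="system", content=system_prompt))
    messages.append(ChatCompletionUserMessageParam(role="user", content=prompt))
    return messages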
src/api/services/providers/utils/prompts.py ADDED
@@ -0,0 +1,77 @@
1
+ import opik
2
+
3
+ from src.api.models.api_models import SearchResult
4
+ from src.api.models.provider_models import ModelConfig
5
+
6
+ config = ModelConfig()
7
+
8
+ PROMPT = """
9
+ You are a skilled research assistant specialized in analyzing Substack newsletters.
10
+ Respond to the user's query using only the provided context from these articles,
+ which is retrieved from a vector database; do not rely on outside knowledge or assumptions.
12
+
13
+
14
+ ### Output Rules:
15
+ - Write a detailed, structured answer using **Markdown** (headings, bullet points,
16
+ short or long paragraphs as appropriate).
17
+ - Use up to **{tokens} tokens** without exceeding this limit.
18
+ - Only include facts from the provided context from the articles.
19
+ - Attribute each fact to the correct author(s) and source, and include **clickable links**.
20
+ - If the article author and feed author differ, mention both.
21
+ - There is no need to mention that you based your answer on the provided context.
22
+ - If no relevant information exists, however, clearly state this and provide a fallback suggestion.
23
+ - At the very end, include a **funny quote** and wish the user a great day.
24
+
25
+ ### Query:
26
+ {query}
27
+
28
+ ### Context Articles:
29
+ {context_texts}
30
+
31
+ ### Final Answer:
32
+ """
33
+
34
+
35
+ # Create a new prompt
36
+ prompt = opik.Prompt(
37
+ name="substack_research_assistant", prompt=PROMPT, metadata={"environment": "development"}
38
+ )
39
+
40
+
41
+ def build_research_prompt(
42
+ contexts: list[SearchResult],
43
+ query: str = "",
44
+ tokens: int = config.max_completion_tokens,
45
+ ) -> str:
46
+ """Construct a research-focused LLM prompt using the given query
47
+ and supporting context documents.
48
+
49
+ The prompt enforces Markdown formatting, citations, and strict length guidance.
50
+
51
+ Args:
52
+ contexts (list[SearchResult]): List of context documents with metadata.
53
+ query (str): The user's research query.
54
+ tokens (int): Maximum number of tokens for the LLM response.
55
+
56
+ Returns:
57
+ str: The formatted prompt ready for LLM consumption.
58
+
59
+ """
60
+ # Join all retrieved contexts into a readable format
61
+ context_texts = "\n\n".join(
62
+ (
63
+ f"- Feed Name: {r.feed_name}\n"
64
+ f" Article Title: {r.title}\n"
65
+ f" Article Author(s): {r.article_author}\n"
66
+ f" Feed Author: {r.feed_author}\n"
67
+ f" URL: {r.url}\n"
68
+ f" Snippet: {r.chunk_text}"
69
+ )
70
+ for r in contexts
71
+ )
72
+
73
+ return PROMPT.format(
74
+ query=query,
75
+ context_texts=context_texts,
76
+ tokens=tokens,
77
+ )
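An illustrative call to `build_research_prompt` (field values are invented; the keyword arguments mirror how `SearchResult` is constructed in the search service):

sample_contexts = [
    SearchResult(
        title="Hybrid Search in Practice",
        feed_author="Jane Doe",
        feed_name="Example Newsletter",
        article_author="Jane Doe",
        url="https://example.substack.com/p/hybrid-search",
        chunk_text="Dense and sparse retrieval can be fused with reciprocal rank fusion ...",
        score=0.42,
    ),
]
prompt_text = build_research_prompt(sample_contexts, query="How does hybrid search work?")
print(prompt_text[:400])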
src/api/services/search_service.py ADDED
@@ -0,0 +1,188 @@
1
+ import opik
2
+ from fastapi import Request
3
+ from qdrant_client.models import (
4
+ FieldCondition,
5
+ Filter,
6
+ Fusion,
7
+ FusionQuery,
8
+ MatchText,
9
+ MatchValue,
10
+ Prefetch,
11
+ )
12
+
13
+ from src.api.models.api_models import SearchResult
14
+ from src.infrastructure.qdrant.qdrant_vectorstore import AsyncQdrantVectorStore
15
+ from src.utils.logger_util import setup_logging
16
+
17
+ logger = setup_logging()
18
+
19
+
20
+ @opik.track(name="query_with_filters")
21
+ async def query_with_filters(
22
+ request: Request,
23
+ query_text: str = "",
24
+ feed_author: str | None = None,
25
+ feed_name: str | None = None,
26
+ title_keywords: str | None = None,
27
+ limit: int = 5,
28
+ ) -> list[SearchResult]:
29
+ """Query the vector store with optional filters and return search results.
30
+
31
+ Performs a hybrid dense + sparse search on Qdrant and applies filters based
32
+ on feed author, feed name, and title keywords. Results are deduplicated by point ID.
33
+
34
+ Args:
35
+ request (Request): FastAPI request object containing the vector store in app.state.
36
+ query_text (str): Text query to search for.
37
+ feed_author (str | None): Optional filter for the feed author.
38
+ feed_name (str | None): Optional filter for the feed name.
39
+ title_keywords (str | None): Optional filter for title keywords.
40
+ limit (int): Maximum number of results to return.
41
+
42
+ Returns:
43
+ list[SearchResult]:
44
+ List of search results containing title, feed info, URL, chunk text, and score.
45
+
46
+ """
47
+ vectorstore: AsyncQdrantVectorStore = request.app.state.vectorstore
48
+ dense_vector = vectorstore.dense_vectors([query_text])[0]
49
+ sparse_vector = vectorstore.sparse_vectors([query_text])[0]
50
+
51
+ # Build filter conditions
52
+ conditions: list[FieldCondition] = []
53
+ if feed_author:
54
+ conditions.append(FieldCondition(key="feed_author", match=MatchValue(value=feed_author)))
55
+ if feed_name:
56
+ conditions.append(FieldCondition(key="feed_name", match=MatchValue(value=feed_name)))
57
+ if title_keywords:
58
+ conditions.append(
59
+ FieldCondition(key="title", match=MatchText(text=title_keywords.strip().lower()))
60
+ )
61
+
62
+ query_filter = Filter(must=conditions) if conditions else None # type: ignore
63
+
64
+ fetch_limit = max(1, limit) * 100
65
+ logger.info(f"Fetching up to {fetch_limit} points for unique Ids.")
66
+
67
+ response = await vectorstore.client.query_points(
68
+ collection_name=vectorstore.collection_name,
69
+ query=FusionQuery(fusion=Fusion.RRF),
70
+ prefetch=[
71
+ Prefetch(query=dense_vector, using="Dense", limit=fetch_limit, filter=query_filter),
72
+ Prefetch(query=sparse_vector, using="Sparse", limit=fetch_limit, filter=query_filter),
73
+ ],
74
+ query_filter=query_filter,
75
+ limit=fetch_limit,
76
+ )
77
+
78
+ # Deduplicate by point ID
79
+ seen_ids: set[str] = set()
80
+ results: list[SearchResult] = []
81
+ for point in response.points:
82
+ if point.id in seen_ids:
83
+ continue
84
+ seen_ids.add(point.id) # type: ignore
85
+ payload = point.payload or {}
86
+ results.append(
87
+ SearchResult(
88
+ title=payload.get("title", ""),
89
+ feed_author=payload.get("feed_author"),
90
+ feed_name=payload.get("feed_name"),
91
+ article_author=payload.get("article_authors"),
92
+ url=payload.get("url"),
93
+ chunk_text=payload.get("chunk_text"),
94
+ score=point.score,
95
+ )
96
+ )
97
+
98
+ results = results[:limit]
99
+ logger.info(f"Returning {len(results)} results for matching query '{query_text}'")
100
+ return results
101
+
102
+
103
+ @opik.track(name="query_unique_titles")
104
+ async def query_unique_titles(
105
+ request: Request,
106
+ query_text: str,
107
+ feed_author: str | None = None,
108
+ feed_name: str | None = None,
109
+ title_keywords: str | None = None,
110
+ limit: int = 5,
111
+ ) -> list[SearchResult]:
112
+ """Query the vector store and return only unique titles.
113
+
114
+ Performs a hybrid dense + sparse search with optional filters and dynamically
115
+ increases the fetch limit to account for duplicates. Deduplicates results
116
+ by article title.
117
+
118
+ Args:
119
+ request (Request): FastAPI request object containing the vector store in app.state.
120
+ query_text (str): Text query to search for.
121
+ feed_author (str | None): Optional filter for the feed author.
122
+ feed_name (str | None): Optional filter for the feed name.
123
+ title_keywords (str | None): Optional filter for title keywords.
124
+ limit (int): Maximum number of unique results to return.
125
+
126
+ Returns:
127
+ list[SearchResult]:
128
+ List of unique search results containing title, feed info, URL, chunk text, and score.
129
+
130
+ """
131
+ vectorstore: AsyncQdrantVectorStore = request.app.state.vectorstore
132
+ dense_vector = vectorstore.dense_vectors([query_text])[0]
133
+ sparse_vector = vectorstore.sparse_vectors([query_text])[0]
134
+
135
+ # Build filter conditions
136
+ conditions: list[FieldCondition] = []
137
+ if feed_author:
138
+ conditions.append(FieldCondition(key="feed_author", match=MatchValue(value=feed_author)))
139
+ if feed_name:
140
+ conditions.append(FieldCondition(key="feed_name", match=MatchValue(value=feed_name)))
141
+ if title_keywords:
142
+ conditions.append(
143
+ FieldCondition(key="title", match=MatchText(text=title_keywords.strip().lower()))
144
+ )
145
+
146
+ query_filter = Filter(must=conditions) if conditions else None # type: ignore
147
+
148
+ fetch_limit = max(1, limit) * 280
149
+ logger.info(f"Fetching up to {fetch_limit} points for unique titles.")
150
+
151
+ response = await vectorstore.client.query_points(
152
+ collection_name=vectorstore.collection_name,
153
+ query=FusionQuery(fusion=Fusion.RRF),
154
+ prefetch=[
155
+ Prefetch(query=dense_vector, using="Dense", limit=fetch_limit, filter=query_filter),
156
+ Prefetch(query=sparse_vector, using="Sparse", limit=fetch_limit, filter=query_filter),
157
+ ],
158
+ query_filter=query_filter,
159
+ limit=fetch_limit,
160
+ )
161
+
162
+ # Deduplicate by title
163
+ seen_titles: set[str] = set()
164
+ results: list[SearchResult] = []
165
+ for point in response.points:
166
+ payload = point.payload or {}
167
+ title = payload.get("title")
168
+ if not title or title in seen_titles:
169
+ continue
170
+ seen_titles.add(title)
171
+ results.append(
172
+ SearchResult(
173
+ title=title,
174
+ feed_author=payload.get("feed_author"),
175
+ feed_name=payload.get("feed_name"),
176
+ article_author=payload.get("article_authors"),
177
+ url=payload.get("url"),
178
+ chunk_text=payload.get("chunk_text"),
179
+ score=point.score,
180
+ )
181
+ )
182
+ if len(results) >= limit:
183
+ break
184
+
185
+ logger.info(f"Returning {len(results)} unique title results for matching query '{query_text}'")
186
+
187
+ # logger.info(f"results: {results}")
188
+ return results
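Sketch of the calling convention these functions imply (the real wiring lives in src/api/routes/search_routes.py and may differ): both expect the vector store on `request.app.state.vectorstore` and are awaited from a route handler.

from fastapi import APIRouter, Request

router = APIRouter()

@router.get("/search")
async def search(request: Request, q: str, limit: int = 5) -> list[SearchResult]:
    # request.app.state.vectorstore must hold an AsyncQdrantVectorStore instance,
    # set up during application startup.
    return await query_with_filters(request, query_text=q, limit=limit)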
src/config.py ADDED
@@ -0,0 +1,202 @@
1
+ import os
2
+ from typing import ClassVar
3
+
4
+ import yaml
5
+ from pydantic import BaseModel, Field, SecretStr, model_validator
6
+ from pydantic_settings import BaseSettings, SettingsConfigDict
7
+
8
+ from src.models.article_models import FeedItem
9
+
10
+
11
+ # -----------------------------
12
+ # Supabase database settings
13
+ # -----------------------------
14
+ class SupabaseDBSettings(BaseModel):
15
+ table_name: str = Field(default="substack_articles", description="Supabase table name")
16
+ host: str = Field(default="localhost", description="Database host")
17
+ name: str = Field(default="postgres", description="Database name")
18
+ user: str = Field(default="postgres", description="Database user")
19
+ password: SecretStr = Field(default=SecretStr("password"), description="Database password")
20
+ port: int = Field(default=6543, description="Database port")
21
+ test_database: str = Field(default="substack_test", description="Test database name")
22
+
23
+
24
+ # -----------------------------
25
+ # RSS settings
26
+ # -----------------------------
27
+ class RSSSettings(BaseModel):
28
+ feeds: list[FeedItem] = Field(
29
+ default_factory=list, description="List of RSS feed items"
30
+ )
31
+ default_start_date: str = Field(default="2025-09-15", description="Default cutoff date")
32
+ batch_size: int = Field(
33
+ default=5, description="Number of articles to parse and ingest in a batch"
34
+ )
35
+
36
+
37
+ # -----------------------------
38
+ # Qdrant settings
39
+ # -----------------------------
40
+ # BAAI/bge-large-en-v1.5 (1024), BAAI/bge-base-en-v1.5 (HF, 768). BAAI/bge-base-en (Fastembed, 768)
41
+ class QdrantSettings(BaseModel):
42
+ url: str = Field(default="", description="Qdrant API URL")
43
+ api_key: str = Field(default="", description="Qdrant API key")
44
+ timeout: int = Field(default=30, description="Qdrant client timeout")
45
+ collection_name: str = Field(
46
+ default="substack_collection", description="Qdrant collection name"
47
+ )
48
+ dense_model_name: str = Field(default="BAAI/bge-base-en", description="Dense model name")
49
+ sparse_model_name: str = Field(
50
+ default="Qdrant/bm25", description="Sparse model name"
51
+ ) # prithivida/Splade_PP_en_v1 (larger)
52
+ vector_dim: int = Field(
53
+ default=768,
54
+ description="Vector dimension", # 768, 1024 with Jina or large HF
55
+ )
56
+ article_batch_size: int = Field(
57
+ default=5, description="Number of articles to parse and ingest in a batch"
58
+ )
59
+ sparse_batch_size: int = Field(default=32, description="Sparse batch size")
60
+ embed_batch_size: int = Field(default=50, description="Dense embedding batch")
61
+ upsert_batch_size: int = Field(default=25, description="Batch size for Qdrant upsert")
62
+ max_concurrent: int = Field(default=2, description="Maximum number of concurrent tasks")
63
+
64
+
65
+ # -----------------------------
66
+ # Text splitting
67
+ # -----------------------------
68
+ class TextSplitterSettings(BaseModel):
69
+ chunk_size: int = Field(default=4000, description="Size of text chunks")
70
+ chunk_overlap: int = Field(default=200, description="Overlap between consecutive text chunks")
71
+ separators: list[str] = Field(
72
+ default_factory=lambda: [
73
+ "\n---\n",
74
+ "\n\n",
75
+ "\n```\n",
76
+ "\n## ",
77
+ "\n# ",
78
+ "\n**",
79
+ "\n",
80
+ ". ",
81
+ "! ",
82
+ "? ",
83
+ " ",
84
+ "",
85
+ ],
86
+ description="List of separators for text splitting. The order or separators matter",
87
+ )
88
+
89
+
90
+ # -----------------------------
91
+ # Jina Settings
92
+ # -----------------------------
93
+ class JinaSettings(BaseModel):
94
+ api_key: str = Field(default="", description="Jina API key")
95
+ url: str = Field(default="https://api.jina.ai/v1/embeddings", description="Jina API URL")
96
+ model: str = Field(default="jina-embeddings-v3", description="Jina model name") # 1024
97
+
98
+
99
+ # -----------------------------
100
+ # Hugging Face Settings
101
+ # -----------------------------
102
+ # BAAI/bge-large-en-v1.5 (1024), BAAI/bge-base-en-v1.5 (768)
103
+ class HuggingFaceSettings(BaseModel):
104
+ api_key: str = Field(default="", description="Hugging Face API key")
105
+ model: str = Field(default="BAAI/bge-base-en-v1.5", description="Hugging Face model name")
106
+
107
+
108
+ # -----------------------------
109
+ # Openai Settings
110
+ # -----------------------------
111
+ class OpenAISettings(BaseModel):
112
+ api_key: str | None = Field(default="", description="OpenAI API key")
113
+ # model: str = Field(default="gpt-4o-mini", description="OpenAI model name")
114
+
115
+
116
+ # -----------------------------
117
+ # OpenRouter Settings
118
+ # -----------------------------
119
+ class OpenRouterSettings(BaseModel):
120
+ api_key: str = Field(default="", description="OpenRouter API key")
121
+ api_url: str = Field(default="https://openrouter.ai/api/v1", description="OpenRouter API URL")
122
+
123
+
124
+ # -----------------------------
125
+ # Opik Observability Settings
126
+ # -----------------------------
127
+ class OpikObservabilitySettings(BaseModel):
128
+ api_key: str = Field(default="", description="Opik Observability API key")
129
+ project_name: str = Field(default="substack-pipeline", description="Opik project name")
130
+
131
+
132
+ # -----------------------------
133
+ # YAML loader
134
+ # -----------------------------
135
+ def load_yaml_feeds(path: str) -> list[FeedItem]:
136
+ """
137
+ Load RSS feed items from a YAML file.
138
+ If the file does not exist or is empty, returns an empty list.
139
+
140
+ Args:
141
+ path (str): Path to the YAML file.
142
+
143
+ Returns:
144
+ list[FeedItem]: List of FeedItem instances loaded from the file.
145
+ """
146
+ if not os.path.exists(path):
147
+ return []
148
+ with open(path, encoding="utf-8") as f:
149
+ data = yaml.safe_load(f)
150
+ feed_list = data.get("feeds", [])
151
+ return [FeedItem(**feed) for feed in feed_list]
152
+
153
+
154
+ # -----------------------------
155
+ # Main Settings
156
+ # -----------------------------
157
+ class Settings(BaseSettings):
158
+ supabase_db: SupabaseDBSettings = Field(default_factory=SupabaseDBSettings)
159
+ qdrant: QdrantSettings = Field(default_factory=QdrantSettings)
160
+ rss: RSSSettings = Field(default_factory=RSSSettings)
161
+ text_splitter: TextSplitterSettings = Field(default_factory=TextSplitterSettings)
162
+
163
+ jina: JinaSettings = Field(default_factory=JinaSettings)
164
+ hugging_face: HuggingFaceSettings = Field(default_factory=HuggingFaceSettings)
165
+ openai: OpenAISettings = Field(default_factory=OpenAISettings)
166
+ openrouter: OpenRouterSettings = Field(default_factory=OpenRouterSettings)
167
+ opik: OpikObservabilitySettings = Field(default_factory=OpikObservabilitySettings)
168
+
169
+ rss_config_yaml_path: str = "src/configs/feeds_rss.yaml"
170
+
171
+ # Pydantic v2 model config
172
+ model_config: ClassVar[SettingsConfigDict] = SettingsConfigDict(
173
+ env_file=[".env"],
174
+ env_file_encoding="utf-8",
175
+ extra="ignore",
176
+ env_nested_delimiter="__",
177
+ case_sensitive=False,
178
+ frozen=True,
179
+ )
180
+
181
+ @model_validator(mode="after")
182
+ def load_yaml_rss_feeds(self) -> "Settings":
183
+ """
184
+ Load RSS feeds from a YAML file after model initialization.
185
+ If the file does not exist or is empty, the feeds list remains unchanged.
186
+
187
+ Args:
188
+ self (Settings): The settings instance.
189
+
190
+ Returns:
191
+ Settings: The updated settings instance.
192
+ """
193
+ yaml_feeds = load_yaml_feeds(self.rss_config_yaml_path)
194
+ if yaml_feeds:
195
+ self.rss.feeds = yaml_feeds
196
+ return self
197
+
198
+
199
+ # -----------------------------
200
+ # Instantiate settings
201
+ # -----------------------------
202
+ settings = Settings()
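With `env_nested_delimiter="__"`, flat environment variables map onto the nested settings models; the variable names and values below are placeholders:

#   QDRANT__URL=https://my-cluster.example.cloud   -> settings.qdrant.url
#   QDRANT__API_KEY=...                            -> settings.qdrant.api_key
#   SUPABASE_DB__PORT=6543                         -> settings.supabase_db.port
import os

os.environ["QDRANT__COLLECTION_NAME"] = "substack_collection_dev"
print(Settings().qdrant.collection_name)  # -> substack_collection_dev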
src/configs/feeds_rss.yaml ADDED
@@ -0,0 +1,91 @@
1
+ feeds:
2
+ - name: "AI Echoes"
3
+ author: "Benito Martin"
4
+ url: "https://aiechoes.substack.com/feed"
5
+ - name: "The Neural Maze"
6
+ author: "Miguel Otero"
7
+ url: "https://theneuralmaze.substack.com/feed"
8
+ - name: "Decoding ML"
9
+ author: "Paul Iusztin"
10
+ url: "https://decodingml.substack.com/feed"
11
+ - name: "Swirl AI Newsletter"
12
+ author: "Aurimas Griciūnas"
13
+ url: "https://www.newsletter.swirlai.com/feed"
14
+ - name: "Marvelous MLOps Substack"
15
+ author: "Başak Tuğçe Eskili and Maria Vechtomova"
16
+ url: "https://marvelousmlops.substack.com/feed"
17
+ - name: "Jam with AI"
18
+ author: "Shirin Khosravi Jam and Shantanu Ladhwe"
19
+ url: "https://jamwithai.substack.com/feed"
20
+ - name: "Hamel's Substack"
21
+ author: "Hamel Husain"
22
+ url: "https://hamelhusain.substack.com/feed"
23
+ - name: "Neural Bits"
24
+ author: "Alex Razvant"
25
+ url: "https://multimodalai.substack.com/feed"
26
+ - name: "DiamantAI"
27
+ author: "Nir Diamant"
28
+ url: "https://diamantai.substack.com/feed"
29
+ - name: "ByteByteGo Newsletter"
30
+ author: "Alex Xu"
31
+ url: "https://blog.bytebytego.com/feed"
32
+ - name: "Latent.Space"
33
+ author: "Latent.Space"
34
+ url: "https://www.latent.space/feed"
35
+ - name: "Adaline Labs"
36
+ author: "Adaline"
37
+ url: "https://labs.adaline.ai/feed"
38
+ - name: "Gradient Ascent"
39
+ author: "Sairam Sundaresan"
40
+ url: "https://newsletter.artofsaience.com/feed"
41
+ - name: "Daily Dose of Data Science"
42
+ author: "Avi Chawla"
43
+ url: "https://blog.dailydoseofds.com/feed"
44
+ - name: "Generative AI for Everyone"
45
+ author: "Hamza Farooq"
46
+ url: "https://boringbot.substack.com/feed"
47
+ - name: "Vizuara's AI Newsletter"
48
+ author: "Vizuara AI Labs"
49
+ url: "https://www.vizuaranewsletter.com/feed"
50
+ - name: "Deep (Learning) Focus"
51
+ author: "Cameron R. Wolfe, Ph.D."
52
+ url: "https://cameronrwolfe.substack.com/feed"
53
+ - name: "Language Models & Co."
54
+ author: "Jay Alammar"
55
+ url: "https://newsletter.languagemodels.co/feed"
56
+ - name: "Exploring Language Models"
57
+ author: "Maarten Grootendorst"
58
+ url: "https://newsletter.maartengrootendorst.com/feed"
59
+ - name: "Hyperplane"
60
+ author: "Cube Digital"
61
+ url: "https://thehyperplane.substack.com/feed"
62
+ - name: "ModelCraft"
63
+ author: "Abi Aryan"
64
+ url: "https://modelcraft.substack.com/feed"
65
+ - name: "NeoSage"
66
+ author: "Shivani Virdi"
67
+ url: "https://blog.neosage.io/feed"
68
+ - name: "Nnitiwe's AI Blog"
69
+ author: "Samuel Theophilus"
70
+ url: "https://blog.nnitiwe.io/feed"
71
+ - name: "The Palindrome"
72
+ author: "Tivadar Danka"
73
+ url: "https://thepalindrome.org/feed"
74
+ - name: "Python & Chill"
75
+ author: "Banias Baabe"
76
+ url: "https://pythonandchill.substack.com/feed"
77
+ - name: "Rami's Data Newsletter"
78
+ author: "Rami Krispin"
79
+ url: "https://ramikrispin.substack.com/feed"
80
+ - name: "To Data & Beyond"
81
+ author: "Youssef Hosni"
82
+ url: "https://youssefh.substack.com/feed"
83
+ - name: "Vanishing Gradients"
84
+ author: "Hugo Bowne-Anderson"
85
+ url: "https://hugobowne.substack.com/feed"
86
+ - name: "When Engineers meet AI"
87
+ author: "Kannan Kalidasan"
88
+ url: "https://engineersmeetai.substack.com/feed"
89
+ - name: "slys.dev"
90
+ author: "Anna & Jakub Slys"
91
+ url: "https://iam.slys.dev/feed"
src/infrastructure/__init__.py ADDED
File without changes
src/infrastructure/qdrant/__init__.py ADDED
File without changes
src/infrastructure/qdrant/create_collection.py ADDED
@@ -0,0 +1,47 @@
1
+ import asyncio
2
+
3
+ from src.infrastructure.qdrant.qdrant_vectorstore import AsyncQdrantVectorStore
4
+ from src.utils.logger_util import setup_logging
5
+
6
+ logger = setup_logging()
7
+
8
+
9
+ async def main() -> None:
10
+ """Create a Qdrant collection asynchronously using AsyncQdrantVectorStore.
11
+
12
+ This function initializes an AsyncQdrantVectorStore instance and calls its
13
+ create_collection method to set up a Qdrant collection for vector storage.
14
+ Errors during collection creation are logged
15
+ and handled gracefully.
16
+
17
+ Args:
18
+ None
19
+
20
+ Returns:
21
+ None
22
+
23
+ Raises:
24
+ RuntimeError: If an error occurs during Qdrant collection creation.
25
+ Exception: For unexpected errors during execution.
26
+
27
+ """
28
+ # Initialize the logger
29
+ logger.info("Creating Qdrant collection")
30
+
31
+ try:
32
+ # Initialize the AsyncQdrantVectorStore instance
33
+ vectorstore = AsyncQdrantVectorStore()
34
+ # Create the Qdrant collection asynchronously
35
+ await vectorstore.create_collection()
36
+ logger.info("Qdrant collection created successfully")
37
+
38
+ except RuntimeError as e:
39
+ logger.error(f"Failed to create Qdrant collection: {e}")
40
+ raise RuntimeError("Error creating Qdrant collection") from e
41
+ except Exception as e:
42
+ logger.error(f"Unexpected error during Qdrant collection creation: {e}")
43
+ raise
44
+
45
+
46
+ if __name__ == "__main__":
47
+ asyncio.run(main())
src/infrastructure/qdrant/create_indexes.py ADDED
@@ -0,0 +1,44 @@
1
+ import asyncio
2
+
3
+ from src.infrastructure.qdrant.qdrant_vectorstore import AsyncQdrantVectorStore
4
+ from src.utils.logger_util import setup_logging
5
+
6
+ logger = setup_logging()
7
+
8
+
9
+ async def main() -> None:
10
+ """Create necessary indexes for the Qdrant vector store.
11
+
12
+ Initializes an AsyncQdrantVectorStore and creates HNSW, title, article authors,
13
+ feed author, and feed name indexes. Logs errors and ensures proper execution.
14
+
15
+ Args:
16
+ None
17
+
18
+ Returns:
19
+ None
20
+
21
+ Raises:
22
+ RuntimeError: If an error occurs during index creation.
23
+ Exception: For unexpected errors during execution.
24
+
25
+ """
26
+ logger.info("Creating Qdrant indexes")
27
+ try:
28
+ vectorstore = AsyncQdrantVectorStore()
29
+ await vectorstore.enable_hnsw()
30
+ await vectorstore.create_title_index()
31
+ await vectorstore.create_article_authors_index()
32
+ await vectorstore.create_feed_author_index()
33
+ await vectorstore.create_article_feed_name_index()
34
+ logger.info("Qdrant indexes created successfully")
35
+ except RuntimeError as e:
36
+ logger.error(f"Failed to create Qdrant indexes: {e}")
37
+ raise RuntimeError("Error creating Qdrant indexes") from e
38
+ except Exception as e:
39
+ logger.error(f"Unexpected error creating Qdrant indexes: {e}")
40
+ raise
41
+
42
+
43
+ if __name__ == "__main__":
44
+ asyncio.run(main())
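These two bootstrap scripts are typically run once, in order, before ingestion: `python -m src.infrastructure.qdrant.create_collection` to create the collection, then `python -m src.infrastructure.qdrant.create_indexes` to add the HNSW configuration and payload indexes.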