Commit 266d7bc · 0 parent(s)
IndraneelKumar committed: Initial search engine commit

This view is limited to 50 files because it contains too many changes.
- .dockerignore +64 -0
- .env.example +58 -0
- .github/workflows/cd.yml +32 -0
- .github/workflows/ci.yml +78 -0
- .gitignore +219 -0
- .pre-commit-config.yaml +67 -0
- .prefectignore +41 -0
- .python-version +1 -0
- .vscode/settings.json +7 -0
- Dockerfile +65 -0
- Makefile +200 -0
- README.md +74 -0
- cloudbuild_fastapi.yaml +12 -0
- deploy_fastapi.sh +105 -0
- frontend/__init__.py +0 -0
- frontend/app.py +560 -0
- prefect-cloud.yaml +52 -0
- prefect-local.yaml +53 -0
- pyproject.toml +174 -0
- requirements.txt +23 -0
- src/__init__.py +0 -0
- src/api/__init__.py +0 -0
- src/api/exceptions/__init__.py +0 -0
- src/api/exceptions/exception_handlers.py +97 -0
- src/api/main.py +142 -0
- src/api/middleware/__init__.py +0 -0
- src/api/middleware/logging_middleware.py +73 -0
- src/api/models/__init__.py +0 -0
- src/api/models/api_models.py +85 -0
- src/api/models/provider_models.py +77 -0
- src/api/routes/__init__.py +0 -0
- src/api/routes/health_routes.py +52 -0
- src/api/routes/search_routes.py +123 -0
- src/api/services/__init__.py +0 -0
- src/api/services/generation_service.py +137 -0
- src/api/services/providers/__init__.py +0 -0
- src/api/services/providers/huggingface_service.py +64 -0
- src/api/services/providers/openai_service.py +181 -0
- src/api/services/providers/openrouter_service.py +254 -0
- src/api/services/providers/utils/__init__.py +0 -0
- src/api/services/providers/utils/evaluation_metrics.py +110 -0
- src/api/services/providers/utils/messages.py +18 -0
- src/api/services/providers/utils/prompts.py +77 -0
- src/api/services/search_service.py +188 -0
- src/config.py +202 -0
- src/configs/feeds_rss.yaml +91 -0
- src/infrastructure/__init__.py +0 -0
- src/infrastructure/qdrant/__init__.py +0 -0
- src/infrastructure/qdrant/create_collection.py +47 -0
- src/infrastructure/qdrant/create_indexes.py +44 -0
.dockerignore
ADDED
@@ -0,0 +1,64 @@
# Git
.git
.gitignore

# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
.pytest_cache/
.coverage
htmlcov/
.tox/
.ruff_cache/
.mypy_cache/

# Virtual environments
venv/
.venv/
env/
ENV/

# IDE files
.idea/
.vscode/
*.swp
*.swo

# Build directories
dist/
build/
*.egg-info/

# Docker
.dockerignore
docker-compose*.yml

# Logs
logs/
*.log

# Temporary files
.tmp/
tmp/

# Documentation
docs/
# README.md
CHANGELOG.md
LICENSE

# Test data
images/

# Project directories
tests/
src/pipelines/
src/infrastructure/supabase/
# uv.lock
pre-commit-config.yaml
# pyproject.toml
.python-version
MEMORY.md
.env.example
ADDED
@@ -0,0 +1,58 @@
SUPABASE_DB__TABLE_NAME=substack_articles
SUPABASE_DB__HOST=your_supabase_db_host_here
SUPABASE_DB__NAME=postgres
SUPABASE_DB__USER=your_supabase_db_user_here
SUPABASE_DB__PASSWORD=your_supabase_db_password_here
SUPABASE_DB__PORT=6543

# RSS
RSS__DEFAULT_START_DATE=2025-07-01
RSS__BATCH_SIZE=30

# Qdrant configuration
QDRANT__API_KEY=your_qdrant_api_key_here
QDRANT__URL=your_qdrant_url_here
QDRANT__COLLECTION_NAME=substack_collection
QDRANT__DENSE_MODEL_NAME=BAAI/bge-base-en-v1.5 # BAAI/bge-large-en-v1.5 (1024), BAAI/bge-base-en-v1.5 (HF, 768). BAAI/bge-base-en (Fastembed, 768)
QDRANT__SPARSE_MODEL_NAME=Qdrant/bm25 # prithivida/Splade_PP_en_v1, Qdrant/bm25
QDRANT__VECTOR_DIM=768 # 768, 1024
QDRANT__ARTICLE_BATCH_SIZE=5
QDRANT__SPARSE_BATCH_SIZE=32
QDRANT__EMBED_BATCH_SIZE=50 # 50
QDRANT__UPSERT_BATCH_SIZE=100 # 50
QDRANT__MAX_CONCURRENT=3

# Text splitting
TS__CHUNK_SIZE=4000
TS__CHUNK_OVERLAP=200

# PREFECT
PREFECT__API_KEY=your_prefect_api_key_here
PREFECT__WORKSPACE=your_prefect_workspace_here
PREFECT__API_URL=your_prefect_api_url_here

# JINA
JINA__API_KEY=your_jina_api_key_here
JINA__URL=https://api.jina.ai/v1/embeddings
JINA__MODEL=jina-embeddings-v3

# HUGGING FACE
HUGGING_FACE__API_KEY=your_hugging_face_api_key_here
HUGGING_FACE__MODEL=BAAI/bge-base-en-v1.5

# OPENAI
OPENAI__API_KEY=your_openai_api_key_here

# OPENROUTER
OPENROUTER__API_KEY=your_openrouter_api_key_here
OPENROUTER__API_URL=https://openrouter.ai/api/v1

# OPIK OBSERVABILITY
OPIK__API_KEY=your_opik_api_key_here
OPIK__PROJECT_NAME=substack-pipeline

# FastAPI Endpoint
BACKEND_URL=your_fastapi_backend_url_here

# Default (8501)
ALLOWED_ORIGINS=your_allowed_origins_here_as_comma_separated_values
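The double-underscore names above follow the nested-delimiter convention of Pydantic Settings, which the README later in this diff names as the config layer (`src/config.py`). A minimal sketch of how such variables can be loaded into nested models; the class and field names here are illustrative assumptions, not the project's actual settings code:

```python
# Illustrative only: shows how SUPABASE_DB__HOST / QDRANT__URL style variables
# can populate nested models via pydantic-settings' env_nested_delimiter.
from pydantic import BaseModel
from pydantic_settings import BaseSettings, SettingsConfigDict


class SupabaseDBSettings(BaseModel):
    host: str = "localhost"
    port: int = 6543
    name: str = "postgres"
    user: str = ""
    password: str = ""


class QdrantSettings(BaseModel):
    url: str = ""
    api_key: str = ""
    collection_name: str = "substack_collection"


class Settings(BaseSettings):
    # env_nested_delimiter="__" makes SUPABASE_DB__HOST fill supabase_db.host
    model_config = SettingsConfigDict(
        env_file=".env", env_nested_delimiter="__", extra="ignore"
    )

    supabase_db: SupabaseDBSettings = SupabaseDBSettings()
    qdrant: QdrantSettings = QdrantSettings()


if __name__ == "__main__":
    settings = Settings()
    print(settings.qdrant.collection_name)
```

With that layout, `Settings().supabase_db.host` would pick up `SUPABASE_DB__HOST` from the environment or from `.env`.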
.github/workflows/cd.yml
ADDED
@@ -0,0 +1,32 @@
name: CD

on:
  workflow_dispatch:
  push:
    branches:
      # - main
      - develop

jobs:
  deploy:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version-file: .python-version

      - name: Run Prefect Deploy
        uses: PrefectHQ/actions-prefect-deploy@v4
        with:
          all-deployments: "true" # deploy all deployments in prefect.yaml
          requirements-file-paths: ./requirements.txt
          deployment-file-path: ./prefect-cloud.yaml
        env:
          PREFECT_API_KEY: ${{ secrets.PREFECT__API_KEY }}
          PREFECT_WORKSPACE: ${{ secrets.PREFECT__WORKSPACE }}
          PREFECT_API_URL: ${{ secrets.PREFECT__API_URL }}
.github/workflows/ci.yml
ADDED
@@ -0,0 +1,78 @@
name: CI

on:
  workflow_dispatch:

  push:

    branches:
      # - main
      - develop

jobs:
  lint-and-test:
    runs-on: ubuntu-latest

    env:
      # Supabase secrets
      SUPABASE_DB__TABLE_NAME: ${{ secrets.SUPABASE_DB__TABLE_NAME }}
      SUPABASE_DB__HOST: ${{ secrets.SUPABASE_DB__HOST }}
      SUPABASE_DB__NAME: ${{ secrets.SUPABASE_DB__NAME }}
      SUPABASE_DB__USER: ${{ secrets.SUPABASE_DB__USER }}
      SUPABASE_DB__PASSWORD: ${{ secrets.SUPABASE_DB__PASSWORD }}
      SUPABASE_DB__PORT: ${{ secrets.SUPABASE_DB__PORT }}

      # Qdrant secrets
      QDRANT__API_KEY: ${{ secrets.QDRANT__API_KEY }}
      QDRANT__URL: ${{ secrets.QDRANT__URL }}
      QDRANT__COLLECTION_NAME: ${{ secrets.QDRANT__COLLECTION_NAME }}

      # OpenRouter secrets
      OPENROUTER__API_KEY: ${{ secrets.OPENROUTER__API_KEY }}
      OPENROUTER__API_URL: ${{ secrets.OPENROUTER__API_URL }}

      # OPIK secrets
      OPIK__API_KEY: ${{ secrets.OPIK__API_KEY }}
      OPIK__PROJECT_NAME: ${{ secrets.OPIK__PROJECT_NAME }}

      # FastAPI secrets
      ALLOWED_ORIGINS: ${{ secrets.ALLOWED_ORIGINS }}

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v5

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version-file: .python-version

      - name: Install dependencies
        run: uv sync --all-groups

      - name: Run pre-commit hooks
        run: |
          source .venv/bin/activate
          pre-commit install
          pre-commit run --all-files

      # 🔹 Debug step: check that DB env vars are set
      - name: Check DB environment variables
        run: |
          for var in SUPABASE_DB__HOST SUPABASE_DB__NAME SUPABASE_DB__USER SUPABASE_DB__PORT SUPABASE_DB__TABLE_NAME \
                     QDRANT__API_KEY QDRANT__URL QDRANT__COLLECTION_NAME \
                     OPENROUTER__API_KEY OPENROUTER__API_URL \
                     ALLOWED_ORIGINS; do
            if [ -z "${!var}" ]; then
              echo "ERROR: $var is empty!"
              exit 1
            else
              echo "$var is set"
            fi
          done

      - name: Run tests
        run: uv run pytest
.gitignore
ADDED
@@ -0,0 +1,219 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/

# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/

# Ruff stuff:
.ruff_cache/

# PyPI configuration file
.pypirc

# Cursor
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
# refer to https://docs.cursor.com/context/ignore-files
.cursorignore
.cursorindexingignore

# FILES
MEMORY.md
DOCKER.md
INSTRUCTIONS.md
create_prefect_secrets.py
OLD_README.md
delete_deployment.sh
update_deploy_fastapi.sh
src/infrastructure/qdrant/query_scroll.py
src/infrastructure/qdrant/query_search.py
src/pipelines/flows/rss_ingestion_flow_old.py
src/pipelines/tasks/fetch_rss_old.py
src/pipelines/tasks/parse_articles_new.py
src/pipelines/tasks/batch_parse_ingest_articles.py
experiments/
src/configs/all_feeds.yaml
src/pipelines/flows/backfilling_archive_flow.py
src/pipelines/tasks/fetch_archive.py
src/pipelines/tasks/ingest_archive.py
src/pipelines/tasks/parse_archive.py
src/configs/feeds_archive.yaml
deploy_gradio.sh
frontend/Dockerfile
frontend/requirements.txt
.pre-commit-config.yaml
ADDED
@@ -0,0 +1,67 @@
repos:
  - repo: https://github.com/astral-sh/uv-pre-commit
    # uv version.
    rev: 0.8.17
    hooks:
      # Update the uv lockfile
      - id: uv-lock

  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v6.0.0
    hooks:
      - id: check-added-large-files
        args: ['--maxkb=20000']
      - id: check-toml
      - id: check-yaml
        args: [--allow-multiple-documents]
      - id: end-of-file-fixer
      - id: trailing-whitespace
      - id: check-json
      - id: detect-private-key

  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: v1.18.1
    hooks:
      - id: mypy
        additional_dependencies:
          - types-pyyaml>=6.0.12.20250822
          - types-requests>=2.32.4.20250809
          - types-python-dateutil>=2.9.0.20250822
          - types-markdown>=3.9.0.20250906
        args: ["--config-file=pyproject.toml"]

  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.13.0
    hooks:
      - id: ruff-check
        args:
          [
            --fix,
            --exit-non-zero-on-fix,
            --show-fixes
          ]
      - id: ruff-format

  - repo: https://github.com/hukkin/mdformat
    rev: 0.7.22
    hooks:
      - id: mdformat
        additional_dependencies:
          - mdformat-gfm
        exclude: ^team_data/


  - repo: https://github.com/gitleaks/gitleaks
    rev: v8.28.0
    hooks:
      - id: gitleaks


  # - repo: local
  #   hooks:
  #     - id: pytest
  #       name: pytest
  #       entry: pytest
  #       language: system
  #       types: [python]
  #       pass_filenames: false
.prefectignore
ADDED
@@ -0,0 +1,41 @@
# prefect artifacts
.prefectignore

# python artifacts
__pycache__/
*.py[cod]
*$py.class
*.egg-info/
*.egg

# Type checking artifacts
.mypy_cache/
.dmypy.json
dmypy.json
.pyre/

# IPython
profile_default/
ipython_config.py
*.ipynb_checkpoints/*

# Environments
.python-version
.env
.venv
env/
venv/

# MacOS
.DS_Store

# Dask
dask-worker-space/

# Editors
.idea/
.vscode/

# VCS
.git/
.hg/
.python-version
ADDED
@@ -0,0 +1 @@
3.12
.vscode/settings.json
ADDED
@@ -0,0 +1,7 @@
{
    "terminal.integrated.defaultProfile.linux": "zsh",
    "terminal.integrated.defaultProfile.windows": "",
    "cSpell.words": [
        "fastapi"
    ]
}
Dockerfile
ADDED
@@ -0,0 +1,65 @@
# ---------- Build Stage ----------
FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder

WORKDIR /app

# System deps required for building some Python wheels (e.g., madoka)
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential g++ \
    && rm -rf /var/lib/apt/lists/*

# Configure UV for optimal performance
ENV UV_COMPILE_BYTECODE=1
ENV UV_LINK_MODE=copy
ENV UV_PYTHON_DOWNLOADS=never

# Copy dependency files and sync dependencies
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    uv sync --locked --no-install-project --no-dev

# Copy source code selectively
COPY src/api ./src/api
COPY src/config.py ./src/config.py
COPY src/infrastructure/qdrant ./src/infrastructure/qdrant
COPY src/models ./src/models
COPY src/utils ./src/utils

# Also copy README.md, pyproject.toml and uv.lock for the final sync
COPY pyproject.toml uv.lock README.md ./

# Install project dependencies into virtualenv
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --locked --no-dev


# ---------- Runtime Stage ----------
FROM python:3.12-slim-bookworm

# Copy built application and virtualenv from builder
COPY --from=builder /app /app

# Install runtime tools used by HEALTHCHECK
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Set Python path and environment variables
ENV PATH="/app/.venv/bin:$PATH"
ENV PYTHONPATH=/app
ENV HF_HOME=/tmp/huggingface
ENV FASTEMBED_CACHE=/tmp/fastembed_cache
ENV PORT=8080

# Create cache directories
RUN mkdir -p $HF_HOME $FASTEMBED_CACHE && chmod -R 755 $HF_HOME $FASTEMBED_CACHE

HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:$PORT/health || exit 1

# Expose Cloud Run port
EXPOSE $PORT

# Run FastAPI with uvicorn
CMD ["uvicorn", "src.api.main:app", "--host", "0.0.0.0", "--port", "8080", "--workers", "1", "--loop", "uvloop"]
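The `HEALTHCHECK` above probes `/health`, which this commit presumably serves from `src/api/routes/health_routes.py`. A minimal, illustrative sketch of such a route follows; the router wiring and response shape are assumptions, not the committed handler:

```python
# Minimal illustration of a /health route like the one the Dockerfile HEALTHCHECK curls.
# The router name and response fields are assumptions, not the committed implementation.
from fastapi import APIRouter, FastAPI

router = APIRouter()


@router.get("/health")
async def health() -> dict[str, str]:
    # Keep the payload trivially cheap so the container healthcheck stays fast.
    return {"status": "ok"}


app = FastAPI()
app.include_router(router)
```

Keeping the handler dependency-free means the healthcheck reports process liveness rather than the status of downstream services such as Qdrant or Postgres.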
Makefile
ADDED
@@ -0,0 +1,200 @@
# Makefile

# Check if .env exists
ifeq (,$(wildcard .env))
$(error .env file is missing at .env. Please create one based on .env.example)
endif

# Load environment variables from .env
include .env

.PHONY: tests mypy clean help ruff-check ruff-check-fix ruff-format ruff-format-fix all-check all-fix

#################################################################################
## Supabase Commands
#################################################################################

supabase-create: ## Create Supabase database
	@echo "Creating Supabase database..."
	uv run python src/infrastructure/supabase/create_db.py

supabase-delete: ## Delete Supabase database
	@echo "Deleting Supabase database..."
	uv run python src/infrastructure/supabase/delete_db.py

#################################################################################
## Qdrant Commands
#################################################################################

qdrant-create-collection: ## Create Qdrant collection
	@echo "Creating Qdrant collection..."
	uv run python src/infrastructure/qdrant/create_collection.py

qdrant-delete-collection: ## Delete Qdrant collection
	@echo "Deleting Qdrant collection..."
	uv run python src/infrastructure/qdrant/delete_collection.py

qdrant-create-index: ## Create Qdrant index
	@echo "Updating HNSW and creating Qdrant indexes..."
	uv run python src/infrastructure/qdrant/create_indexes.py

qdrant-ingest-from-sql: ## Ingest data from SQL to Qdrant
	@echo "Ingesting data from SQL to Qdrant..."
	uv run python src/infrastructure/qdrant/ingest_from_sql.py
	@echo "Data ingestion complete."

#################################################################################
## Prefect Flow Commands
#################################################################################

ingest-rss-articles-flow: ## Ingest RSS articles flow
	@echo "Running ingest RSS articles flow..."
	uv run python src/pipelines/flows/rss_ingestion_flow.py
	@echo "Ingest RSS articles flow completed."

ingest-embeddings-flow: ## Ingest embeddings flow
	@echo "Running ingest embeddings flow..."
	$(if $(FROM_DATE), \
		uv run python src/pipelines/flows/embeddings_ingestion_flow.py --from-date $(FROM_DATE), \
		uv run python src/pipelines/flows/embeddings_ingestion_flow.py)
	@echo "Ingest embeddings flow completed."

#################################################################################
## Prefect Deployment Commands
#################################################################################
deploy-cloud-flows: ## Deploy Prefect flows to Prefect Cloud
	@echo "Deploying Prefect flows to Prefect Cloud..."
	prefect deploy --prefect-file prefect-cloud.yaml
	@echo "Prefect Cloud deployment complete."

deploy-local-flows: ## Deploy Prefect flows to Prefect Local Server
	@echo "Deploying Prefect flows to Prefect Local Server..."
	prefect deploy --prefect-file prefect-local.yaml
	@echo "Prefect Local deployment complete."

#################################################################################
## Recreate Commands
#################################################################################

recreate-supabase: supabase-delete supabase-create ## Recreate Supabase resources

recreate-qdrant: qdrant-delete-collection qdrant-create-collection ## Recreate Qdrant resources

recreate-all: supabase-delete qdrant-delete-collection supabase-create qdrant-create-collection ## Recreate Qdrant and Supabase resources

#################################################################################
## FastAPI Commands
#################################################################################

run-api: ## Run FastAPI application
	@echo "Starting FastAPI application..."
	uv run src/api/main.py
	@echo "FastAPI application stopped."

#################################################################################
## Gradio Commands
#################################################################################

run-gradio: ## Run Gradio application
	@echo "Starting Gradio application..."
	uv run frontend/app.py
	@echo "Gradio application stopped."

#################################################################################
## Testing Commands
#################################################################################

unit-tests: ## Run all unit tests
	@echo "Running all unit tests..."
	uv run pytest tests/unit
	@echo "All unit tests completed."

integration-tests: ## Run all integration tests
	@echo "Running all integration tests..."
	uv run pytest tests/integration
	@echo "All integration tests completed."

all-tests: ## Run all tests
	@echo "Running all tests..."
	uv run pytest
	@echo "All tests completed."

################################################################################
## Pre-commit Commands
################################################################################

pre-commit-run: ## Run pre-commit hooks
	@echo "Running pre-commit hooks..."
	pre-commit run --all-files
	@echo "Pre-commit checks complete."

################################################################################
## Linting
################################################################################

# Linting (just checks)
ruff-check: ## Check code lint violations (--diff to show possible changes)
	@echo "Checking Ruff formatting..."
	uv run ruff check .
	@echo "Ruff lint checks complete."

ruff-check-fix: ## Auto-format code using Ruff
	@echo "Formatting code with Ruff..."
	uv run ruff check . --fix --exit-non-zero-on-fix
	@echo "Formatting complete."

################################################################################
## Formatting
################################################################################

# Formatting (just checks)
ruff-format: ## Check code format violations (--diff to show possible changes)
	@echo "Checking Ruff formatting..."
	uv run ruff format . --check
	@echo "Ruff format checks complete."

ruff-format-fix: ## Auto-format code using Ruff
	@echo "Formatting code with Ruff..."
	uv run ruff format .
	@echo "Formatting complete."

#################################################################################
## Static Type Checking
#################################################################################

mypy: ## Run MyPy static type checker
	@echo "Running MyPy static type checker..."
	uv run mypy
	@echo "MyPy static type checker complete."

################################################################################
## Cleanup
################################################################################

clean: ## Clean up cached generated files
	@echo "Cleaning up generated files..."
	find . -type d -name "__pycache__" -exec rm -rf {} +
	find . -type d -name ".pytest_cache" -exec rm -rf {} +
	find . -type d -name ".ruff_cache" -exec rm -rf {} +
	find . -type d -name ".mypy_cache" -exec rm -rf {} +
	find . -type f -name "*.pyc" -delete
	@echo "Cleanup complete."

################################################################################
## Composite Commands
################################################################################

all-check: ruff-format ruff-check clean ## Run all: linting, formatting and type checking

all-fix: ruff-format-fix ruff-check-fix mypy clean ## Run all fix: auto-formatting and linting fixes

################################################################################
## Help
################################################################################

help: ## Display this help message
	@echo "Default target: $(.DEFAULT_GOAL)"
	@echo "Available targets:"
	@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)

.DEFAULT_GOAL := help
README.md
ADDED
@@ -0,0 +1,74 @@
# Articles Search Engine

A compact, production-style RAG pipeline. It ingests Substack RSS articles, stores them in Postgres (Supabase), creates dense/sparse embeddings in Qdrant, and exposes search and answer endpoints via FastAPI with a simple Gradio UI.

## How it works (brief)
- Ingest RSS → Supabase:
  - Prefect flow (`src/pipelines/flows/rss_ingestion_flow.py`) reads feeds from `src/configs/feeds_rss.yaml`, parses articles, and writes them to Postgres using SQLAlchemy models.
- Embed + index in Qdrant:
  - Content is chunked, embedded (e.g., BAAI bge models), and upserted to a Qdrant collection with payload indexes for filtering and hybrid search.
  - Collection and indexes are created via utilities in `src/infrastructure/qdrant/`.
- Search + generate:
  - FastAPI (`src/api/main.py`) exposes search endpoints (keyword, semantic, hybrid) and assembles answers with citations.
  - LLM providers are pluggable with fallback (OpenRouter, OpenAI, Hugging Face).
- UI + deploy:
  - Gradio app for quick local search (`frontend/app.py`).
  - Containerization with Docker and optional deploy to Google Cloud Run.

## Tech stack
- Python 3.12, FastAPI, Prefect, SQLAlchemy
- Supabase (Postgres) for articles
- Qdrant for vector search (dense + sparse/hybrid)
- OpenRouter / OpenAI / Hugging Face for LLM completion
- Gradio UI, Docker, Google Cloud Run
- Config via Pydantic Settings, `uv` or `pip` for deps

## Run locally (minimal)
1) Configure environment (either `.env` or shell). Key variables (Pydantic nested with `__`):
   - Supabase: `SUPABASE_DB__HOST`, `SUPABASE_DB__PORT`, `SUPABASE_DB__NAME`, `SUPABASE_DB__USER`, `SUPABASE_DB__PASSWORD`
   - Qdrant: `QDRANT__URL`, `QDRANT__API_KEY`
   - LLM (choose one): `OPENROUTER__API_KEY` or `OPENAI__API_KEY` or `HUGGING_FACE__API_KEY`
   - Optional CORS: `ALLOWED_ORIGINS`

2) Install dependencies:
```bash
# with uv
uv venv && source .venv/bin/activate
uv pip install -r requirements.txt

# or with pip
python -m venv .venv && source .venv/bin/activate
pip install -r requirements.txt
```

3) Initialize storage:
```bash
python src/infrastructure/supabase/create_db.py
python src/infrastructure/qdrant/create_collection.py
python src/infrastructure/qdrant/create_indexes.py
```

4) Ingest and embed:
```bash
python src/pipelines/flows/rss_ingestion_flow.py
python src/pipelines/flows/embeddings_ingestion_flow.py
```

5) Start services:
```bash
# REST API
uvicorn src.api.main:app --reload

# Gradio UI (optional)
python frontend/app.py
```

## Project structure (high-level)
- `src/api/` — FastAPI app, routes, middleware, exceptions
- `src/infrastructure/supabase/` — DB init and sessions
- `src/infrastructure/qdrant/` — Vector store and collection utilities
- `src/pipelines/` — Prefect flows and tasks for ingestion/embeddings
- `src/models/` — SQL and vector models
- `frontend/` — Gradio UI
- `configs/` — RSS feeds config
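As a quick way to exercise the API once it is running, the payload below mirrors what the Gradio frontend (`frontend/app.py`, later in this diff) posts to the `/search/ask` endpoint; the example query, field values, and reliance on the `answer` field are assumptions beyond what that code shows:

```python
# Rough smoke test against a locally running API, mirroring the payload frontend/app.py builds.
# The endpoint path and field names come from the frontend code; everything else is illustrative.
import requests

BACKEND_URL = "http://localhost:8080"

payload = {
    "query_text": "what is hybrid search?",
    "feed_author": "",
    "feed_name": "",
    "limit": 5,
    "provider": "openrouter",
}

resp = requests.post(f"{BACKEND_URL}/search/ask", json=payload, timeout=60)
resp.raise_for_status()
print(resp.json().get("answer", ""))
```

The streaming variant posts the same payload to `/search/ask/stream` and reads the body incrementally, as the frontend helper `call_ai` does.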
cloudbuild_fastapi.yaml
ADDED
@@ -0,0 +1,12 @@
steps:
  - name: 'gcr.io/cloud-builders/docker'
    entrypoint: 'bash'
    args:
      - '-c'
      - |
        export DOCKER_BUILDKIT=1
        docker build -t gcr.io/${PROJECT_ID}/${_SERVICE_NAME} -f Dockerfile .
substitutions:
  _SERVICE_NAME: "substack-pipeline-fastapi"
images:
  - "gcr.io/${PROJECT_ID}/${_SERVICE_NAME}"
deploy_fastapi.sh
ADDED
@@ -0,0 +1,105 @@
#!/bin/bash
# -----------------------
# FastAPI Backend Deployment to Cloud Run
# -----------------------

# Exit immediately if a command exits with a non-zero status
set -e

#-----------------------
# Load environment variables
#-----------------------

if [ ! -f .env ]; then
  echo "❌ .env file not found!"
  exit 1
fi

# Load environment variables from .env file
set -o allexport
source .env
set +o allexport

echo "✅ Environment variables loaded."

# -----------------------
# Configuration
# -----------------------
PROJECT_ID="personal-projects-477710"
SERVICE_NAME="substack-pipeline-fastapi"
REGION="asia-south2" #europe-west1 "europe-west6"
IMAGE_NAME="gcr.io/$PROJECT_ID/$SERVICE_NAME"

# -----------------------
# Set project
# -----------------------
echo "🔧 Setting GCP project to $PROJECT_ID..."
gcloud config set project "$PROJECT_ID"


# -----------------------
# Enable required APIs
# -----------------------
echo "🔧 Enabling required GCP services..."
gcloud services enable \
  cloudbuild.googleapis.com \
  run.googleapis.com \
  containerregistry.googleapis.com

# -----------------------
# Build and push Docker image
# -----------------------
echo "🐳 Building and pushing Docker image..."
gcloud builds submit --config cloudbuild_fastapi.yaml \
  --substitutions=_SERVICE_NAME=$SERVICE_NAME

# -----------------------
# Deploy to Cloud Run
# -----------------------
echo "🚀 Deploying $SERVICE_NAME to Cloud Run..."
gcloud run deploy "$SERVICE_NAME" \
  --image "$IMAGE_NAME" \
  --platform managed \
  --region "$REGION" \
  --allow-unauthenticated \
  --memory 2.5Gi \
  --cpu 1 \
  --timeout 180 \
  --concurrency 2 \
  --min-instances 0 \
  --max-instances 2 \
  --execution-environment gen2 \
  --cpu-boost \
  --set-env-vars HF_HOME=/tmp/huggingface \
  --set-env-vars HUGGING_FACE__API_KEY=$HUGGING_FACE__API_KEY \
  --set-env-vars QDRANT__API_KEY=$QDRANT__API_KEY \
  --set-env-vars QDRANT__URL=$QDRANT__URL \
  --set-env-vars QDRANT__COLLECTION_NAME=$QDRANT__COLLECTION_NAME \
  --set-env-vars QDRANT__DENSE_MODEL_NAME=$QDRANT__DENSE_MODEL_NAME \
  --set-env-vars QDRANT__SPARSE_MODEL_NAME=$QDRANT__SPARSE_MODEL_NAME \
  --set-env-vars OPENROUTER__API_KEY=$OPENROUTER__API_KEY \
  --set-env-vars OPIK__API_KEY=$OPIK__API_KEY \
  --set-env-vars OPIK__PROJECT_NAME=$OPIK__PROJECT_NAME \
  --set-env-vars "^@^ALLOWED_ORIGINS=$ALLOWED_ORIGINS@" \

# Log the allowed origins
echo "✅ Allowed origins set to: $ALLOWED_ORIGINS"

# -----------------------
# Capture the deployed service URL and update BACKEND_URL
#-----------------------
SERVICE_URL=$(gcloud run services describe $SERVICE_NAME --region=$REGION --format='value(status.url)')
echo "Deployment complete!"
echo "Service URL: $SERVICE_URL"



# # -----------------------
# # Update BACKEND_URL dynamically
# # -----------------------
# echo "🔄 Updating BACKEND_URL to $SERVICE_URL..."
# gcloud run services update "$SERVICE_NAME" \
#   --region "$REGION" \
#   --update-env-vars BACKEND_URL="$SERVICE_URL"

# echo "✅ BACKEND_URL updated successfully."
frontend/__init__.py
ADDED
File without changes
frontend/app.py
ADDED
@@ -0,0 +1,560 @@
import os

import gradio as gr
import markdown
import requests
import yaml
from dotenv import load_dotenv

try:
    from src.api.models.provider_models import MODEL_REGISTRY
except ImportError as e:
    raise ImportError(
        "Could not import MODEL_REGISTRY from src.api.models.provider_models. "
        "Check the path and file existence."
    ) from e

# Initialize environment variables
load_dotenv()

BACKEND_URL = os.getenv("BACKEND_URL", "http://localhost:8080")
API_BASE_URL = f"{BACKEND_URL}/search"


# Load feeds from YAML
def load_feeds():
    """Load feeds from the YAML configuration file.
    Returns:
        list: List of feeds with their details.
    """
    feeds_path = os.path.join(os.path.dirname(__file__), "../src/configs/feeds_rss.yaml")
    with open(feeds_path) as f:
        feeds_yaml = yaml.safe_load(f)
    return feeds_yaml.get("feeds", [])


feeds = load_feeds()
feed_names = [f["name"] for f in feeds]
feed_authors = [f["author"] for f in feeds]


# -----------------------
# API helpers
# -----------------------
def fetch_unique_titles(payload):
    """
    Fetch unique article titles based on the search criteria.

    Args:
        payload (dict): The search criteria including query_text, feed_author,
            feed_name, limit, and optional title_keywords.
    Returns:
        list: A list of articles matching the criteria.
    Raises:
        Exception: If the API request fails.
    """
    try:
        resp = requests.post(f"{API_BASE_URL}/unique-titles", json=payload)
        resp.raise_for_status()
        return resp.json().get("results", [])
    except Exception as e:
        raise Exception(f"Failed to fetch titles: {str(e)}") from e


def call_ai(payload, streaming=True):
    """
    Call the AI endpoint with the given payload.
    Args:
        payload (dict): The payload to send to the AI endpoint.
        streaming (bool): Whether to use streaming or non-streaming endpoint.
    Yields:
        tuple: A tuple containing the type of response and the response text.
    """
    endpoint = f"{API_BASE_URL}/ask/stream" if streaming else f"{API_BASE_URL}/ask"
    answer_text = ""
    try:
        if streaming:
            with requests.post(endpoint, json=payload, stream=True) as r:
                r.raise_for_status()
                for chunk in r.iter_content(chunk_size=None, decode_unicode=True):
                    if not chunk:
                        continue
                    if chunk.startswith("__model_used__:"):
                        yield "model", chunk.replace("__model_used__:", "").strip()
                    elif chunk.startswith("__error__"):
                        yield "error", "Request failed. Please try again later."
                        break
                    elif chunk.startswith("__truncated__"):
                        yield "truncated", "AI response truncated due to token limit."
                    else:
                        answer_text += chunk
                        yield "text", answer_text
        else:
            resp = requests.post(endpoint, json=payload)
            resp.raise_for_status()
            data = resp.json()
            answer_text = data.get("answer", "")
            yield "text", answer_text
            if data.get("finish_reason") == "length":
                yield "truncated", "AI response truncated due to token limit."
    except Exception as e:
        yield "error", f"Request failed: {str(e)}"


def get_models_for_provider(provider):
    """
    Get available models for a provider

    Args:
        provider (str): The name of the provider (e.g., "openrouter", "openai")
    Returns:
        list: List of model names available for the provider
    """
    provider_key = provider.lower()
    try:
        config = MODEL_REGISTRY.get_config(provider_key)
        return (
            ["Automatic Model Selection (Model Routing)"]
            + ([config.primary_model] if config.primary_model else [])
            + list(config.candidate_models)
        )
    except Exception:
        return ["Automatic Model Selection (Model Routing)"]


# -----------------------
# Gradio interface functions
# -----------------------
def handle_search_articles(query_text, feed_name, feed_author, title_keywords, limit):
    """
    Handle article search

    Args:
        query_text (str): The text to search for in article titles.
        feed_name (str): The name of the feed to filter articles by.
        feed_author (str): The author of the feed to filter articles by.
        title_keywords (str): Keywords to search for in article titles.
        limit (int): The maximum number of articles to return.
    Returns:
        str: HTML formatted string of search results or error message.
    Raises:
        Exception: If the API request fails.
    """
    if not query_text.strip():
        return "Please enter a query text."

    payload = {
        "query_text": query_text.strip().lower(),
        "feed_author": feed_author.strip() if feed_author else "",
        "feed_name": feed_name.strip() if feed_name else "",
        "limit": limit,
        "title_keywords": title_keywords.strip().lower() if title_keywords else None,
    }

    try:
        results = fetch_unique_titles(payload)
        if not results:
            return "No results found."

        html_output = ""
        for item in results:
            html_output += (
                f"<div style='background-color:#F0F8FF; padding:20px; "
                f"border-radius:10px; font-size:18px; margin-bottom:15px;'>\n"
                f"  <h2 style='font-size:22px; color:#1f4e79; margin-top:0;'>"
                f"{item.get('title', 'No title')}</h2>\n"
                f"  <p style='margin:5px 0;'>"
                f"<b>Newsletter:</b> {item.get('feed_name', 'N/A')}"
                f"</p>\n"
                f"  <p style='margin:5px 0;'>"
                f"<b>Author:</b> {item.get('feed_author', 'N/A')}"
                f"</p>\n"
                f"  <p style='margin:5px 0;'><b>Article Authors:</b> "
                f"{', '.join(item.get('article_author') or ['N/A'])}</p>\n"
                f"  <p style='margin:5px 0;'><b>URL:</b> "
                f"<a href='{item.get('url', '#')}' target='_blank' style='color:#0066cc;'>"
                f"{item.get('url', 'No URL')}</a></p>\n"
                f"</div>\n"
            )
        return html_output

    except Exception as e:
        return f"<div style='color:red; padding:10px;'>Error: {str(e)}</div>"


def handle_ai_question_streaming(
    query_text,
    feed_name,
    feed_author,
    limit,
    provider,
    model,
):
    """
    Handle AI question with streaming

    Args:
        query_text (str): The question to ask the AI.
        feed_name (str): The name of the feed to filter articles by.
        feed_author (str): The author of the feed to filter articles by.
        limit (int): The maximum number of articles to consider.
        provider (str): The LLM provider to use.
        model (str): The specific model to use from the provider.
    Yields:
        tuple: (HTML formatted answer string, model info string)
    """
    if not query_text.strip():
        yield "Please enter a query text.", ""
        return

    if not provider or not model:
        yield "Please select provider and model.", ""
        return

    payload = {
        "query_text": query_text.strip().lower(),
        "feed_author": feed_author.strip() if feed_author else "",
        "feed_name": feed_name.strip() if feed_name else "",
        "limit": limit,
        "provider": provider.lower(),
    }

    if model != "Automatic Model Selection (Model Routing)":
        payload["model"] = model

    try:
        answer_html = ""
        model_info = f"Provider: {provider}"

        for _, (event_type, content) in enumerate(call_ai(payload, streaming=True)):
            if event_type == "text":
                # Convert markdown to HTML
                html_content = markdown.markdown(content, extensions=["tables"])
                answer_html = (
                    f"\n"
                    f"<div style='background-color:#E8F0FE; "
                    f"padding:15px; border-radius:10px; font-size:16px;'>\n"
                    f"  {html_content}\n"
                    f"</div>\n"
                )
                yield answer_html, model_info

            elif event_type == "model":
                model_info = f"Provider: {provider} | Model: {content}"
                yield answer_html, model_info

            elif event_type == "truncated":
                answer_html += (
"""
|
| 106 |
+
Get available models for a provider
|
| 107 |
+
|
| 108 |
+
Args:
|
| 109 |
+
provider (str): The name of the provider (e.g., "openrouter", "openai")
|
| 110 |
+
Returns:
|
| 111 |
+
list: List of model names available for the provider
|
| 112 |
+
"""
|
| 113 |
+
provider_key = provider.lower()
|
| 114 |
+
try:
|
| 115 |
+
config = MODEL_REGISTRY.get_config(provider_key)
|
| 116 |
+
return (
|
| 117 |
+
["Automatic Model Selection (Model Routing)"]
|
| 118 |
+
+ ([config.primary_model] if config.primary_model else [])
|
| 119 |
+
+ list(config.candidate_models)
|
| 120 |
+
)
|
| 121 |
+
except Exception:
|
| 122 |
+
return ["Automatic Model Selection (Model Routing)"]
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
# -----------------------
|
| 126 |
+
# Gradio interface functions
|
| 127 |
+
# -----------------------
|
| 128 |
+
def handle_search_articles(query_text, feed_name, feed_author, title_keywords, limit):
|
| 129 |
+
"""
|
| 130 |
+
Handle article search
|
| 131 |
+
|
| 132 |
+
Args:
|
| 133 |
+
query_text (str): The text to search for in article titles.
|
| 134 |
+
feed_name (str): The name of the feed to filter articles by.
|
| 135 |
+
feed_author (str): The author of the feed to filter articles by.
|
| 136 |
+
title_keywords (str): Keywords to search for in article titles.
|
| 137 |
+
limit (int): The maximum number of articles to return.
|
| 138 |
+
Returns:
|
| 139 |
+
str: HTML formatted string of search results or error message.
|
| 140 |
+
Raises:
|
| 141 |
+
Exception: If the API request fails.
|
| 142 |
+
"""
|
| 143 |
+
if not query_text.strip():
|
| 144 |
+
return "Please enter a query text."
|
| 145 |
+
|
| 146 |
+
payload = {
|
| 147 |
+
"query_text": query_text.strip().lower(),
|
| 148 |
+
"feed_author": feed_author.strip() if feed_author else "",
|
| 149 |
+
"feed_name": feed_name.strip() if feed_name else "",
|
| 150 |
+
"limit": limit,
|
| 151 |
+
"title_keywords": title_keywords.strip().lower() if title_keywords else None,
|
| 152 |
+
}
|
| 153 |
+
|
| 154 |
+
try:
|
| 155 |
+
results = fetch_unique_titles(payload)
|
| 156 |
+
if not results:
|
| 157 |
+
return "No results found."
|
| 158 |
+
|
| 159 |
+
html_output = ""
|
| 160 |
+
for item in results:
|
| 161 |
+
html_output += (
|
| 162 |
+
f"<div style='background-color:#F0F8FF; padding:20px; "
|
| 163 |
+
f"border-radius:10px; font-size:18px; margin-bottom:15px;'>\n"
|
| 164 |
+
f" <h2 style='font-size:22px; color:#1f4e79; margin-top:0;'>"
|
| 165 |
+
f"{item.get('title', 'No title')}</h2>\n"
|
| 166 |
+
f" <p style='margin:5px 0;'>"
|
| 167 |
+
f"<b>Newsletter:</b> {item.get('feed_name', 'N/A')}"
|
| 168 |
+
f"</p>\n"
|
| 169 |
+
f" <p style='margin:5px 0;'>"
|
| 170 |
+
f"<b>Author:</b> {item.get('feed_author', 'N/A')}"
|
| 171 |
+
f"</p>\n"
|
| 172 |
+
f" <p style='margin:5px 0;'><b>Article Authors:</b> "
|
| 173 |
+
f"{', '.join(item.get('article_author') or ['N/A'])}</p>\n"
|
| 174 |
+
f" <p style='margin:5px 0;'><b>URL:</b> "
|
| 175 |
+
f"<a href='{item.get('url', '#')}' target='_blank' style='color:#0066cc;'>"
|
| 176 |
+
f"{item.get('url', 'No URL')}</a></p>\n"
|
| 177 |
+
f"</div>\n"
|
| 178 |
+
)
|
| 179 |
+
return html_output
|
| 180 |
+
|
| 181 |
+
except Exception as e:
|
| 182 |
+
return f"<div style='color:red; padding:10px;'>Error: {str(e)}</div>"
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def handle_ai_question_streaming(
|
| 186 |
+
query_text,
|
| 187 |
+
feed_name,
|
| 188 |
+
feed_author,
|
| 189 |
+
limit,
|
| 190 |
+
provider,
|
| 191 |
+
model,
|
| 192 |
+
):
|
| 193 |
+
"""
|
| 194 |
+
Handle AI question with streaming
|
| 195 |
+
|
| 196 |
+
Args:
|
| 197 |
+
query_text (str): The question to ask the AI.
|
| 198 |
+
feed_name (str): The name of the feed to filter articles by.
|
| 199 |
+
feed_author (str): The author of the feed to filter articles by.
|
| 200 |
+
limit (int): The maximum number of articles to consider.
|
| 201 |
+
provider (str): The LLM provider to use.
|
| 202 |
+
model (str): The specific model to use from the provider.
|
| 203 |
+
Yields:
|
| 204 |
+
tuple: (HTML formatted answer string, model info string)
|
| 205 |
+
"""
|
| 206 |
+
if not query_text.strip():
|
| 207 |
+
yield "Please enter a query text.", ""
|
| 208 |
+
return
|
| 209 |
+
|
| 210 |
+
if not provider or not model:
|
| 211 |
+
yield "Please select provider and model.", ""
|
| 212 |
+
return
|
| 213 |
+
|
| 214 |
+
payload = {
|
| 215 |
+
"query_text": query_text.strip().lower(),
|
| 216 |
+
"feed_author": feed_author.strip() if feed_author else "",
|
| 217 |
+
"feed_name": feed_name.strip() if feed_name else "",
|
| 218 |
+
"limit": limit,
|
| 219 |
+
"provider": provider.lower(),
|
| 220 |
+
}
|
| 221 |
+
|
| 222 |
+
if model != "Automatic Model Selection (Model Routing)":
|
| 223 |
+
payload["model"] = model
|
| 224 |
+
|
| 225 |
+
try:
|
| 226 |
+
answer_html = ""
|
| 227 |
+
model_info = f"Provider: {provider}"
|
| 228 |
+
|
| 229 |
+
for _, (event_type, content) in enumerate(call_ai(payload, streaming=True)):
|
| 230 |
+
if event_type == "text":
|
| 231 |
+
# Convert markdown to HTML
|
| 232 |
+
html_content = markdown.markdown(content, extensions=["tables"])
|
| 233 |
+
answer_html = (
|
| 234 |
+
f"\n"
|
| 235 |
+
f"<div style='background-color:#E8F0FE; "
|
| 236 |
+
f"padding:15px; border-radius:10px; font-size:16px;'>\n"
|
| 237 |
+
f" {html_content}\n"
|
| 238 |
+
f"</div>\n"
|
| 239 |
+
)
|
| 240 |
+
yield answer_html, model_info
|
| 241 |
+
|
| 242 |
+
elif event_type == "model":
|
| 243 |
+
model_info = f"Provider: {provider} | Model: {content}"
|
| 244 |
+
yield answer_html, model_info
|
| 245 |
+
|
| 246 |
+
elif event_type == "truncated":
|
| 247 |
+
answer_html += (
|
| 248 |
+
f"<div style='color:#ff6600; padding:10px; font-weight:bold;'>⚠️ {content}</div>"
|
| 249 |
+
)
|
| 250 |
+
yield answer_html, model_info
|
| 251 |
+
|
| 252 |
+
elif event_type == "error":
|
| 253 |
+
error_html = (
|
| 254 |
+
f"<div style='color:red; padding:10px; font-weight:bold;'>❌ {content}</div>"
|
| 255 |
+
)
|
| 256 |
+
yield error_html, model_info
|
| 257 |
+
break
|
| 258 |
+
|
| 259 |
+
except Exception as e:
|
| 260 |
+
error_html = f"<div style='color:red; padding:10px;'>Error: {str(e)}</div>"
|
| 261 |
+
yield error_html, model_info
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
def handle_ai_question_non_streaming(query_text, feed_name, feed_author, limit, provider, model):
|
| 265 |
+
"""
|
| 266 |
+
Handle AI question without streaming
|
| 267 |
+
|
| 268 |
+
Args:
|
| 269 |
+
query_text (str): The question to ask the AI.
|
| 270 |
+
feed_name (str): The name of the feed to filter articles by.
|
| 271 |
+
feed_author (str): The author of the feed to filter articles by.
|
| 272 |
+
limit (int): The maximum number of articles to consider.
|
| 273 |
+
provider (str): The LLM provider to use.
|
| 274 |
+
model (str): The specific model to use from the provider.
|
| 275 |
+
|
| 276 |
+
Returns:
|
| 277 |
+
tuple: (HTML formatted answer string, model info string)
|
| 278 |
+
"""
|
| 279 |
+
if not query_text.strip():
|
| 280 |
+
return "Please enter a query text.", ""
|
| 281 |
+
|
| 282 |
+
if not provider or not model:
|
| 283 |
+
return "Please select provider and model.", ""
|
| 284 |
+
|
| 285 |
+
payload = {
|
| 286 |
+
"query_text": query_text.strip().lower(),
|
| 287 |
+
"feed_author": feed_author.strip() if feed_author else "",
|
| 288 |
+
"feed_name": feed_name.strip() if feed_name else "",
|
| 289 |
+
"limit": limit,
|
| 290 |
+
"provider": provider.lower(),
|
| 291 |
+
}
|
| 292 |
+
|
| 293 |
+
if model != "Automatic Model Selection (Model Routing)":
|
| 294 |
+
payload["model"] = model
|
| 295 |
+
|
| 296 |
+
try:
|
| 297 |
+
answer_html = ""
|
| 298 |
+
model_info = f"Provider: {provider}"
|
| 299 |
+
|
| 300 |
+
for event_type, content in call_ai(payload, streaming=False):
|
| 301 |
+
if event_type == "text":
|
| 302 |
+
html_content = markdown.markdown(content, extensions=["tables"])
|
| 303 |
+
answer_html = (
|
| 304 |
+
"<div style='background-color:#E8F0FE; "
|
| 305 |
+
"padding:15px; border-radius:10px; font-size:16px;'>\n"
|
| 306 |
+
f"{html_content}\n"
|
| 307 |
+
"</div>\n"
|
| 308 |
+
)
|
| 309 |
+
elif event_type == "model":
|
| 310 |
+
model_info = f"Provider: {provider} | Model: {content}"
|
| 311 |
+
elif event_type == "truncated":
|
| 312 |
+
answer_html += (
|
| 313 |
+
f"<div style='color:#ff6600; padding:10px; font-weight:bold;'>⚠️ {content}</div>"
|
| 314 |
+
)
|
| 315 |
+
elif event_type == "error":
|
| 316 |
+
return (
|
| 317 |
+
f"<div style='color:red; padding:10px; font-weight:bold;'>❌ {content}</div>",
|
| 318 |
+
model_info,
|
| 319 |
+
)
|
| 320 |
+
|
| 321 |
+
return answer_html, model_info
|
| 322 |
+
|
| 323 |
+
except Exception as e:
|
| 324 |
+
return (
|
| 325 |
+
f"<div style='color:red; padding:10px;'>Error: {str(e)}</div>",
|
| 326 |
+
f"Provider: {provider}",
|
| 327 |
+
)
|
| 328 |
+
|
| 329 |
+
|
| 330 |
+
def update_model_choices(provider):
|
| 331 |
+
"""
|
| 332 |
+
Update model choices based on selected provider
|
| 333 |
+
Args:
|
| 334 |
+
provider (str): The selected LLM provider
|
| 335 |
+
Returns:
|
| 336 |
+
gr.Dropdown: Updated model dropdown component
|
| 337 |
+
"""
|
| 338 |
+
models = get_models_for_provider(provider)
|
| 339 |
+
return gr.Dropdown(choices=models, value=models[0] if models else "")
|
| 340 |
+
|
| 341 |
+
|
| 342 |
+
# -----------------------
|
| 343 |
+
# Gradio UI
|
| 344 |
+
# -----------------------
|
| 345 |
+
with gr.Blocks(title="Substack Articles LLM Engine", theme=gr.themes.Soft()) as demo:
|
| 346 |
+
# Header
|
| 347 |
+
gr.HTML(
|
| 348 |
+
"<div style='background-color:#ff6719; padding:20px; border-radius:12px; "
|
| 349 |
+
"text-align:center; margin-bottom:20px;'>\n"
|
| 350 |
+
" <h1 style='color:white; font-size:42px; font-family:serif; margin:0;'>\n"
|
| 351 |
+
" 📰 Substack Articles LLM Engine\n"
|
| 352 |
+
" </h1>\n"
|
| 353 |
+
"</div>\n"
|
| 354 |
+
)
|
| 355 |
+
|
| 356 |
+
with gr.Row():
|
| 357 |
+
with gr.Column(scale=1):
|
| 358 |
+
# Search Mode Selection
|
| 359 |
+
gr.Markdown("## 🔍 Select Search Mode")
|
| 360 |
+
search_type = gr.Radio(
|
| 361 |
+
choices=["Search Articles", "Ask the AI"],
|
| 362 |
+
value="Search Articles",
|
| 363 |
+
label="Search Mode",
|
| 364 |
+
info="Choose between searching for articles or asking AI questions",
|
| 365 |
+
)
|
| 366 |
+
|
| 367 |
+
# Common filters
|
| 368 |
+
gr.Markdown("### Filters")
|
| 369 |
+
query_text = gr.Textbox(label="Query", placeholder="Type your query here...", lines=3)
|
| 370 |
+
feed_author = gr.Dropdown(
|
| 371 |
+
choices=[""] + feed_authors, label="Author (optional)", value=""
|
| 372 |
+
)
|
| 373 |
+
feed_name = gr.Dropdown(
|
| 374 |
+
choices=[""] + feed_names, label="Newsletter (optional)", value=""
|
| 375 |
+
)
|
| 376 |
+
|
| 377 |
+
# Conditional fields based on search type
|
| 378 |
+
title_keywords = gr.Textbox(
|
| 379 |
+
label="Title Keywords (optional)",
|
| 380 |
+
placeholder="Filter by words in the title",
|
| 381 |
+
visible=True,
|
| 382 |
+
)
|
| 383 |
+
|
| 384 |
+
limit = gr.Slider(
|
| 385 |
+
minimum=1, maximum=20, step=1, label="Number of results", value=5, visible=True
|
| 386 |
+
)
|
| 387 |
+
|
| 388 |
+
# LLM Options (only visible for AI mode)
|
| 389 |
+
with gr.Group(visible=False) as llm_options:
|
| 390 |
+
gr.Markdown("### ⚙️ LLM Options")
|
| 391 |
+
provider = gr.Dropdown(
|
| 392 |
+
choices=["OpenRouter", "HuggingFace", "OpenAI"],
|
| 393 |
+
label="Select LLM Provider",
|
| 394 |
+
value="OpenRouter",
|
| 395 |
+
)
|
| 396 |
+
model = gr.Dropdown(
|
| 397 |
+
choices=get_models_for_provider("OpenRouter"),
|
| 398 |
+
label="Select Model",
|
| 399 |
+
value="Automatic Model Selection (Model Routing)",
|
| 400 |
+
)
|
| 401 |
+
streaming_mode = gr.Radio(
|
| 402 |
+
choices=["Streaming", "Non-Streaming"],
|
| 403 |
+
value="Streaming",
|
| 404 |
+
label="Answer Mode",
|
| 405 |
+
info="Streaming shows results as they're generated",
|
| 406 |
+
)
|
| 407 |
+
|
| 408 |
+
# Submit button
|
| 409 |
+
submit_btn = gr.Button("🔎 Search / Ask AI", variant="primary", size="lg")
|
| 410 |
+
|
| 411 |
+
with gr.Column(scale=2):
|
| 412 |
+
# Output area
|
| 413 |
+
output_html = gr.HTML(label="Results")
|
| 414 |
+
model_info = gr.HTML(visible=False)
|
| 415 |
+
|
| 416 |
+
# Event handlers
|
| 417 |
+
def toggle_visibility(search_type):
|
| 418 |
+
"""
|
| 419 |
+
Toggle visibility of components based on search type
|
| 420 |
+
|
| 421 |
+
Args:
|
| 422 |
+
search_type (str): The selected search type
|
| 423 |
+
Returns:
|
| 424 |
+
tuple: Visibility states for (llm_options, title_keywords, model_info)
|
| 425 |
+
"""
|
| 426 |
+
|
| 427 |
+
show_title_keywords = search_type == "Search Articles"
|
| 428 |
+
show_llm_options = search_type == "Ask the AI"
|
| 429 |
+
show_model_info = search_type == "Ask the AI"
|
| 430 |
+
show_limit_slider = search_type == "Search Articles"
|
| 431 |
+
|
| 432 |
+
return (
|
| 433 |
+
gr.Group(visible=show_llm_options), # llm_options
|
| 434 |
+
gr.Textbox(visible=show_title_keywords), # title_keywords
|
| 435 |
+
gr.HTML(visible=show_model_info), # model_info
|
| 436 |
+
gr.Slider(visible=show_limit_slider), # limit
|
| 437 |
+
)
|
| 438 |
+
|
| 439 |
+
search_type.change(
|
| 440 |
+
fn=toggle_visibility,
|
| 441 |
+
inputs=[search_type],
|
| 442 |
+
outputs=[llm_options, title_keywords, model_info, limit],
|
| 443 |
+
)
|
| 444 |
+
|
| 445 |
+
# Update model dropdown when provider changes
|
| 446 |
+
provider.change(fn=update_model_choices, inputs=[provider], outputs=[model])
|
| 447 |
+
|
| 448 |
+
# Unified submission handler
|
| 449 |
+
def handle_submission(
|
| 450 |
+
search_type,
|
| 451 |
+
streaming_mode,
|
| 452 |
+
query_text,
|
| 453 |
+
feed_name,
|
| 454 |
+
feed_author,
|
| 455 |
+
title_keywords,
|
| 456 |
+
limit,
|
| 457 |
+
provider,
|
| 458 |
+
model,
|
| 459 |
+
):
|
| 460 |
+
"""
|
| 461 |
+
Handle submission based on search type and streaming mode
|
| 462 |
+
Args:
|
| 463 |
+
search_type (str): The selected search type
|
| 464 |
+
streaming_mode (str): The selected streaming mode
|
| 465 |
+
query_text (str): The query text
|
| 466 |
+
feed_name (str): The selected feed name
|
| 467 |
+
feed_author (str): The selected feed author
|
| 468 |
+
title_keywords (str): The title keywords (if applicable)
|
| 469 |
+
limit (int): The number of results to return
|
| 470 |
+
provider (str): The selected LLM provider (if applicable)
|
| 471 |
+
model (str): The selected model (if applicable)
|
| 472 |
+
Returns:
|
| 473 |
+
tuple: (HTML formatted answer string, model info string)
|
| 474 |
+
"""
|
| 475 |
+
if search_type == "Search Articles":
|
| 476 |
+
result = handle_search_articles(
|
| 477 |
+
query_text, feed_name, feed_author, title_keywords, limit
|
| 478 |
+
)
|
| 479 |
+
return result, "" # Always return two values
|
| 480 |
+
else: # Ask the AI
|
| 481 |
+
if streaming_mode == "Non-Streaming":
|
| 482 |
+
return handle_ai_question_non_streaming(
|
| 483 |
+
query_text, feed_name, feed_author, limit, provider, model
|
| 484 |
+
)
|
| 485 |
+
else:
|
| 486 |
+
# For streaming, we'll use a separate handler
|
| 487 |
+
return "", ""
|
| 488 |
+
|
| 489 |
+
# Streaming handler
|
| 490 |
+
def handle_streaming_submission(
|
| 491 |
+
search_type,
|
| 492 |
+
streaming_mode,
|
| 493 |
+
query_text,
|
| 494 |
+
feed_name,
|
| 495 |
+
feed_author,
|
| 496 |
+
title_keywords,
|
| 497 |
+
limit,
|
| 498 |
+
provider,
|
| 499 |
+
model,
|
| 500 |
+
):
|
| 501 |
+
"""
|
| 502 |
+
Handle submission with streaming support
|
| 503 |
+
Args:
|
| 504 |
+
search_type (str): The selected search type
|
| 505 |
+
streaming_mode (str): The selected streaming mode
|
| 506 |
+
query_text (str): The query text
|
| 507 |
+
feed_name (str): The selected feed name
|
| 508 |
+
feed_author (str): The selected feed author
|
| 509 |
+
title_keywords (str): The title keywords (if applicable)
|
| 510 |
+
limit (int): The number of results to return
|
| 511 |
+
provider (str): The selected LLM provider (if applicable)
|
| 512 |
+
model (str): The selected model (if applicable)
|
| 513 |
+
Yields:
|
| 514 |
+
tuple: (HTML formatted answer string, model info string)
|
| 515 |
+
"""
|
| 516 |
+
if search_type == "Ask the AI" and streaming_mode == "Streaming":
|
| 517 |
+
yield from handle_ai_question_streaming(
|
| 518 |
+
query_text, feed_name, feed_author, limit, provider, model
|
| 519 |
+
)
|
| 520 |
+
else:
|
| 521 |
+
# For non-streaming cases, just return the regular result
|
| 522 |
+
if search_type == "Search Articles":
|
| 523 |
+
result = handle_search_articles(
|
| 524 |
+
query_text, feed_name, feed_author, title_keywords, limit
|
| 525 |
+
)
|
| 526 |
+
yield result, ""
|
| 527 |
+
else:
|
| 528 |
+
result_html, model_info_text = handle_ai_question_non_streaming(
|
| 529 |
+
query_text, feed_name, feed_author, limit, provider, model
|
| 530 |
+
)
|
| 531 |
+
yield result_html, model_info_text
|
| 532 |
+
|
| 533 |
+
# Single click handler that routes based on mode
|
| 534 |
+
submit_btn.click(
|
| 535 |
+
fn=handle_streaming_submission,
|
| 536 |
+
inputs=[
|
| 537 |
+
search_type,
|
| 538 |
+
streaming_mode,
|
| 539 |
+
query_text,
|
| 540 |
+
feed_name,
|
| 541 |
+
feed_author,
|
| 542 |
+
title_keywords,
|
| 543 |
+
limit,
|
| 544 |
+
provider,
|
| 545 |
+
model,
|
| 546 |
+
],
|
| 547 |
+
outputs=[output_html, model_info],
|
| 548 |
+
show_progress=True,
|
| 549 |
+
)
|
| 550 |
+
|
| 551 |
+
# For local testing
|
| 552 |
+
if __name__ == "__main__":
|
| 553 |
+
demo.launch()
|
| 554 |
+
|
| 555 |
+
# # For Google Cloud Run deployment
|
| 556 |
+
# if __name__ == "__main__":
|
| 557 |
+
# demo.launch(
|
| 558 |
+
# server_name="0.0.0.0",
|
| 559 |
+
# server_port=int(os.environ.get("PORT", 8080))
|
| 560 |
+
# )
|
prefect-cloud.yaml
ADDED
@@ -0,0 +1,52 @@
pull:
  - prefect.deployments.steps.git_clone:
      id: clone-step
      repository: https://github.com/Indraneel99/substack-newsletters-search-course
      credentials: "{{ prefect.blocks.github-credentials.my-gh-creds }}"

  - prefect.deployments.steps.run_shell_script:
      id: install-build-tools
      script: |
        apt-get update -y
        apt-get install -y --no-install-recommends build-essential g++

  - prefect.deployments.steps.pip_install_requirements:
      directory: "{{ clone-step.directory }}"
      requirements_file: requirements.txt
      stream_output: true

deployments:
  - name: rss-ingest
    entrypoint: src/pipelines/flows/rss_ingestion_flow.py:rss_ingest_flow
    work_pool:
      name: default-work-pool
      job_variables:
        env:
          SUPABASE_DB__TABLE_NAME: "{{ prefect.blocks.secret.supabase-db--table-name }}"
          SUPABASE_DB__HOST: "{{ prefect.blocks.secret.supabase-db--host }}"
          SUPABASE_DB__NAME: "{{ prefect.blocks.secret.supabase-db--name }}"
          SUPABASE_DB__USER: "{{ prefect.blocks.secret.supabase-db--user }}"
          SUPABASE_DB__PASSWORD: "{{ prefect.blocks.secret.supabase-db--password }}"
          SUPABASE_DB__PORT: "{{ prefect.blocks.secret.supabase-db--port }}"

    schedule:
      cron: "0 0 * * 7"

  - name: qdrant-embeddings
    entrypoint: src/pipelines/flows/embeddings_ingestion_flow.py:qdrant_ingest_flow
    work_pool:
      name: default-work-pool
      job_variables:
        env:
          SUPABASE_DB__TABLE_NAME: "{{ prefect.blocks.secret.supabase-db--table-name }}"
          SUPABASE_DB__HOST: "{{ prefect.blocks.secret.supabase-db--host }}"
          SUPABASE_DB__NAME: "{{ prefect.blocks.secret.supabase-db--name }}"
          SUPABASE_DB__USER: "{{ prefect.blocks.secret.supabase-db--user }}"
          SUPABASE_DB__PASSWORD: "{{ prefect.blocks.secret.supabase-db--password }}"
          SUPABASE_DB__PORT: "{{ prefect.blocks.secret.supabase-db--port }}"
          QDRANT__API_KEY: "{{ prefect.blocks.secret.qdrant--api-key }}"
          QDRANT__URL: "{{ prefect.blocks.secret.qdrant--url }}"
          QDRANT__COLLECTION_NAME: "{{ prefect.blocks.secret.qdrant--collection-name }}"

    schedule:
      cron: "0 0 * * 7"
prefect-local.yaml
ADDED
@@ -0,0 +1,53 @@
pull:
  - prefect.deployments.steps.git_clone:
      id: clone-step
      repository: https://github.com/Indraneel99/substack-newsletters-search-course
      credentials: "{{ prefect.blocks.github-credentials.my-gh-creds }}"

  # This function ensures pip is installed in the environment (Only needed for Prefect Server)
  - prefect.deployments.steps.run_shell_script:
      id: install-pip
      directory: "{{ clone-step.directory }}"
      script: |
        python -m ensurepip --upgrade

  - prefect.deployments.steps.pip_install_requirements:
      directory: "{{ clone-step.directory }}"
      requirements_file: requirements.txt
      stream_output: true

deployments:
  - name: rss-ingest
    entrypoint: src/pipelines/flows/rss_ingestion_flow.py:rss_ingest_flow
    work_pool:
      name: default-work-pool
      job_variables:
        env:
          SUPABASE_DB__TABLE_NAME: "{{ prefect.blocks.secret.supabase-db--table-name }}"
          SUPABASE_DB__HOST: "{{ prefect.blocks.secret.supabase-db--host }}"
          SUPABASE_DB__NAME: "{{ prefect.blocks.secret.supabase-db--name }}"
          SUPABASE_DB__USER: "{{ prefect.blocks.secret.supabase-db--user }}"
          SUPABASE_DB__PASSWORD: "{{ prefect.blocks.secret.supabase-db--password }}"
          SUPABASE_DB__PORT: "{{ prefect.blocks.secret.supabase-db--port }}"

    schedule:
      cron: "0 0 * * 7"

  - name: qdrant-embeddings
    entrypoint: src/pipelines/flows/embeddings_ingestion_flow.py:qdrant_ingest_flow
    work_pool:
      name: default-work-pool
      job_variables:
        env:
          SUPABASE_DB__TABLE_NAME: "{{ prefect.blocks.secret.supabase-db--table-name }}"
          SUPABASE_DB__HOST: "{{ prefect.blocks.secret.supabase-db--host }}"
          SUPABASE_DB__NAME: "{{ prefect.blocks.secret.supabase-db--name }}"
          SUPABASE_DB__USER: "{{ prefect.blocks.secret.supabase-db--user }}"
          SUPABASE_DB__PASSWORD: "{{ prefect.blocks.secret.supabase-db--password }}"
          SUPABASE_DB__PORT: "{{ prefect.blocks.secret.supabase-db--port }}"
          QDRANT__API_KEY: "{{ prefect.blocks.secret.qdrant--api-key }}"
          QDRANT__URL: "{{ prefect.blocks.secret.qdrant--url }}"
          QDRANT__COLLECTION_NAME: "{{ prefect.blocks.secret.qdrant--collection-name }}"

    schedule:
      cron: "0 0 * * 7"
pyproject.toml
ADDED
@@ -0,0 +1,174 @@
[project]
name = "substack-newsletters-search-course"
version = "1.0.0"
description = "A pipeline to retrieve Newsletters from Substack"
readme = "README.md"
authors = [
    {name = "Benito Martin"}
]
license = {text = "MIT License"}
requires-python = ">=3.12"

dependencies = [
    "aiohttp>=3.12.15",
    "beautifulsoup4>=4.13.5",
    "fastapi[standard]>=0.116.1",
    "fastembed>=0.7.2",
    "langchain>=0.3.27",
    "langchain-text-splitters>=0.3.9",
    "loguru>=0.7.3",
    "lxml>=5.4.0",
    "openai>=1.103.0",
    "opik>=1.8.29",
    "prefect>=3.4.14",
    "psutil>=7.0.0",
    "psycopg2-binary>=2.9.10",
    "pydantic>=2.11.7",
    "pydantic-settings>=2.10.1",
    "qdrant-client>=1.15.1",
    "sqlalchemy>=2.0.43",
    "supabase>=2.18.1",
    "uvloop>=0.21.0",
    "gradio>=5.45.0",
    "markdown>=3.9",
    "python-dotenv>=1.1.1",
    "markdownify>=1.2.0",
    "prefect-github>=0.3.1",
    "requests>=2.32.5",
]

# [[tool.uv.index]]
# name = "pytorch-cpu"
# url = "https://download.pytorch.org/whl/cpu"
# explicit = true


[dependency-groups]
dev = [
    "pre-commit>=4.3.0",
    "types-python-dateutil>=2.9.0.20250822",
    "types-pyyaml>=6.0.12.20250822",
    "types-requests>=2.32.4.20250809",
]
lint = [
    "mypy>=1.17.1",
    "ruff>=0.12.10",
    "types-markdown>=3.9.0.20250906",
    "types-python-dateutil>=2.9.0.20250822",
    "types-pyyaml>=6.0.12.20250822",
    "types-requests>=2.32.4.20250809",
]
test = [
    "pytest>=8.4.1",
    "pytest-asyncio>=1.1.0",
    "responses>=0.25.8",
]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build]
packages = ["src"]

######################################
# --- Linting & Formatting Tools --- #
######################################

[tool.ruff]
# Assume Python 3.12
target-version = "py312"

# Same as Black.
line-length = 100
indent-width = 4

# Exclude a variety of commonly ignored directories.
exclude = [
    ".bzr",
    ".direnv",
    ".eggs",
    ".git",
    ".git-rewrite",
    ".hg",
    ".mypy_cache",
    ".nox",
    ".pants.d",
    ".pytype",
    ".ruff_cache",
    ".svn",
    ".tox",
    ".venv",
    "__pypackages__",
    "_build",
    "buck-out",
    "build",
    "dist",
    "node_modules",
    "venv",
]

# Whether to show an enumeration of all fixed lint violations
show-fixes = true

# Enable common lint rules.
lint.select = [
    "B",    # flake8-bugbear
    "E",    # pycodestyle
    "F",    # Pyflakes
    "I",    # isort
    "SIM",  # flake8-simplify
    "UP",   # pyupgrade
    "D102", # docstring method
    "D103", # docstring function
    "D414", # docstring missing section
    "D419", # empty docstring
    # "D101", # docstring missing class
]

lint.ignore = []

# Allow autofix for all enabled rules (when `--fix` is provided).
lint.fixable = ["ALL"]
lint.unfixable = []

[tool.ruff.lint.mccabe]
# Maximum allowed McCabe complexity.
max-complexity = 10


#########################
# --- Static Typing --- #
#########################

[tool.mypy]
# Use `packages` to specify the package root
packages = ["src"]
explicit_package_bases = true

# All other configurations
ignore_missing_imports = true
disallow_untyped_defs = false
check_untyped_defs = true
# warn_redundant_casts = true
warn_unused_ignores = false
warn_return_any = false
strict_optional = true

# [tool.mypy]
# # Only check src directory, with src as the package root
# files = ["src"]  # Check from project root instead of just src
# mypy_path = ["src"]  # Set mypy path to project root

#########################
# --- Testing Tools --- #
#########################

[tool.pytest.ini_options]
testpaths = [ "tests" ]
python_files = [ "test_*.py" ]
addopts = "-ra -v -s"
filterwarnings = [
    "ignore::DeprecationWarning",
    "ignore::UserWarning"
]
requirements.txt
ADDED
@@ -0,0 +1,23 @@
aiohttp
beautifulsoup4
fastapi[standard]
fastembed
langchain
langchain-text-splitters
loguru
lxml
openai
opik
prefect
psutil
psycopg2-binary
pydantic
pydantic-settings
qdrant-client
sqlalchemy
supabase
uvloop
gradio
markdown
python-dotenv
markdownify
src/__init__.py
ADDED
File without changes
src/api/__init__.py
ADDED
File without changes
src/api/exceptions/__init__.py
ADDED
File without changes
src/api/exceptions/exception_handlers.py
ADDED
@@ -0,0 +1,97 @@
from fastapi import Request
from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse
from qdrant_client.http.exceptions import UnexpectedResponse

from src.utils.logger_util import setup_logging

logger = setup_logging()


async def validation_exception_handler(request: Request, exc: Exception) -> JSONResponse:
    """Handle FastAPI request validation errors.

    Args:
        request (Request): The incoming request that caused the validation error.
        exc (Exception): The exception instance.

    Returns:
        JSONResponse: A JSON response with status code 422 and error details.

    """
    if isinstance(exc, RequestValidationError):
        logger.warning(f"Validation error on {request.url}: {exc.errors()}")
        return JSONResponse(
            status_code=422,
            content={
                "type": "validation_error",
                "message": "Invalid request",
                "details": exc.errors(),
            },
        )

    logger.exception(f"Unexpected exception on {request.url}: {exc}")
    return JSONResponse(
        status_code=500,
        content={
            "type": "internal_error",
            "message": "Internal server error",
            "details": str(exc),
        },
    )


async def qdrant_exception_handler(request: Request, exc: Exception) -> JSONResponse:
    """Handle unexpected responses from Qdrant.

    Args:
        request (Request): The incoming request that caused the error.
        exc (Exception): The exception instance.

    Returns:
        JSONResponse: A JSON response with status code 500 and error details.

    """
    if isinstance(exc, UnexpectedResponse):
        logger.error(f"Qdrant error on {request.url}: {exc}")
        return JSONResponse(
            status_code=500,
            content={
                "type": "qdrant_error",
                "message": "Vector store error",
                "details": str(exc),
            },
        )

    # Fallback to general internal error if exception is not UnexpectedResponse
    logger.exception(f"Unexpected exception on {request.url}: {exc}")
    return JSONResponse(
        status_code=500,
        content={
            "type": "internal_error",
            "message": "Internal server error",
            "details": str(exc),
        },
    )


async def general_exception_handler(request: Request, exc: Exception) -> JSONResponse:
    """Handle all uncaught exceptions in FastAPI.

    Args:
        request (Request): The incoming request that caused the error.
        exc (Exception): The exception instance.

    Returns:
        JSONResponse: A JSON response with status code 500 and error details.

    """
    logger.exception(f"Unhandled exception on {request.url}: {exc}")
    return JSONResponse(
        status_code=500,
        content={
            "type": "internal_error",
            "message": "Internal server error",
            "details": str(exc),
        },
    )
src/api/main.py
ADDED
@@ -0,0 +1,142 @@
import os
from contextlib import asynccontextmanager

import dotenv
from fastapi import FastAPI
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware
from qdrant_client.http.exceptions import UnexpectedResponse

from src.api.exceptions.exception_handlers import (
    general_exception_handler,
    qdrant_exception_handler,
    validation_exception_handler,
)
from src.api.middleware.logging_middleware import LoggingMiddleware
from src.api.routes.health_routes import router as health_router
from src.api.routes.search_routes import router as search_router
from src.infrastructure.qdrant.qdrant_vectorstore import AsyncQdrantVectorStore
from src.utils.logger_util import setup_logging

# Load environment variables from .env file
dotenv.load_dotenv()

# -----------------------
# Logger setup
# -----------------------
logger = setup_logging()


# -----------------------
# Lifespan
# -----------------------
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Lifespan context manager to handle startup and shutdown events.
    Initializes the Qdrant vector store on startup and ensures proper cleanup on shutdown.

    Args:
        app (FastAPI): The FastAPI application instance.
    Yields:
        None

    Exceptions:
        Raises exceptions if initialization or cleanup fails.
    """
    ## Ensure the cache directory exists and is writable (HF downloads the models here)
    cache_dir = "/tmp/fastembed_cache"
    os.makedirs(cache_dir, exist_ok=True)  # Ensure directory exists
    # Force /tmp/huggingface in Google Cloud so that it's writable.
    # This is the default cache dir of Huggingface.
    # Otherwise it tries ~/.cache/huggingface (read-only directory) in Google Cloud.
    # That directory is not writable.
    logger.info(f"HF_HOME: {os.environ.get('HF_HOME', 'Not set')}")
    logger.info(f"Cache dir: {cache_dir}, Writable: {os.access(cache_dir, os.W_OK)}")
    cache_contents = os.listdir(cache_dir) if os.path.exists(cache_dir) else "Empty"
    logger.info(f"Cache contents before: {cache_contents}")
    try:
        # creates Qdrant client internally
        app.state.vectorstore = AsyncQdrantVectorStore(cache_dir=cache_dir)
    except Exception as e:
        logger.exception("Failed to initialize QdrantVectorStore")
        raise e
    yield
    try:
        await app.state.vectorstore.client.close()
    except Exception:
        logger.exception("Failed to close Qdrant client")


# -----------------------
# FastAPI application
# -----------------------

app = FastAPI(
    title="Substack RAG API",
    version="1.0",
    description="API for Substack Retrieval-Augmented Generation (RAG) system",
    lifespan=lifespan,
    # root_path=root_path,
)


# -----------------------
# Middleware
# -----------------------


# Log the allowed origins
allowed_origins = os.getenv("ALLOWED_ORIGINS", "").split(",")
logger.info(f"CORS allowed origins: {allowed_origins}")

app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,  # ["*"], # allowed_origins,
    allow_credentials=True,
    allow_methods=["GET", "POST", "OPTIONS"],  # only the methods the app uses
    allow_headers=["Authorization", "Content-Type"],  # only headers needed
)

app.add_middleware(LoggingMiddleware)


# -----------------------
# Exception Handlers
# -----------------------
app.add_exception_handler(RequestValidationError, validation_exception_handler)
app.add_exception_handler(UnexpectedResponse, qdrant_exception_handler)
app.add_exception_handler(Exception, general_exception_handler)


# -----------------------
# Routers
# -----------------------
app.include_router(search_router, prefix="/search", tags=["search"])
app.include_router(health_router, tags=["health"])

# For Cloud Run, run the app directly
if __name__ == "__main__":
    import uvicorn

    port = int(os.environ.get("PORT", 8080))  # Cloud Run provides PORT env var

    uvicorn.run(
        "src.api.main:app",
        host="0.0.0.0",
        port=port,
        log_level="info",
        reload=True,  # Enable auto-reload for development
    )

    # config = uvicorn.Config(
    #     app,
    #     port=port,
    #     log_level="info",
    #     # loop="uvloop",
    #     # workers=1,
    #     reload=True
    # )
    # server = uvicorn.Server(config)

    # server.run()
src/api/middleware/__init__.py
ADDED
File without changes
src/api/middleware/logging_middleware.py
ADDED
@@ -0,0 +1,73 @@
import time

from fastapi import Request
from starlette.middleware.base import BaseHTTPMiddleware

from src.utils.logger_util import setup_logging

logger = setup_logging()


class LoggingMiddleware(BaseHTTPMiddleware):
    """Middleware for logging incoming HTTP requests and their responses.

    Logs the request method, URL, client IP, and headers (excluding sensitive
    headers like Authorization and Cookie), as well as the response status code
    and request duration in milliseconds.
    Exceptions raised during request processing are logged with the full traceback.

    Usage:
        Add this middleware to your FastAPI app:
            app.add_middleware(LoggingMiddleware)

    Attributes:
        logger: Configured logger from `setup_logging`.

    """

    async def dispatch(self, request: Request, call_next):
        """Process the incoming request, log its details, and measure execution time.

        Args:
            request (Request): The incoming FastAPI request.
            call_next: Callable to invoke the next middleware or route handler.

        Returns:
            Response: The HTTP response returned by the next middleware or route handler.

        Raises:
            Exception: Propagates any exceptions raised by downstream handlers after logging them.

        """
        start_time = time.time()
        client_host = request.client.host if request.client else "unknown"

        # logger.debug(f"Request headers: {request.headers}")
        # logger.debug(f"Request cookies: {request.cookies}")

        # Exclude sensitive headers from logging
        safe_headers = {
            k: v for k, v in request.headers.items() if k.lower() not in {"authorization", "cookie"}
        }

        logger.info(
            f"Incoming request: {request.method} {request.url} from {client_host} "
            f"headers={safe_headers}"
        )

        try:
            response = await call_next(request)
        except Exception:
            duration = (time.time() - start_time) * 1000
            logger.exception(
                f"Request failed: {request.method} {request.url} from {client_host} "
                f"duration={duration:.2f}ms"
            )
            raise

        duration = (time.time() - start_time) * 1000
        logger.info(
            f"Completed request: {request.method} {request.url} from {client_host} "
            f"status_code={response.status_code} duration={duration:.2f}ms"
        )
        return response
src/api/models/__init__.py
ADDED
File without changes
src/api/models/api_models.py
ADDED
@@ -0,0 +1,85 @@
from pydantic import BaseModel, Field


# -----------------------
# Core search result model
# -----------------------
class SearchResult(BaseModel):
    title: str = Field(default="", description="Title of the article")
    feed_author: str | None = Field(default=None, description="Author of the article")
    feed_name: str | None = Field(default=None, description="Name of the feed/newsletter")
    article_author: list[str] | None = Field(default=None, description="List of article authors")
    url: str | None = Field(default=None, description="URL of the article")
    chunk_text: str | None = Field(default=None, description="Text content of the article chunk")
    score: float = Field(default=0.0, description="Relevance score of the article")


# -----------------------
# Unique titles request/response
# -----------------------
class UniqueTitleRequest(BaseModel):
    query_text: str = Field(default="", description="The user query text")
    feed_author: str | None = Field(default=None, description="Filter by author name")
    feed_name: str | None = Field(default=None, description="Filter by feed/newsletter name")
    article_author: list[str] | None = Field(default=None, description="List of article authors")
    title_keywords: str | None = Field(
        default=None, description="Keywords or phrase to match in title"
    )
    limit: int = Field(default=5, description="Number of results to return")


class UniqueTitleResponse(BaseModel):
    results: list[SearchResult] = Field(
        default_factory=list, description="List of unique title search results"
    )


# -----------------------
# Ask request model
# -----------------------
class AskRequest(BaseModel):
    query_text: str = Field(default="", description="The user query text")
    feed_author: str | None = Field(default=None, description="Filter by author name")
    feed_name: str | None = Field(default=None, description="Filter by feed/newsletter name")
    article_author: list[str] | None = Field(default=None, description="List of article authors")
    title_keywords: str | None = Field(
        default=None, description="Keywords or phrase to match in title"
    )
    limit: int = Field(default=5, description="Number of results to return")
    provider: str = Field(default="OpenRouter", description="The provider to use for the query")
    model: str | None = Field(
        default=None, description="The specific model to use for the provider, if applicable"
    )


# -----------------------
# Ask response model
# -----------------------
class AskResponse(BaseModel):
    query: str = Field(default="", description="The original query text")
    provider: str = Field(default="", description="The LLM provider used for generation")
    answer: str = Field(default="", description="Generated answer from the LLM")
    sources: list[SearchResult] = Field(
        default_factory=list, description="List of source documents used in generation"
    )
    model: str | None = Field(
        default=None, description="The specific model used by the provider, if available"
    )
    finish_reason: str | None = Field(
        default=None, description="The reason why the generation finished, if available"
    )


# -----------------------
# Streaming "response" documentation
# -----------------------
class AskStreamingChunk(BaseModel):
    delta: str = Field(default="", description="Partial text generated by the LLM")


class AskStreamingResponse(BaseModel):
    query: str = Field(default="", description="The original query text")
    provider: str = Field(default="", description="The LLM provider used for generation")
    chunks: list[AskStreamingChunk] = Field(
        default_factory=list, description="Streamed chunks of generated text"
    )
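
Note (an illustrative sketch, not part of the committed files): the Pydantic models above define the JSON bodies the search routes accept. Assuming Pydantic v2 as pinned in pyproject.toml, an /search/ask payload could be built like this; the query text and filter values are hypothetical.

    from src.api.models.api_models import AskRequest

    # Leaving `model` as None lets the backend fall back to automatic model routing.
    req = AskRequest(
        query_text="what is retrieval-augmented generation?",
        feed_name="",
        feed_author="",
        limit=5,
        provider="openrouter",
    )
    payload = req.model_dump()  # dict ready to send as the JSON body of POST /search/ask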
src/api/models/provider_models.py
ADDED
@@ -0,0 +1,77 @@
from enum import Enum

from pydantic import BaseModel, Field


# OpenRouter priority sort options
class ProviderSort(str, Enum):
    latency = "latency"


class ModelConfig(BaseModel):
    # The "entry point" model — required by OpenRouter API
    primary_model: str = Field(default="", description="The initial model requested")
    # Optional fallback / routing models
    candidate_models: list[str] = Field(
        default_factory=list, description="List of candidate models for fallback or routing"
    )
    provider_sort: ProviderSort = Field(
        default=ProviderSort.latency, description="How to sort candidate models"
    )
    stream: bool = Field(default=False, description="Whether to stream responses")
    max_completion_tokens: int = Field(
        default=5000, description="Maximum number of tokens for completion"
    )
    temperature: float = Field(default=0.0, description="Sampling temperature")


class ModelRegistry(BaseModel):
    models: dict[str, ModelConfig] = Field(default_factory=dict)

    def get_config(self, provider: str) -> ModelConfig:
        """Retrieve the ModelConfig for the specified provider.

        Args:
            provider (str): The name of the provider.

        Returns:
            ModelConfig: The ModelConfig instance for the specified provider.

        Raises:
            ValueError: If the provider is not found in the registry.
        """
        provider_lower = provider.lower()
        if provider_lower not in self.models:
            raise ValueError(f"ModelConfig not found for provider: {provider}")
        return self.models[provider_lower]


# -----------------------
# Default registry
# -----------------------

# Default ModelConfigs for models
# OpenRouter models show low latency and are highly ranked by OpenRouter

MODEL_REGISTRY = ModelRegistry(
    models={
        "openrouter": ModelConfig(
            primary_model="openai/gpt-oss-20b:free",
            candidate_models=[
                # "meta-llama/llama-4-scout:free",
                "cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
                # "meta-llama/llama-3.3-8b-instruct:free",
                # "openai/gpt-oss-20b:free",
                # "openai/gpt-oss-120b:free",
                "nvidia/nemotron-nano-9b-v2:free",
            ],
        ),
        # "openai": ModelConfig(primary_model="gpt-4o-mini"),
        "huggingface": ModelConfig(primary_model="deepseek-ai/DeepSeek-R1"),
    }
)

# MODELS WITH LOGPROBS SUPPORT

# deepseek/deepseek-r1-0528-qwen3-8b:free
# mistralai/mistral-small-3.2-24b-instruct:free
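
As a quick illustration (a sketch, not part of the commit): the registry defined above is what the frontend's get_models_for_provider helper reads. Looking up a provider configuration only uses names defined in this file.

    from src.api.models.provider_models import MODEL_REGISTRY

    # get_config raises ValueError for providers missing from the registry.
    config = MODEL_REGISTRY.get_config("openrouter")
    print(config.primary_model)     # "openai/gpt-oss-20b:free"
    print(config.candidate_models)  # fallback models used for routing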
src/api/routes/__init__.py
ADDED
File without changes
src/api/routes/health_routes.py
ADDED
@@ -0,0 +1,52 @@
import time

from fastapi import APIRouter, Request
from qdrant_client.http.exceptions import UnexpectedResponse

router = APIRouter()

start_time = time.time()


@router.get("/")
async def root():
    """Root endpoint.

    Returns a simple JSON response indicating that the API is running.

    Returns:
        dict: {"message": "Hello! API is running."}

    """
    return {"message": "Hello! API is running."}


@router.get("/health")
async def health_check():
    """Liveness check endpoint.

    Returns basic service info and uptime.
    """
    uptime = int(time.time() - start_time)
    return {
        "status": "ok",
        "uptime_seconds": uptime,
    }


@router.get("/ready")
async def readiness_check(request: Request):
    """Readiness check endpoint.

    Verifies whether the service is ready to handle requests by
    checking connectivity to Qdrant.
    """
    try:
        vectorstore = request.app.state.vectorstore
        # a lightweight check: list_collections is cheap
        await vectorstore.client.get_collections()
        return {"status": "ready"}
    except UnexpectedResponse:
        return {"status": "not ready", "reason": "Qdrant unexpected response"}
    except Exception as e:
        return {"status": "not ready", "reason": str(e)}
src/api/routes/search_routes.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import asyncio

from fastapi import APIRouter, Request
from fastapi.responses import StreamingResponse

from src.api.models.api_models import (
    AskRequest,
    AskResponse,
    AskStreamingResponse,
    SearchResult,
    UniqueTitleRequest,
    UniqueTitleResponse,
)
from src.api.services.generation_service import generate_answer, get_streaming_function
from src.api.services.search_service import query_unique_titles, query_with_filters

router = APIRouter()


@router.post("/unique-titles", response_model=UniqueTitleResponse)
async def search_unique(request: Request, params: UniqueTitleRequest):
    """Returns unique article titles based on a query and optional filters.

    Deduplicates results by article title.

    Args:
        request: FastAPI request object.
        params: UniqueTitleRequest with search parameters.

    Returns:
        UniqueTitleResponse: List of unique titles.

    """
    results = await query_unique_titles(
        request=request,
        query_text=params.query_text,
        feed_author=params.feed_author,
        feed_name=params.feed_name,
        title_keywords=params.title_keywords,
        limit=params.limit,
    )
    return {"results": results}


@router.post("/ask", response_model=AskResponse)
async def ask_with_generation(request: Request, ask: AskRequest):
    """Non-streaming question-answering endpoint using vector search and LLM.

    Workflow:
    1. Retrieve relevant documents (possibly duplicate titles for richer context).
    2. Generate an answer with the selected LLM provider.

    Args:
        request: FastAPI request object.
        ask: AskRequest with query, provider, and limit.

    Returns:
        AskResponse: Generated answer and source documents.

    """
    # Step 1: Retrieve relevant documents with filters
    results: list[SearchResult] = await query_with_filters(
        request,
        query_text=ask.query_text,
        feed_author=ask.feed_author,
        feed_name=ask.feed_name,
        title_keywords=ask.title_keywords,
        limit=ask.limit,
    )

    # Step 2: Generate an answer
    answer_data = await generate_answer(
        query=ask.query_text, contexts=results, provider=ask.provider, selected_model=ask.model
    )

    return AskResponse(
        query=ask.query_text,
        provider=ask.provider,
        answer=answer_data["answer"],
        sources=results,
        model=answer_data.get("model", None),
        finish_reason=answer_data.get("finish_reason", None),
    )


@router.post("/ask/stream", response_model=AskStreamingResponse)
async def ask_with_generation_stream(request: Request, ask: AskRequest):
    """Streaming question-answering endpoint using vector search and LLM.

    Workflow:
    1. Retrieve relevant documents (possibly duplicate titles for richer context).
    2. Stream the generated answer with the selected LLM provider.

    Args:
        request: FastAPI request object.
        ask: AskRequest with query, provider, and limit.

    Returns:
        StreamingResponse: Yields text chunks as plain text.

    """
    # Step 1: Retrieve relevant documents with filters
    results: list[SearchResult] = await query_with_filters(
        request,
        query_text=ask.query_text,
        feed_author=ask.feed_author,
        feed_name=ask.feed_name,
        title_keywords=ask.title_keywords,
        limit=ask.limit,
    )

    # Step 2: Get the streaming generator
    stream_func = get_streaming_function(
        provider=ask.provider, query=ask.query_text, contexts=results, selected_model=ask.model
    )

    # Step 3: Wrap streaming generator
    async def stream_generator():
        async for delta in stream_func():
            yield delta
            await asyncio.sleep(0)  # allow event loop to handle other tasks

    return StreamingResponse(stream_generator(), media_type="text/plain")
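Because /ask/stream returns plain text chunks, a client has to read the response body incrementally. A minimal consumer sketch follows; the base URL and payload values are assumptions, while the field names mirror how AskRequest is used above.

# Streaming client sketch for /ask/stream; URL and payload values are assumptions.
import httpx

payload = {
    "query_text": "What are the latest takes on RAG evaluation?",
    "provider": "openrouter",
    "limit": 5,
}

with httpx.stream("POST", "http://localhost:8000/ask/stream", json=payload, timeout=None) as r:
    for chunk in r.iter_text():
        print(chunk, end="", flush=True)  # chunks arrive as plain text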
src/api/services/__init__.py
ADDED
|
File without changes
|
src/api/services/generation_service.py
ADDED
|
@@ -0,0 +1,137 @@
from collections.abc import AsyncGenerator, Callable

import opik

from src.api.models.api_models import SearchResult
from src.api.models.provider_models import MODEL_REGISTRY
from src.api.services.providers.huggingface_service import generate_huggingface, stream_huggingface
from src.api.services.providers.openai_service import generate_openai, stream_openai
from src.api.services.providers.openrouter_service import generate_openrouter, stream_openrouter
from src.api.services.providers.utils.evaluation_metrics import evaluate_metrics
from src.api.services.providers.utils.prompts import build_research_prompt
from src.utils.logger_util import setup_logging

logger = setup_logging()


# -----------------------
# Non-streaming answer generator
# -----------------------
@opik.track(name="generate_answer")
async def generate_answer(
    query: str,
    contexts: list[SearchResult],
    provider: str = "openrouter",
    selected_model: str | None = None,
) -> dict:
    """Generate a non-streaming answer using the specified LLM provider.

    Args:
        query (str): The user's research query.
        contexts (list[SearchResult]): List of context documents with metadata.
        provider (str): The LLM provider to use ("openai", "openrouter", "huggingface").
        selected_model (str | None): Optional specific model to use instead of the provider default.

    Returns:
        dict: {"answer": str, "sources": list[str], "model": str | None, "finish_reason": str | None}

    """
    prompt = build_research_prompt(contexts, query=query)
    model_used: str | None = None
    finish_reason: str | None = None

    provider_lower = provider.lower()

    config = MODEL_REGISTRY.get_config(provider_lower)

    if provider_lower == "openai":
        answer, model_used = await generate_openai(prompt, config=config)
    elif provider_lower == "openrouter":
        try:
            answer, model_used, finish_reason = await generate_openrouter(
                prompt, config=config, selected_model=selected_model
            )
            metrics_results = await evaluate_metrics(answer, prompt)
            logger.info(f"G-Eval Faithfulness → {metrics_results}")
        except Exception as e:
            logger.error(f"Error occurred while generating answer from {provider_lower}: {e}")
            raise

    elif provider_lower == "huggingface":
        answer, model_used = await generate_huggingface(prompt, config=config)
    else:
        raise ValueError(f"Unknown provider: {provider}")

    return {
        "answer": answer,
        "sources": [r.url for r in contexts],
        "model": model_used,
        "finish_reason": finish_reason,
    }


# -----------------------
# Streaming answer generator
# -----------------------
@opik.track(name="get_streaming_function")
def get_streaming_function(
    provider: str,
    query: str,
    contexts: list[SearchResult],
    selected_model: str | None = None,
) -> Callable[[], AsyncGenerator[str, None]]:
    """Get a streaming function for the specified LLM provider.

    Args:
        provider (str): The LLM provider to use ("openai", "openrouter", "huggingface").
        query (str): The user's research query.
        contexts (list[SearchResult]): List of context documents with metadata.
        selected_model (str | None): Optional specific model to use instead of the provider default.

    Returns:
        Callable[[], AsyncGenerator[str, None]]: A function that returns an async generator yielding
        response chunks.

    """
    prompt = build_research_prompt(contexts, query=query)
    provider_lower = provider.lower()
    config = MODEL_REGISTRY.get_config(provider_lower)
    logger.info(f"Using model config: {config}")

    async def stream_gen() -> AsyncGenerator[str, None]:
        """Asynchronous generator that streams response chunks from the specified provider.

        Yields:
            str: The next chunk of the response.

        """
        buffer = []  # collect all chunks here

        if provider_lower == "openai":
            async for chunk in stream_openai(prompt, config=config):
                buffer.append(chunk)
                yield chunk

        elif provider_lower == "openrouter":
            try:
                async for chunk in stream_openrouter(
                    prompt, config=config, selected_model=selected_model
                ):
                    buffer.append(chunk)
                    yield chunk

                full_output = "".join(buffer)
                metrics_results = await evaluate_metrics(full_output, prompt)
                logger.info(f"Metrics results: {metrics_results}")

            except Exception as e:
                logger.error(f"Error occurred while streaming from {provider}: {e}")
                yield "__error__"

        elif provider_lower == "huggingface":
            async for chunk in stream_huggingface(prompt, config=config):
                buffer.append(chunk)
                yield chunk

        else:
            raise ValueError(f"Unknown provider: {provider}")

    return stream_gen
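The streaming path emits in-band sentinel strings such as "__model_used__:<model>", "__truncated__", and "__error__" alongside normal text, so a consumer typically filters them out before rendering. A small illustrative sketch of that filtering (only the sentinel names come from the code above; the helper itself is hypothetical):

# Illustrative handling of the in-band sentinel chunks emitted by the streaming path.
def handle_chunk(chunk: str, state: dict) -> str | None:
    """Return displayable text, or None if the chunk was a control sentinel."""
    if chunk.startswith("__model_used__:"):
        state["model"] = chunk.split(":", 1)[1]
        return None
    if chunk == "__truncated__":
        state["truncated"] = True
        return None
    if chunk == "__error__":
        state["error"] = True
        return None
    return chunk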
src/api/services/providers/__init__.py
ADDED
|
File without changes
|
src/api/services/providers/huggingface_service.py
ADDED
|
@@ -0,0 +1,64 @@
from collections.abc import AsyncGenerator

from huggingface_hub import AsyncInferenceClient

from src.api.models.provider_models import ModelConfig
from src.api.services.providers.utils.messages import build_messages
from src.config import settings
from src.utils.logger_util import setup_logging

logger = setup_logging()

# -----------------------
# Hugging Face client
# -----------------------
hf_key = settings.hugging_face.api_key
hf_client = AsyncInferenceClient(provider="auto", api_key=hf_key)


async def generate_huggingface(prompt: str, config: ModelConfig) -> tuple[str, None]:
    """Generate a response from Hugging Face for a given prompt and model configuration.

    Args:
        prompt (str): The input prompt.
        config (ModelConfig): The model configuration.

    Returns:
        tuple[str, None]: The generated response and None for model and finish reason.

    """
    resp = await hf_client.chat.completions.create(
        model=config.primary_model,
        messages=build_messages(prompt),
        temperature=config.temperature,
        max_tokens=config.max_completion_tokens,
    )
    return resp.choices[0].message.content or "", None


def stream_huggingface(prompt: str, config: ModelConfig) -> AsyncGenerator[str, None]:
    """Stream a response from Hugging Face for a given prompt and model configuration.

    Args:
        prompt (str): The input prompt.
        config (ModelConfig): The model configuration.

    Returns:
        AsyncGenerator[str, None]: An asynchronous generator yielding response chunks.

    """

    async def gen() -> AsyncGenerator[str, None]:
        stream = await hf_client.chat.completions.create(
            model=config.primary_model,
            messages=build_messages(prompt),
            temperature=config.temperature,
            max_tokens=config.max_completion_tokens,
            stream=True,
        )
        async for chunk in stream:
            delta_text = getattr(chunk.choices[0].delta, "content", None)
            if delta_text:
                yield delta_text

    return gen()
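Both helpers can also be exercised directly outside FastAPI, for instance with asyncio.run; the sketch below assumes ModelConfig() constructs with usable defaults, as it does in prompts.py.

# Direct-use sketch; relies on ModelConfig() providing default primary_model/temperature values.
import asyncio

async def demo() -> None:
    cfg = ModelConfig()
    answer, _ = await generate_huggingface("Summarize the latest MLOps trends.", config=cfg)
    print(answer)

asyncio.run(demo())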
src/api/services/providers/openai_service.py
ADDED
|
@@ -0,0 +1,181 @@
import os
from collections.abc import AsyncGenerator

from openai import AsyncOpenAI
from opik.integrations.openai import track_openai

from src.api.models.provider_models import ModelConfig
from src.api.services.providers.utils.messages import build_messages
from src.config import settings
from src.utils.logger_util import setup_logging

logger = setup_logging()

# -----------------------
# OpenAI client
# -----------------------
openai_key = settings.openai.api_key
async_openai_client = AsyncOpenAI(api_key=openai_key)

# -----------------------
# Opik Observability
# -----------------------

os.environ["OPIK_API_KEY"] = settings.opik.api_key
os.environ["OPIK_PROJECT_NAME"] = settings.opik.project_name

async_openai_client = track_openai(async_openai_client)


async def generate_openai(prompt: str, config: ModelConfig) -> tuple[str, None]:
    """Generate a response from OpenAI for a given prompt and model configuration.

    Args:
        prompt (str): The input prompt.
        config (ModelConfig): The model configuration.

    Returns:
        tuple[str, None]: The generated response and None for model and finish reason.

    """
    ### NOTES ON PARAMETERS
    # logprobs: Include the log probabilities on the logprobs most likely tokens,
    #   as well as the chosen tokens.
    # temperature: 0.0 (more deterministic) to 1.0 (more creative)
    # top_p: 0.0 to 1.0, nucleus sampling, 1.0 means no nucleus sampling;
    #   0.1 means only the tokens comprising the top 10% probability mass are considered.
    # presence_penalty: -2.0 to 2.0, positive values penalize new tokens based
    #   on whether they appear in the text so far
    #   (encourages the model to use more context from other chunks)
    # frequency_penalty: -2.0 to 2.0, positive values penalize new tokens based
    #   on their existing frequency in the text so far (helpful if context chunks overlap)

    resp = await async_openai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=build_messages(prompt),
        temperature=config.temperature,
        max_completion_tokens=config.max_completion_tokens,
        # logprobs=True,
        # top_logprobs=3,
        # top_p=1.0,
        # presence_penalty=0.3,
        # frequency_penalty=0.3,
    )

    return resp.choices[0].message.content or "", None


def stream_openai(prompt: str, config: ModelConfig) -> AsyncGenerator[str, None]:
    """Stream a response from OpenAI for a given prompt and model configuration.

    Args:
        prompt (str): The input prompt.
        config (ModelConfig): The model configuration.

    Returns:
        AsyncGenerator[str, None]: An asynchronous generator yielding response chunks.

    """

    async def gen() -> AsyncGenerator[str, None]:
        stream = await async_openai_client.chat.completions.create(
            model=config.primary_model,
            messages=build_messages(prompt),
            temperature=config.temperature,
            max_completion_tokens=config.max_completion_tokens,
            stream=True,
        )

        last_finish_reason = None
        async for chunk in stream:
            delta_text = getattr(chunk.choices[0].delta, "content", None)
            if delta_text:
                yield delta_text

            # Reasons: tool_calls, stop, length, content_filter, error
            finish_reason = getattr(chunk.choices[0], "finish_reason", None)

            if finish_reason:
                last_finish_reason = finish_reason

        logger.warning(f"Final finish_reason: {last_finish_reason}")

        # Yield a chunk to trigger truncation warning in UI
        if last_finish_reason == "length":
            yield "__truncated__"

    return gen()


# -----------------------
# Log Probs Parameter Experiment
# -----------------------

# import math

# async def generate_openai(prompt: str, config: ModelConfig) -> str:
#     """
#     Generate a response from OpenAI for a given prompt and model configuration,
#     and calculate the average log probability of the generated tokens.

#     Returns:
#         tuple[str, float | None]: Generated response and average log probability
#     """
#     resp = await async_openai_client.chat.completions.create(
#         model="gpt-4o-mini",
#         messages=build_messages(prompt),
#         temperature=config.temperature,
#         max_completion_tokens=config.max_completion_tokens,
#         logprobs=True,  # include token log probabilities
#         top_logprobs=3,  # top 3 alternatives for each token
#         top_p=1.0,
#         presence_penalty=0.3,
#         frequency_penalty=0.3,
#     )

#     content = resp.choices[0].message.content or ""
#     token_logprobs_list = resp.choices[0].logprobs

#     tokens_logprobs = []
#     token_probs = []

#     if (
#         token_logprobs_list is not None
#         and hasattr(token_logprobs_list, "content")
#         and isinstance(token_logprobs_list.content, list)
#         and len(token_logprobs_list.content) > 0
#     ):
#         for token_info in token_logprobs_list.content:
#             if token_info is not None and hasattr(token_info, "logprob") \
#                     and hasattr(token_info, "token"):
#                 tokens_logprobs.append(token_info.logprob)
#                 token_probs.append((token_info.token, math.exp(token_info.logprob)))

#     if tokens_logprobs:
#         avg_logprob = sum(tokens_logprobs) / len(tokens_logprobs)
#         avg_prob = math.exp(avg_logprob)

#         # Sort by probability
#         most_confident = sorted(token_probs, key=lambda x: x[1], reverse=True)[:5]
#         least_confident = sorted(token_probs, key=lambda x: x[1])[:5]

#         logger.info(f"Temperature: {config.temperature}")
#         logger.info(f"Max completion tokens: {config.max_completion_tokens}")
#         logger.info(f"Average log probability: {avg_logprob:.4f} "
#                     f"(≈ {avg_prob:.2%} avg token prob)")

#         logger.info("Top 5 most confident tokens:")
#         for tok, prob in most_confident:
#             logger.info(f"  '{tok}' → {prob:.2%}")

#         logger.info("Top 5 least confident tokens:")
#         for tok, prob in least_confident:
#             logger.info(f"  '{tok}' → {prob:.2%}")

#     else:
#         logger.warning("No logprob information found in response.")

#     breakpoint()

#     return content
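The commented-out experiment above relies on the fact that a token's probability is the exponential of its log probability, so an average log probability maps back to an average token probability. A two-line illustration with a made-up value:

import math

avg_logprob = -0.35               # example value, not taken from a real run
avg_prob = math.exp(avg_logprob)  # ≈ 0.70, i.e. roughly 70% average token probability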
src/api/services/providers/openrouter_service.py
ADDED
|
@@ -0,0 +1,254 @@
import os
from collections.abc import AsyncGenerator
from typing import Any

import opik
from openai import AsyncOpenAI
from opik.integrations.openai import track_openai

from src.api.models.provider_models import ModelConfig
from src.api.services.providers.utils.messages import build_messages
from src.config import settings
from src.utils.logger_util import setup_logging

logger = setup_logging()


# -----------------------
# OpenRouter client
# -----------------------

openrouter_key = settings.openrouter.api_key
openrouter_url = settings.openrouter.api_url
async_openrouter_client = AsyncOpenAI(base_url=openrouter_url, api_key=openrouter_key)

# -----------------------
# Opik Observability
# -----------------------

os.environ["OPIK_API_KEY"] = settings.opik.api_key
os.environ["OPIK_PROJECT_NAME"] = settings.opik.project_name

async_openrouter_client = track_openai(async_openrouter_client)

# -----------------------
# Helper to build extra body for OpenRouter
# -----------------------


@opik.track(name="build_openrouter_extra")
def build_openrouter_extra(config: ModelConfig) -> dict[str, Any]:
    """Build the extra body for OpenRouter API requests based on the ModelConfig.

    Args:
        config (ModelConfig): The model configuration.

    Returns:
        dict[str, Any]: The extra body for OpenRouter API requests.

    """
    body = {"provider": {"sort": config.provider_sort.value}}
    if config.candidate_models:
        body["models"] = list(config.candidate_models)  # type: ignore
    return body


# -----------------------
# Core OpenRouter functions
# -----------------------


@opik.track(name="generate_openrouter")
async def generate_openrouter(
    prompt: str,
    config: ModelConfig,
    selected_model: str | None = None,
) -> tuple[str, str | None, str | None]:
    """Generate a response from OpenRouter for a given prompt and model configuration.

    Args:
        prompt (str): The input prompt.
        config (ModelConfig): The model configuration.
        selected_model (str | None): Optional specific model to use.

    Returns:
        tuple[str, str | None, str | None]: The generated response, model used, and finish reason.

    """
    model_to_use = selected_model or config.primary_model

    resp = await async_openrouter_client.chat.completions.create(
        model=model_to_use,
        messages=build_messages(prompt),
        temperature=config.temperature,
        max_completion_tokens=config.max_completion_tokens,
        extra_body=build_openrouter_extra(config),
    )
    answer = resp.choices[0].message.content or ""

    # Reasons: tool_calls, stop, length, content_filter, error
    finish_reason = getattr(resp.choices[0], "native_finish_reason", None)
    model_used = getattr(resp.choices[0], "model", None) or getattr(resp, "model", None)

    logger.info(f"OpenRouter non-stream finish_reason: {finish_reason}")
    if finish_reason == "length":
        logger.warning("Response was truncated by token limit.")

    logger.info(f"OpenRouter non-stream finished. Model used: {model_used}")

    return answer, model_used, finish_reason


@opik.track(name="stream_openrouter")
def stream_openrouter(
    prompt: str,
    config: ModelConfig,
    selected_model: str | None = None,
) -> AsyncGenerator[str, None]:
    """Stream a response from OpenRouter for a given prompt and model configuration.

    Args:
        prompt (str): The input prompt.
        config (ModelConfig): The model configuration.
        selected_model (str | None): Optional specific model to use.

    Returns:
        AsyncGenerator[str, None]: An asynchronous generator yielding response chunks.

    """

    async def gen() -> AsyncGenerator[str, None]:
        """Generate response chunks from OpenRouter.

        Yields:
            AsyncGenerator[str, None]: Response chunks.

        """
        model_to_use = selected_model or config.primary_model

        stream = await async_openrouter_client.chat.completions.create(
            model=model_to_use,
            messages=build_messages(prompt),
            temperature=config.temperature,
            max_completion_tokens=config.max_completion_tokens,
            extra_body=build_openrouter_extra(config),
            stream=True,
        )
        try:
            first_chunk = await stream.__anext__()
            model_used = getattr(first_chunk, "model", None)
            if model_used:
                yield f"__model_used__:{model_used}"
            delta_text = getattr(first_chunk.choices[0].delta, "content", None)
            if delta_text:
                yield delta_text
        except StopAsyncIteration:
            return

        last_finish_reason = None
        async for chunk in stream:
            delta_text = getattr(chunk.choices[0].delta, "content", None)
            if delta_text:
                yield delta_text

            # Reasons: tool_calls, stop, length, content_filter, error
            finish_reason = getattr(chunk.choices[0], "finish_reason", None)

            if finish_reason:
                last_finish_reason = finish_reason

        logger.info(f"OpenRouter stream finished. Model used: {model_used}")
        logger.warning(f"Final finish_reason: {last_finish_reason}")

        # Yield a chunk to trigger truncation warning in UI
        if last_finish_reason == "length":
            yield "__truncated__"

    return gen()


# ---------------------------------------
# Test Log Probs and Confidence Visualization
# ---------------------------------------


# import math


# def visualize_token_confidence(token_probs: list[tuple[str, float]]):
#     """Print token probabilities as ASCII bars in the terminal."""
#     for tok, prob in token_probs:
#         bar_length = int(prob * 40)  # scale bar to 40 chars max
#         bar = "#" * bar_length
#         print(f"{tok:>12}: [{bar:<40}] {prob:.2%}")

# async def generate_openrouter(
#     prompt: str,
#     config: ModelConfig,
#     max_tokens: int | None = None) -> tuple[str, str | None, str | None]:
#     """Generate a response from OpenRouter
#     and log token-level statistics with confidence evolution."""

#     resp = await async_openrouter_client.chat.completions.create(
#         model=config.primary_model,
#         messages=build_messages(prompt),
#         temperature=config.temperature,
#         max_completion_tokens=max_tokens or config.max_completion_tokens,
#         extra_body={**build_openrouter_extra(config), "logprobs": True, "top_logprobs": 3},
#     )

#     choice = resp.choices[0]
#     content = choice.message.content or ""
#     finish_reason = getattr(choice, "native_finish_reason", None)
#     model_used = getattr(choice, "model", None) or getattr(resp, "model", None)

#     logger.info(f"OpenRouter non-stream finish_reason: {finish_reason}")
#     if finish_reason == "length":
#         logger.warning("Response was truncated by token limit.")

#     # Extract logprobs
#     token_logprobs_list = choice.logprobs
#     tokens_logprobs = []
#     token_probs = []

#     if token_logprobs_list and hasattr(token_logprobs_list, "content"):
#         for token_info in token_logprobs_list.content:
#             tok = token_info.token
#             logprob = token_info.logprob
#             prob = math.exp(logprob)

#             tokens_logprobs.append(logprob)
#             token_probs.append((tok, prob))

#     if tokens_logprobs:
#         avg_logprob = sum(tokens_logprobs) / len(tokens_logprobs)
#         avg_prob = math.exp(avg_logprob)

#         most_confident = sorted(token_probs, key=lambda x: x[1], reverse=True)[:5]
#         least_confident = sorted(token_probs, key=lambda x: x[1])[:5]

#         logger.info(f"Temperature: {config.temperature}")
#         logger.info(f"Max completion tokens: {config.max_completion_tokens}")
#         logger.info(f"Average log probability: {avg_logprob:.4f} "
#                     f"(≈ {avg_prob:.2%} avg token prob)")

#         logger.info("Top 5 most confident tokens:")
#         for tok, prob in most_confident:
#             logger.info(f"  '{tok}' → {prob:.2%}")

#         logger.info("Top 5 least confident tokens:")
#         for tok, prob in least_confident:
#             logger.info(f"  '{tok}' → {prob:.2%}")

#         # Terminal visualization
#         print("\nToken confidence evolution:")
#         visualize_token_confidence(token_probs)

#     else:
#         logger.warning("No logprob information found in response.")

#     return content, model_used, finish_reason
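For reference, the extra_body built above ends up with roughly the following shape when candidate fallback models are configured; the sort value and model IDs shown here are placeholders, not values from this repository.

# Approximate shape of the extra_body sent to OpenRouter; values are placeholders.
extra_body = {
    "provider": {"sort": "price"},  # taken from config.provider_sort.value
    "models": [                     # only present when config.candidate_models is set
        "meta-llama/llama-3.1-70b-instruct",
        "mistralai/mistral-small",
    ],
}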
src/api/services/providers/utils/__init__.py
ADDED
|
File without changes
|
src/api/services/providers/utils/evaluation_metrics.py
ADDED
|
@@ -0,0 +1,110 @@
from opik.evaluation import models
from opik.evaluation.metrics import GEval

from src.config import settings
from src.utils.logger_util import setup_logging

logger = setup_logging()

# -----------------------
# Evaluation helper
# -----------------------


async def evaluate_metrics(output: str, context: str) -> dict:
    """Evaluate multiple metrics for a given LLM output.
    Metrics included: faithfulness, coherence, completeness.

    Args:
        output (str): The LLM-generated output to evaluate.
        context (str): The context used to generate the output.

    Returns:
        dict: A dictionary with metric names as keys and their evaluation results as values.

    """
    # NOTE: clearing the key here forces the "no API key" branch below,
    # so G-Eval scoring is effectively skipped.
    settings.openai.api_key = None
    logger.info(f"OpenAI key is not set: {settings.openai.api_key is None}")

    if not output.strip():
        logger.warning("Output is empty. Skipping evaluation.")
        return {
            "faithfulness": {"score": 0.0, "reason": "Empty output", "failed": True},
            "coherence": {"score": 0.0, "reason": "Empty output", "failed": True},
            "completeness": {"score": 0.0, "reason": "Empty output", "failed": True},
        }

    if not getattr(settings.openai, "api_key", None):
        logger.info("OpenAI API key not set. Skipping metrics evaluation.")
        return {
            "faithfulness": {"score": None, "reason": "Skipped – no API key", "failed": True},
            "coherence": {"score": None, "reason": "Skipped – no API key", "failed": True},
            "completeness": {"score": None, "reason": "Skipped – no API key", "failed": True},
        }

    judge_model = models.LiteLLMChatModel(
        model_name="gpt-4o",  # gpt-4o, gpt-5-mini
        api_key=settings.openai.api_key,
    )

    metric_configs = {
        "faithfulness": (
            (
                "You are an expert judge tasked with evaluating whether an AI-generated answer is "
                "faithful to the provided Substack excerpts."
            ),
            (
                "The OUTPUT must not introduce new information beyond "
                "what is contained in the CONTEXT. "
                "All claims in the OUTPUT should be directly supported by the CONTEXT."
            ),
        ),
        "coherence": (
            (
                "You are an expert judge tasked with evaluating whether an AI-generated answer is "
                "logically coherent."
            ),
            "The answer should be well-structured, readable, and maintain consistent reasoning.",
        ),
        "completeness": (
            (
                "You are an expert judge tasked with evaluating whether an AI-generated answer "
                "covers all relevant aspects of the query."
            ),
            (
                "The answer should include all major points from the CONTEXT "
                "and address the user's query fully."
            ),
        ),
    }

    results = {}
    for name, (task_intro, eval_criteria) in metric_configs.items():
        try:
            metric = GEval(
                task_introduction=task_intro,
                evaluation_criteria=eval_criteria,
                model=judge_model,
                name=f"G-Eval {name.capitalize()}",
            )

            eval_input = f"""
            OUTPUT: {output}
            CONTEXT: {context}
            """

            score_result = await metric.ascore(eval_input)

            results[name] = {
                "score": score_result.value,
                "reason": score_result.reason,
                "failed": score_result.scoring_failed,
            }

        except Exception as e:
            logger.warning(f"G-Eval {name} failed: {e}")
            results[name] = {"score": 0.0, "reason": str(e), "failed": True}

    return results
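The returned dictionary has the same keys for every metric, so callers can log or threshold it generically. A small sketch, where the 0.5 cutoff is an arbitrary example value:

# Sketch: flagging low evaluation scores; the 0.5 threshold is an example, not a project constant.
async def log_low_scores(answer: str, prompt: str) -> None:
    results = await evaluate_metrics(answer, prompt)
    for metric_name, data in results.items():
        score = data["score"]
        if data["failed"] or (score is not None and score < 0.5):
            logger.warning(f"{metric_name} flagged: score={score}, reason={data['reason']}")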
src/api/services/providers/utils/messages.py
ADDED
|
@@ -0,0 +1,18 @@
from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam


def build_messages(
    prompt: str,
) -> list[ChatCompletionSystemMessageParam | ChatCompletionUserMessageParam]:
    """Build a list of messages for the OpenAI chat API.

    Args:
        prompt (str): The user prompt.

    Returns:
        list[ChatCompletionSystemMessageParam | ChatCompletionUserMessageParam]: A list of messages.

    """
    return [
        ChatCompletionUserMessageParam(role="user", content=prompt),
    ]
src/api/services/providers/utils/prompts.py
ADDED
|
@@ -0,0 +1,77 @@
import opik

from src.api.models.api_models import SearchResult
from src.api.models.provider_models import ModelConfig

config = ModelConfig()

PROMPT = """
You are a skilled research assistant specialized in analyzing Substack newsletters.
Respond to the user’s query using only the provided context from these articles,
which is retrieved from a vector database, without relying on outside knowledge or assumptions.


### Output Rules:
- Write a detailed, structured answer using **Markdown** (headings, bullet points,
  short or long paragraphs as appropriate).
- Use up to **{tokens} tokens** without exceeding this limit.
- Only include facts from the provided context from the articles.
- Attribute each fact to the correct author(s) and source, and include **clickable links**.
- If the article author and feed author differ, mention both.
- There is no need to mention that you based your answer on the provided context.
- But if no relevant information exists, clearly state this and provide a fallback suggestion.
- At the very end, include a **funny quote** and wish the user a great day.

### Query:
{query}

### Context Articles:
{context_texts}

### Final Answer:
"""


# Create a new prompt
prompt = opik.Prompt(
    name="substack_research_assistant", prompt=PROMPT, metadata={"environment": "development"}
)


def build_research_prompt(
    contexts: list[SearchResult],
    query: str = "",
    tokens: int = config.max_completion_tokens,
) -> str:
    """Construct a research-focused LLM prompt using the given query
    and supporting context documents.

    The prompt enforces Markdown formatting, citations, and strict length guidance.

    Args:
        contexts (list[SearchResult]): List of context documents with metadata.
        query (str): The user's research query.
        tokens (int): Maximum number of tokens for the LLM response.

    Returns:
        str: The formatted prompt ready for LLM consumption.

    """
    # Join all retrieved contexts into a readable format
    context_texts = "\n\n".join(
        (
            f"- Feed Name: {r.feed_name}\n"
            f"  Article Title: {r.title}\n"
            f"  Article Author(s): {r.article_author}\n"
            f"  Feed Author: {r.feed_author}\n"
            f"  URL: {r.url}\n"
            f"  Snippet: {r.chunk_text}"
        )
        for r in contexts
    )

    return PROMPT.format(
        query=query,
        context_texts=context_texts,
        tokens=tokens,
    )
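A quick way to inspect the rendered prompt is to feed build_research_prompt a single SearchResult; every field value in the sketch below is made up for illustration.

# Illustrative call; all SearchResult field values are invented.
sample = SearchResult(
    title="Scaling RAG pipelines",
    feed_author="Jane Doe",
    feed_name="ML Weekly",
    article_author="Jane Doe",
    url="https://example.substack.com/p/scaling-rag",
    chunk_text="Hybrid dense + sparse retrieval improved recall in our tests...",
    score=0.87,
)
print(build_research_prompt([sample], query="How do teams scale RAG pipelines?"))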
src/api/services/search_service.py
ADDED
|
@@ -0,0 +1,188 @@
import opik
from fastapi import Request
from qdrant_client.models import (
    FieldCondition,
    Filter,
    Fusion,
    FusionQuery,
    MatchText,
    MatchValue,
    Prefetch,
)

from src.api.models.api_models import SearchResult
from src.infrastructure.qdrant.qdrant_vectorstore import AsyncQdrantVectorStore
from src.utils.logger_util import setup_logging

logger = setup_logging()


@opik.track(name="query_with_filters")
async def query_with_filters(
    request: Request,
    query_text: str = "",
    feed_author: str | None = None,
    feed_name: str | None = None,
    title_keywords: str | None = None,
    limit: int = 5,
) -> list[SearchResult]:
    """Query the vector store with optional filters and return search results.

    Performs a hybrid dense + sparse search on Qdrant and applies filters based
    on feed author, feed name, and title keywords. Results are deduplicated by point ID.

    Args:
        request (Request): FastAPI request object containing the vector store in app.state.
        query_text (str): Text query to search for.
        feed_author (str | None): Optional filter for the feed author.
        feed_name (str | None): Optional filter for the feed name.
        title_keywords (str | None): Optional filter for title keywords.
        limit (int): Maximum number of results to return.

    Returns:
        list[SearchResult]:
            List of search results containing title, feed info, URL, chunk text, and score.

    """
    vectorstore: AsyncQdrantVectorStore = request.app.state.vectorstore
    dense_vector = vectorstore.dense_vectors([query_text])[0]
    sparse_vector = vectorstore.sparse_vectors([query_text])[0]

    # Build filter conditions
    conditions: list[FieldCondition] = []
    if feed_author:
        conditions.append(FieldCondition(key="feed_author", match=MatchValue(value=feed_author)))
    if feed_name:
        conditions.append(FieldCondition(key="feed_name", match=MatchValue(value=feed_name)))
    if title_keywords:
        conditions.append(
            FieldCondition(key="title", match=MatchText(text=title_keywords.strip().lower()))
        )

    query_filter = Filter(must=conditions) if conditions else None  # type: ignore

    fetch_limit = max(1, limit) * 100
    logger.info(f"Fetching up to {fetch_limit} points for deduplication by point ID.")

    response = await vectorstore.client.query_points(
        collection_name=vectorstore.collection_name,
        query=FusionQuery(fusion=Fusion.RRF),
        prefetch=[
            Prefetch(query=dense_vector, using="Dense", limit=fetch_limit, filter=query_filter),
            Prefetch(query=sparse_vector, using="Sparse", limit=fetch_limit, filter=query_filter),
        ],
        query_filter=query_filter,
        limit=fetch_limit,
    )

    # Deduplicate by point ID
    seen_ids: set[str] = set()
    results: list[SearchResult] = []
    for point in response.points:
        if point.id in seen_ids:
            continue
        seen_ids.add(point.id)  # type: ignore
        payload = point.payload or {}
        results.append(
            SearchResult(
                title=payload.get("title", ""),
                feed_author=payload.get("feed_author"),
                feed_name=payload.get("feed_name"),
                article_author=payload.get("article_authors"),
                url=payload.get("url"),
                chunk_text=payload.get("chunk_text"),
                score=point.score,
            )
        )

    results = results[:limit]
    logger.info(f"Returning {len(results)} results for matching query '{query_text}'")
    return results


@opik.track(name="query_unique_titles")
async def query_unique_titles(
    request: Request,
    query_text: str,
    feed_author: str | None = None,
    feed_name: str | None = None,
    title_keywords: str | None = None,
    limit: int = 5,
) -> list[SearchResult]:
    """Query the vector store and return only unique titles.

    Performs a hybrid dense + sparse search with optional filters and dynamically
    increases the fetch limit to account for duplicates. Deduplicates results
    by article title.

    Args:
        request (Request): FastAPI request object containing the vector store in app.state.
        query_text (str): Text query to search for.
        feed_author (str | None): Optional filter for the feed author.
        feed_name (str | None): Optional filter for the feed name.
        title_keywords (str | None): Optional filter for title keywords.
        limit (int): Maximum number of unique results to return.

    Returns:
        list[SearchResult]:
            List of unique search results containing title, feed info, URL, chunk text, and score.

    """
    vectorstore: AsyncQdrantVectorStore = request.app.state.vectorstore
    dense_vector = vectorstore.dense_vectors([query_text])[0]
    sparse_vector = vectorstore.sparse_vectors([query_text])[0]

    # Build filter conditions
    conditions: list[FieldCondition] = []
    if feed_author:
        conditions.append(FieldCondition(key="feed_author", match=MatchValue(value=feed_author)))
    if feed_name:
        conditions.append(FieldCondition(key="feed_name", match=MatchValue(value=feed_name)))
    if title_keywords:
        conditions.append(
            FieldCondition(key="title", match=MatchText(text=title_keywords.strip().lower()))
        )

    query_filter = Filter(must=conditions) if conditions else None  # type: ignore

    fetch_limit = max(1, limit) * 280
    logger.info(f"Fetching up to {fetch_limit} points for unique titles.")

    response = await vectorstore.client.query_points(
        collection_name=vectorstore.collection_name,
        query=FusionQuery(fusion=Fusion.RRF),
        prefetch=[
            Prefetch(query=dense_vector, using="Dense", limit=fetch_limit, filter=query_filter),
            Prefetch(query=sparse_vector, using="Sparse", limit=fetch_limit, filter=query_filter),
        ],
        query_filter=query_filter,
        limit=fetch_limit,
    )

    # Deduplicate by title
    seen_titles: set[str] = set()
    results: list[SearchResult] = []
    for point in response.points:
        payload = point.payload or {}
        title = payload.get("title")
        if not title or title in seen_titles:
            continue
        seen_titles.add(title)
        results.append(
            SearchResult(
                title=title,
                feed_author=payload.get("feed_author"),
                feed_name=payload.get("feed_name"),
                article_author=payload.get("article_authors"),
                url=payload.get("url"),
                chunk_text=payload.get("chunk_text"),
                score=point.score,
            )
        )
        if len(results) >= limit:
            break

    logger.info(f"Returning {len(results)} unique title results for matching query '{query_text}'")

    # logger.info(f"results: {results}")
    return results
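Both query helpers expect the vector store on request.app.state, so the simplest end-to-end check goes through the HTTP layer. A minimal request sketch against the unique-titles endpoint follows; the URL and payload values are assumptions, while the field names follow how UniqueTitleRequest is used in the routes.

# Minimal request sketch; base URL and payload values are assumptions.
import httpx

payload = {"query_text": "agent frameworks", "limit": 5}
resp = httpx.post("http://localhost:8000/unique-titles", json=payload, timeout=30.0)
for item in resp.json()["results"]:
    print(item["title"], "-", item["url"])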
src/config.py
ADDED
|
@@ -0,0 +1,202 @@
import os
from typing import ClassVar

import yaml
from pydantic import BaseModel, Field, SecretStr, model_validator
from pydantic_settings import BaseSettings, SettingsConfigDict

from src.models.article_models import FeedItem


# -----------------------------
# Supabase database settings
# -----------------------------
class SupabaseDBSettings(BaseModel):
    table_name: str = Field(default="substack_articles", description="Supabase table name")
    host: str = Field(default="localhost", description="Database host")
    name: str = Field(default="postgres", description="Database name")
    user: str = Field(default="postgres", description="Database user")
    password: SecretStr = Field(default=SecretStr("password"), description="Database password")
    port: int = Field(default=6543, description="Database port")
    test_database: str = Field(default="substack_test", description="Test database name")


# -----------------------------
# RSS settings
# -----------------------------
class RSSSettings(BaseModel):
    feeds: list[FeedItem] = Field(
        default_factory=list[FeedItem], description="List of RSS feed items"
    )
    default_start_date: str = Field(default="2025-09-15", description="Default cutoff date")
    batch_size: int = Field(
        default=5, description="Number of articles to parse and ingest in a batch"
    )


# -----------------------------
# Qdrant settings
# -----------------------------
# BAAI/bge-large-en-v1.5 (1024), BAAI/bge-base-en-v1.5 (HF, 768), BAAI/bge-base-en (Fastembed, 768)
class QdrantSettings(BaseModel):
    url: str = Field(default="", description="Qdrant API URL")
    api_key: str = Field(default="", description="Qdrant API key")
    timeout: int = Field(default=30, description="Qdrant client timeout")
    collection_name: str = Field(
        default="substack_collection", description="Qdrant collection name"
    )
    dense_model_name: str = Field(default="BAAI/bge-base-en", description="Dense model name")
    sparse_model_name: str = Field(
        default="Qdrant/bm25", description="Sparse model name"
    )  # prithivida/Splade_PP_en_v1 (larger)
    vector_dim: int = Field(
        default=768,
        description="Vector dimension",  # 768, or 1024 with Jina or large HF
    )
    article_batch_size: int = Field(
        default=5, description="Number of articles to parse and ingest in a batch"
    )
    sparse_batch_size: int = Field(default=32, description="Sparse batch size")
    embed_batch_size: int = Field(default=50, description="Dense embedding batch")
    upsert_batch_size: int = Field(default=25, description="Batch size for Qdrant upsert")
    max_concurrent: int = Field(default=2, description="Maximum number of concurrent tasks")


# -----------------------------
# Text splitting
# -----------------------------
class TextSplitterSettings(BaseModel):
    chunk_size: int = Field(default=4000, description="Size of text chunks")
    chunk_overlap: int = Field(default=200, description="Overlap between text chunks")
    separators: list[str] = Field(
        default_factory=lambda: [
            "\n---\n",
            "\n\n",
            "\n```\n",
            "\n## ",
            "\n# ",
            "\n**",
            "\n",
            ". ",
            "! ",
            "? ",
            " ",
            "",
        ],
        description="List of separators for text splitting. The order of the separators matters",
    )


# -----------------------------
# Jina Settings
# -----------------------------
class JinaSettings(BaseModel):
    api_key: str = Field(default="", description="Jina API key")
    url: str = Field(default="https://api.jina.ai/v1/embeddings", description="Jina API URL")
    model: str = Field(default="jina-embeddings-v3", description="Jina model name")  # 1024


# -----------------------------
# Hugging Face Settings
# -----------------------------
# BAAI/bge-large-en-v1.5 (1024), BAAI/bge-base-en-v1.5 (768)
class HuggingFaceSettings(BaseModel):
    api_key: str = Field(default="", description="Hugging Face API key")
    model: str = Field(default="BAAI/bge-base-en-v1.5", description="Hugging Face model name")


# -----------------------------
# OpenAI Settings
# -----------------------------
class OpenAISettings(BaseModel):
    api_key: str | None = Field(default="", description="OpenAI API key")
    # model: str = Field(default="gpt-4o-mini", description="OpenAI model name")


# -----------------------------
# OpenRouter Settings
# -----------------------------
class OpenRouterSettings(BaseModel):
    api_key: str = Field(default="", description="OpenRouter API key")
    api_url: str = Field(default="https://openrouter.ai/api/v1", description="OpenRouter API URL")


# -----------------------------
# Opik Observability Settings
# -----------------------------
class OpikObservabilitySettings(BaseModel):
    api_key: str = Field(default="", description="Opik Observability API key")
    project_name: str = Field(default="substack-pipeline", description="Opik project name")


# -----------------------------
# YAML loader
# -----------------------------
def load_yaml_feeds(path: str) -> list[FeedItem]:
    """
    Load RSS feed items from a YAML file.
    If the file does not exist or is empty, returns an empty list.

    Args:
        path (str): Path to the YAML file.

    Returns:
        list[FeedItem]: List of FeedItem instances loaded from the file.
    """
    if not os.path.exists(path):
        return []
    with open(path, encoding="utf-8") as f:
        data = yaml.safe_load(f) or {}
    feed_list = data.get("feeds", [])
    return [FeedItem(**feed) for feed in feed_list]


# -----------------------------
# Main Settings
# -----------------------------
class Settings(BaseSettings):
    supabase_db: SupabaseDBSettings = Field(default_factory=SupabaseDBSettings)
    qdrant: QdrantSettings = Field(default_factory=QdrantSettings)
    rss: RSSSettings = Field(default_factory=RSSSettings)
    text_splitter: TextSplitterSettings = Field(default_factory=TextSplitterSettings)

    jina: JinaSettings = Field(default_factory=JinaSettings)
    hugging_face: HuggingFaceSettings = Field(default_factory=HuggingFaceSettings)
    openai: OpenAISettings = Field(default_factory=OpenAISettings)
    openrouter: OpenRouterSettings = Field(default_factory=OpenRouterSettings)
    opik: OpikObservabilitySettings = Field(default_factory=OpikObservabilitySettings)

    rss_config_yaml_path: str = "src/configs/feeds_rss.yaml"

    # Pydantic v2 model config
| 172 |
+
model_config: ClassVar[SettingsConfigDict] = SettingsConfigDict(
|
| 173 |
+
env_file=[".env"],
|
| 174 |
+
env_file_encoding="utf-8",
|
| 175 |
+
extra="ignore",
|
| 176 |
+
env_nested_delimiter="__",
|
| 177 |
+
case_sensitive=False,
|
| 178 |
+
frozen=True,
|
| 179 |
+
)
|
| 180 |
+
|
| 181 |
+
@model_validator(mode="after")
|
| 182 |
+
def load_yaml_rss_feeds(self) -> "Settings":
|
| 183 |
+
"""
|
| 184 |
+
Load RSS feeds from a YAML file after model initialization.
|
| 185 |
+
If the file does not exist or is empty, the feeds list remains unchanged.
|
| 186 |
+
|
| 187 |
+
Args:
|
| 188 |
+
self (Settings): The settings instance.
|
| 189 |
+
|
| 190 |
+
Returns:
|
| 191 |
+
Settings: The updated settings instance.
|
| 192 |
+
"""
|
| 193 |
+
yaml_feeds = load_yaml_feeds(self.rss_config_yaml_path)
|
| 194 |
+
if yaml_feeds:
|
| 195 |
+
self.rss.feeds = yaml_feeds
|
| 196 |
+
return self
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
# -----------------------------
|
| 200 |
+
# Instantiate settings
|
| 201 |
+
# -----------------------------
|
| 202 |
+
settings = Settings()
|
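Because the model config above sets env_nested_delimiter="__" and case_sensitive=False, every nested field can be overridden from .env or the process environment without touching code. A minimal sketch of such an override (the URL is a placeholder value, not something shipped in this commit):

# Hypothetical override: QDRANT__URL maps onto settings.qdrant.url via the "__" delimiter.
import os

os.environ["QDRANT__URL"] = "https://qdrant.example.com:6333"  # placeholder URL
os.environ["SUPABASE_DB__PORT"] = "5432"                       # overrides the 6543 default

from src.config import Settings

settings = Settings()
print(settings.qdrant.url)        # -> https://qdrant.example.com:6333
print(settings.supabase_db.port)  # -> 5432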
src/configs/feeds_rss.yaml
ADDED
@@ -0,0 +1,91 @@
feeds:
  - name: "AI Echoes"
    author: "Benito Martin"
    url: "https://aiechoes.substack.com/feed"
  - name: "The Neural Maze"
    author: "Miguel Otero"
    url: "https://theneuralmaze.substack.com/feed"
  - name: "Decoding ML"
    author: "Paul Iusztin"
    url: "https://decodingml.substack.com/feed"
  - name: "Swirl AI Newsletter"
    author: "Aurimas Griciūnas"
    url: "https://www.newsletter.swirlai.com/feed"
  - name: "Marvelous MLOps Substack"
    author: "Başak Tuğçe Eskili and Maria Vechtomova"
    url: "https://marvelousmlops.substack.com/feed"
  - name: "Jam with AI"
    author: "Shirin Khosravi Jam and Shantanu Ladhwe"
    url: "https://jamwithai.substack.com/feed"
  - name: "Hamel's Substack"
    author: "Hamel Husain"
    url: "https://hamelhusain.substack.com/feed"
  - name: "Neural Bits"
    author: "Alex Razvant"
    url: "https://multimodalai.substack.com/feed"
  - name: "DiamantAI"
    author: "Nir Diamant"
    url: "https://diamantai.substack.com/feed"
  - name: "ByteByteGo Newsletter"
    author: "Alex Xu"
    url: "https://blog.bytebytego.com/feed"
  - name: "Latent.Space"
    author: "Latent.Space"
    url: "https://www.latent.space/feed"
  - name: "Adaline Labs"
    author: "Adaline"
    url: "https://labs.adaline.ai/feed"
  - name: "Gradient Ascent"
    author: "Sairam Sundaresan"
    url: "https://newsletter.artofsaience.com/feed"
  - name: "Daily Dose of Data Science"
    author: "Avi Chawla"
    url: "https://blog.dailydoseofds.com/feed"
  - name: "Generative AI for Everyone"
    author: "Hamza Farooq"
    url: "https://boringbot.substack.com/feed"
  - name: "Vizuara's AI Newsletter"
    author: "Vizuara AI Labs"
    url: "https://www.vizuaranewsletter.com/feed"
  - name: "Deep (Learning) Focus"
    author: "Cameron R. Wolfe, Ph.D."
    url: "https://cameronrwolfe.substack.com/feed"
  - name: "Language Models & Co."
    author: "Jay Alammar"
    url: "https://newsletter.languagemodels.co/feed"
  - name: "Exploring Language Models"
    author: "Maarten Grootendorst"
    url: "https://newsletter.maartengrootendorst.com/feed"
  - name: "Hyperplane"
    author: "Cube Digital"
    url: "https://thehyperplane.substack.com/feed"
  - name: "ModelCraft"
    author: "Abi Aryan"
    url: "https://modelcraft.substack.com/feed"
  - name: "NeoSage"
    author: "Shivani Virdi"
    url: "https://blog.neosage.io/feed"
  - name: "Nnitiwe's AI Blog"
    author: "Samuel Theophilus"
    url: "https://blog.nnitiwe.io/feed"
  - name: "The Palindrome"
    author: "Tivadar Danka"
    url: "https://thepalindrome.org/feed"
  - name: "Python & Chill"
    author: "Banias Baabe"
    url: "https://pythonandchill.substack.com/feed"
  - name: "Rami's Data Newsletter"
    author: "Rami Krispin"
    url: "https://ramikrispin.substack.com/feed"
  - name: "To Data & Beyond"
    author: "Youssef Hosni"
    url: "https://youssefh.substack.com/feed"
  - name: "Vanishing Gradients"
    author: "Hugo Bowne-Anderson"
    url: "https://hugobowne.substack.com/feed"
  - name: "When Engineers meet AI"
    author: "Kannan Kalidasan"
    url: "https://engineersmeetai.substack.com/feed"
  - name: "slys.dev"
    author: "Anna & Jakub Slys"
    url: "https://iam.slys.dev/feed"
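Each entry above becomes a FeedItem when load_yaml_feeds (defined in src/config.py) reads the file, and the Settings validator picks the list up automatically at startup. A minimal sketch of reading the feeds directly, assuming the repository root is on the Python path:

# Sketch: parse feeds_rss.yaml with the helper from src/config.py.
from src.config import load_yaml_feeds

feeds = load_yaml_feeds("src/configs/feeds_rss.yaml")
print(len(feeds))  # 30 feeds are defined in this file
for feed in feeds[:3]:
    print(f"{feed.name} ({feed.author}): {feed.url}")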
src/infrastructure/__init__.py
ADDED
File without changes
src/infrastructure/qdrant/__init__.py
ADDED
File without changes
src/infrastructure/qdrant/create_collection.py
ADDED
@@ -0,0 +1,47 @@
import asyncio

from src.infrastructure.qdrant.qdrant_vectorstore import AsyncQdrantVectorStore
from src.utils.logger_util import setup_logging

logger = setup_logging()


async def main() -> None:
    """Create a Qdrant collection asynchronously using AsyncQdrantVectorStore.

    This function initializes an AsyncQdrantVectorStore instance and calls its
    create_collection method to set up a Qdrant collection for vector storage.
    Errors during collection creation are logged
    and handled gracefully.

    Args:
        None

    Returns:
        None

    Raises:
        RuntimeError: If an error occurs during Qdrant collection creation.
        Exception: For unexpected errors during execution.

    """
    # Initialize the logger
    logger.info("Creating Qdrant collection")

    try:
        # Initialize the AsyncQdrantVectorStore instance
        vectorstore = AsyncQdrantVectorStore()
        # Create the Qdrant collection asynchronously
        await vectorstore.create_collection()
        logger.info("Qdrant collection created successfully")

    except RuntimeError as e:
        logger.error(f"Failed to create Qdrant collection: {e}")
        raise RuntimeError("Error creating Qdrant collection") from e
    except Exception as e:
        logger.error(f"Unexpected error during Qdrant collection creation: {e}")
        raise


if __name__ == "__main__":
    asyncio.run(main())
src/infrastructure/qdrant/create_indexes.py
ADDED
@@ -0,0 +1,44 @@
import asyncio

from src.infrastructure.qdrant.qdrant_vectorstore import AsyncQdrantVectorStore
from src.utils.logger_util import setup_logging

logger = setup_logging()


async def main() -> None:
    """Create necessary indexes for the Qdrant vector store.

    Initializes an AsyncQdrantVectorStore and creates HNSW, title, article authors,
    feed author, and feed name indexes. Logs errors and ensures proper execution.

    Args:
        None

    Returns:
        None

    Raises:
        RuntimeError: If an error occurs during index creation.
        Exception: For unexpected errors during execution.

    """
    logger.info("Creating Qdrant indexes")
    try:
        vectorstore = AsyncQdrantVectorStore()
        await vectorstore.enable_hnsw()
        await vectorstore.create_title_index()
        await vectorstore.create_article_authors_index()
        await vectorstore.create_feed_author_index()
        await vectorstore.create_article_feed_name_index()
        logger.info("Qdrant indexes created successfully")
    except RuntimeError as e:
        logger.error(f"Failed to create Qdrant indexes: {e}")
        raise RuntimeError("Error creating Qdrant indexes") from e
    except Exception as e:
        logger.error(f"Unexpected error creating Qdrant indexes: {e}")
        raise


if __name__ == "__main__":
    asyncio.run(main())
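create_collection.py and create_indexes.py are standalone entry points (e.g. runnable with python -m). A hypothetical bootstrap helper, not part of this commit, could chain their main() coroutines so the collection exists before the indexes are built:

# bootstrap_qdrant.py - hypothetical glue script reusing the two entry points above.
import asyncio

from src.infrastructure.qdrant import create_collection, create_indexes


async def bootstrap() -> None:
    # The collection must exist before the HNSW and payload indexes can be created on it.
    await create_collection.main()
    await create_indexes.main()


if __name__ == "__main__":
    asyncio.run(bootstrap())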