IndraneelKumar committed
Commit 266d7bc
0 Parent(s)

Initial search engine commit

This view is limited to 50 files because the commit contains too many changes.

Files changed (50)
  1. .dockerignore +64 -0
  2. .env.example +58 -0
  3. .github/workflows/cd.yml +32 -0
  4. .github/workflows/ci.yml +78 -0
  5. .gitignore +219 -0
  6. .pre-commit-config.yaml +67 -0
  7. .prefectignore +41 -0
  8. .python-version +1 -0
  9. .vscode/settings.json +7 -0
  10. Dockerfile +65 -0
  11. Makefile +200 -0
  12. README.md +74 -0
  13. cloudbuild_fastapi.yaml +12 -0
  14. deploy_fastapi.sh +105 -0
  15. frontend/__init__.py +0 -0
  16. frontend/app.py +560 -0
  17. prefect-cloud.yaml +52 -0
  18. prefect-local.yaml +53 -0
  19. pyproject.toml +174 -0
  20. requirements.txt +23 -0
  21. src/__init__.py +0 -0
  22. src/api/__init__.py +0 -0
  23. src/api/exceptions/__init__.py +0 -0
  24. src/api/exceptions/exception_handlers.py +97 -0
  25. src/api/main.py +142 -0
  26. src/api/middleware/__init__.py +0 -0
  27. src/api/middleware/logging_middleware.py +73 -0
  28. src/api/models/__init__.py +0 -0
  29. src/api/models/api_models.py +85 -0
  30. src/api/models/provider_models.py +77 -0
  31. src/api/routes/__init__.py +0 -0
  32. src/api/routes/health_routes.py +52 -0
  33. src/api/routes/search_routes.py +123 -0
  34. src/api/services/__init__.py +0 -0
  35. src/api/services/generation_service.py +137 -0
  36. src/api/services/providers/__init__.py +0 -0
  37. src/api/services/providers/huggingface_service.py +64 -0
  38. src/api/services/providers/openai_service.py +181 -0
  39. src/api/services/providers/openrouter_service.py +254 -0
  40. src/api/services/providers/utils/__init__.py +0 -0
  41. src/api/services/providers/utils/evaluation_metrics.py +110 -0
  42. src/api/services/providers/utils/messages.py +18 -0
  43. src/api/services/providers/utils/prompts.py +77 -0
  44. src/api/services/search_service.py +188 -0
  45. src/config.py +202 -0
  46. src/configs/feeds_rss.yaml +91 -0
  47. src/infrastructure/__init__.py +0 -0
  48. src/infrastructure/qdrant/__init__.py +0 -0
  49. src/infrastructure/qdrant/create_collection.py +47 -0
  50. src/infrastructure/qdrant/create_indexes.py +44 -0
.dockerignore ADDED
@@ -0,0 +1,64 @@
+ # Git
+ .git
+ .gitignore
+
+ # Python
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ .pytest_cache/
+ .coverage
+ htmlcov/
+ .tox/
+ .ruff_cache/
+ .mypy_cache/
+
+ # Virtual environments
+ venv/
+ .venv/
+ env/
+ ENV/
+
+ # IDE files
+ .idea/
+ .vscode/
+ *.swp
+ *.swo
+
+ # Build directories
+ dist/
+ build/
+ *.egg-info/
+
+ # Docker
+ .dockerignore
+ docker-compose*.yml
+
+ # Logs
+ logs/
+ *.log
+
+ # Temporary files
+ .tmp/
+ tmp/
+
+ # Documentation
+ docs/
+ # README.md
+ CHANGELOG.md
+ LICENSE
+
+ # Test data
+ images/
+
+ # Project directories
+ tests/
+ src/pipelines/
+ src/infrastructure/supabase/
+ # uv.lock
+ pre-commit-config.yaml
+ # pyproject.toml
+ .python-version
+ MEMORY.md
.env.example ADDED
@@ -0,0 +1,58 @@
+ SUPABASE_DB__TABLE_NAME=substack_articles
+ SUPABASE_DB__HOST=your_supabase_db_host_here
+ SUPABASE_DB__NAME=postgres
+ SUPABASE_DB__USER=your_supabase_db_user_here
+ SUPABASE_DB__PASSWORD=your_supabase_db_password_here
+ SUPABASE_DB__PORT=6543
+
+ # RSS
+ RSS__DEFAULT_START_DATE=2025-07-01
+ RSS__BATCH_SIZE=30
+
+ # Qdrant configuration
+ QDRANT__API_KEY=your_qdrant_api_key_here
+ QDRANT__URL=your_qdrant_url_here
+ QDRANT__COLLECTION_NAME=substack_collection
+ QDRANT__DENSE_MODEL_NAME=BAAI/bge-base-en-v1.5 # BAAI/bge-large-en-v1.5 (1024), BAAI/bge-base-en-v1.5 (HF, 768). BAAI/bge-base-en (Fastembed, 768)
+ QDRANT__SPARSE_MODEL_NAME=Qdrant/bm25 # prithivida/Splade_PP_en_v1, Qdrant/bm25
+ QDRANT__VECTOR_DIM=768 # 768, 1024
+ QDRANT__ARTICLE_BATCH_SIZE=5
+ QDRANT__SPARSE_BATCH_SIZE=32
+ QDRANT__EMBED_BATCH_SIZE=50 # 50
+ QDRANT__UPSERT_BATCH_SIZE=100 # 50
+ QDRANT__MAX_CONCURRENT=3
+
+ # Text splitting
+ TS__CHUNK_SIZE=4000
+ TS__CHUNK_OVERLAP=200
+
+ # PREFECT
+ PREFECT__API_KEY=your_prefect_api_key_here
+ PREFECT__WORKSPACE=your_prefect_workspace_here
+ PREFECT__API_URL=your_prefect_api_url_here
+
+ # JINA
+ JINA__API_KEY=your_jina_api_key_here
+ JINA__URL=https://api.jina.ai/v1/embeddings
+ JINA__MODEL=jina-embeddings-v3
+
+ # HUGGING FACE
+ HUGGING_FACE__API_KEY=your_hugging_face_api_key_here
+ HUGGING_FACE__MODEL=BAAI/bge-base-en-v1.5
+
+ # OPENAI
+ OPENAI__API_KEY=your_openai_api_key_here
+
+ # OPENROUTER
+ OPENROUTER__API_KEY=your_openrouter_api_key_here
+ OPENROUTER__API_URL=https://openrouter.ai/api/v1
+
+ # OPIK OBSERVABILITY
+ OPIK__API_KEY=your_opik_api_key_here
+ OPIK__PROJECT_NAME=substack-pipeline
+
+ # FastAPI Endpoint
+ BACKEND_URL=your_fastapi_backend_url_here
+
+ # Default (8501)
+ ALLOWED_ORIGINS=your_allowed_origins_here_as_comma_separated_values
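The double-underscore names above map onto nested Pydantic Settings groups (the README notes that config is handled via Pydantic Settings). A minimal sketch of how such variables could be loaded, assuming `pydantic-settings` with a `__` nested delimiter; the class and field names are illustrative, not necessarily those in the project's actual `src/config.py`:

```python
# Hypothetical sketch -- class/field names are illustrative, not the project's actual src/config.py.
from pydantic import BaseModel
from pydantic_settings import BaseSettings, SettingsConfigDict


class QdrantSettings(BaseModel):
    url: str
    api_key: str
    collection_name: str = "substack_collection"
    dense_model_name: str = "BAAI/bge-base-en-v1.5"
    vector_dim: int = 768


class Settings(BaseSettings):
    # With env_nested_delimiter="__", QDRANT__URL populates settings.qdrant.url,
    # QDRANT__API_KEY populates settings.qdrant.api_key, and so on.
    model_config = SettingsConfigDict(env_file=".env", env_nested_delimiter="__", extra="ignore")

    qdrant: QdrantSettings


settings = Settings()  # reads .env / environment variables at instantiation
```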
.github/workflows/cd.yml ADDED
@@ -0,0 +1,32 @@
+ name: CD
+
+ on:
+   workflow_dispatch:
+   push:
+     branches:
+       # - main
+       - develop
+
+ jobs:
+   deploy:
+     runs-on: ubuntu-latest
+
+     steps:
+       - name: Checkout code
+         uses: actions/checkout@v4
+
+       - name: Set up Python
+         uses: actions/setup-python@v5
+         with:
+           python-version-file: .python-version
+
+       - name: Run Prefect Deploy
+         uses: PrefectHQ/actions-prefect-deploy@v4
+         with:
+           all-deployments: "true" # deploy all deployments in prefect.yaml
+           requirements-file-paths: ./requirements.txt
+           deployment-file-path: ./prefect-cloud.yaml
+         env:
+           PREFECT_API_KEY: ${{ secrets.PREFECT__API_KEY }}
+           PREFECT_WORKSPACE: ${{ secrets.PREFECT__WORKSPACE }}
+           PREFECT_API_URL: ${{ secrets.PREFECT__API_URL }}
.github/workflows/ci.yml ADDED
@@ -0,0 +1,78 @@
+ name: CI
+
+ on:
+   workflow_dispatch:
+
+   push:
+
+     branches:
+       # - main
+       - develop
+
+ jobs:
+   lint-and-test:
+     runs-on: ubuntu-latest
+
+     env:
+       # Supabase secrets
+       SUPABASE_DB__TABLE_NAME: ${{ secrets.SUPABASE_DB__TABLE_NAME }}
+       SUPABASE_DB__HOST: ${{ secrets.SUPABASE_DB__HOST }}
+       SUPABASE_DB__NAME: ${{ secrets.SUPABASE_DB__NAME }}
+       SUPABASE_DB__USER: ${{ secrets.SUPABASE_DB__USER }}
+       SUPABASE_DB__PASSWORD: ${{ secrets.SUPABASE_DB__PASSWORD }}
+       SUPABASE_DB__PORT: ${{ secrets.SUPABASE_DB__PORT }}
+
+       # Qdrant secrets
+       QDRANT__API_KEY: ${{ secrets.QDRANT__API_KEY }}
+       QDRANT__URL: ${{ secrets.QDRANT__URL }}
+       QDRANT__COLLECTION_NAME: ${{ secrets.QDRANT__COLLECTION_NAME }}
+
+       # OpenRouter secrets
+       OPENROUTER__API_KEY: ${{ secrets.OPENROUTER__API_KEY }}
+       OPENROUTER__API_URL: ${{ secrets.OPENROUTER__API_URL }}
+
+       # OPIK secrets
+       OPIK__API_KEY: ${{ secrets.OPIK__API_KEY }}
+       OPIK__PROJECT_NAME: ${{ secrets.OPIK__PROJECT_NAME }}
+
+       # FastAPI secrets
+       ALLOWED_ORIGINS: ${{ secrets.ALLOWED_ORIGINS }}
+
+     steps:
+       - name: Checkout code
+         uses: actions/checkout@v4
+
+       - name: Install uv
+         uses: astral-sh/setup-uv@v5
+
+       - name: Set up Python
+         uses: actions/setup-python@v5
+         with:
+           python-version-file: .python-version
+
+       - name: Install dependencies
+         run: uv sync --all-groups
+
+       - name: Run pre-commit hooks
+         run: |
+           source .venv/bin/activate
+           pre-commit install
+           pre-commit run --all-files
+
+       # 🔹 Debug step: check that DB env vars are set
+       - name: Check DB environment variables
+         run: |
+           for var in SUPABASE_DB__HOST SUPABASE_DB__NAME SUPABASE_DB__USER SUPABASE_DB__PORT SUPABASE_DB__TABLE_NAME \
+                      QDRANT__API_KEY QDRANT__URL QDRANT__COLLECTION_NAME \
+                      OPENROUTER__API_KEY OPENROUTER__API_URL \
+                      ALLOWED_ORIGINS; do
+             if [ -z "${!var}" ]; then
+               echo "ERROR: $var is empty!"
+               exit 1
+             else
+               echo "$var is set"
+             fi
+           done
+
+       - name: Run tests
+         run: uv run pytest
.gitignore ADDED
@@ -0,0 +1,219 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # UV
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ #uv.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+ .pdm.toml
+ .pdm-python
+ .pdm-build/
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+
+ # Abstra
+ # Abstra is an AI-powered process automation framework.
+ # Ignore directories containing user credentials, local state, and settings.
+ # Learn more at https://abstra.io/docs
+ .abstra/
+
+ # Visual Studio Code
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
+ # you could uncomment the following to ignore the entire vscode folder
+ # .vscode/
+
+ # Ruff stuff:
+ .ruff_cache/
+
+ # PyPI configuration file
+ .pypirc
+
+ # Cursor
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+ # refer to https://docs.cursor.com/context/ignore-files
+ .cursorignore
+ .cursorindexingignore
+
+ # FILES
+ MEMORY.md
+ DOCKER.md
+ INSTRUCTIONS.md
+ create_prefect_secrets.py
+ OLD_README.md
+ delete_deployment.sh
+ update_deploy_fastapi.sh
+ src/infrastructure/qdrant/query_scroll.py
+ src/infrastructure/qdrant/query_search.py
+ src/pipelines/flows/rss_ingestion_flow_old.py
+ src/pipelines/tasks/fetch_rss_old.py
+ src/pipelines/tasks/parse_articles_new.py
+ src/pipelines/tasks/batch_parse_ingest_articles.py
+ experiments/
+ src/configs/all_feeds.yaml
+ src/pipelines/flows/backfilling_archive_flow.py
+ src/pipelines/tasks/fetch_archive.py
+ src/pipelines/tasks/ingest_archive.py
+ src/pipelines/tasks/parse_archive.py
+ src/configs/feeds_archive.yaml
+ deploy_gradio.sh
+ frontend/Dockerfile
+ frontend/requirements.txt
.pre-commit-config.yaml ADDED
@@ -0,0 +1,67 @@
+ repos:
+   - repo: https://github.com/astral-sh/uv-pre-commit
+     # uv version.
+     rev: 0.8.17
+     hooks:
+       # Update the uv lockfile
+       - id: uv-lock
+
+   - repo: https://github.com/pre-commit/pre-commit-hooks
+     rev: v6.0.0
+     hooks:
+       - id: check-added-large-files
+         args: ['--maxkb=20000']
+       - id: check-toml
+       - id: check-yaml
+         args: [--allow-multiple-documents]
+       - id: end-of-file-fixer
+       - id: trailing-whitespace
+       - id: check-json
+       - id: detect-private-key
+
+   - repo: https://github.com/pre-commit/mirrors-mypy
+     rev: v1.18.1
+     hooks:
+       - id: mypy
+         additional_dependencies:
+           - types-pyyaml>=6.0.12.20250822
+           - types-requests>=2.32.4.20250809
+           - types-python-dateutil>=2.9.0.20250822
+           - types-markdown>=3.9.0.20250906
+         args: ["--config-file=pyproject.toml"]
+
+   - repo: https://github.com/astral-sh/ruff-pre-commit
+     rev: v0.13.0
+     hooks:
+       - id: ruff-check
+         args:
+           [
+             --fix,
+             --exit-non-zero-on-fix,
+             --show-fixes
+           ]
+       - id: ruff-format
+
+   - repo: https://github.com/hukkin/mdformat
+     rev: 0.7.22
+     hooks:
+       - id: mdformat
+         additional_dependencies:
+           - mdformat-gfm
+         exclude: ^team_data/
+
+
+   - repo: https://github.com/gitleaks/gitleaks
+     rev: v8.28.0
+     hooks:
+       - id: gitleaks
+
+
+   # - repo: local
+   #   hooks:
+   #     - id: pytest
+   #       name: pytest
+   #       entry: pytest
+   #       language: system
+   #       types: [python]
+   #       pass_filenames: false
.prefectignore ADDED
@@ -0,0 +1,41 @@
+ # prefect artifacts
+ .prefectignore
+
+ # python artifacts
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.egg-info/
+ *.egg
+
+ # Type checking artifacts
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+ .pyre/
+
+ # IPython
+ profile_default/
+ ipython_config.py
+ *.ipynb_checkpoints/*
+
+ # Environments
+ .python-version
+ .env
+ .venv
+ env/
+ venv/
+
+ # MacOS
+ .DS_Store
+
+ # Dask
+ dask-worker-space/
+
+ # Editors
+ .idea/
+ .vscode/
+
+ # VCS
+ .git/
+ .hg/
.python-version ADDED
@@ -0,0 +1 @@
+ 3.12
.vscode/settings.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "terminal.integrated.defaultProfile.linux": "zsh",
+   "terminal.integrated.defaultProfile.windows": "",
+   "cSpell.words": [
+     "fastapi"
+   ]
+ }
Dockerfile ADDED
@@ -0,0 +1,65 @@
+ # ---------- Build Stage ----------
+ FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder
+
+ WORKDIR /app
+
+ # System deps required for building some Python wheels (e.g., madoka)
+ RUN apt-get update && apt-get install -y --no-install-recommends \
+     build-essential g++ \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Configure UV for optimal performance
+ ENV UV_COMPILE_BYTECODE=1
+ ENV UV_LINK_MODE=copy
+ ENV UV_PYTHON_DOWNLOADS=never
+
+ # Copy dependency files and sync dependencies
+ RUN --mount=type=cache,target=/root/.cache/uv \
+     --mount=type=bind,source=uv.lock,target=uv.lock \
+     --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+     uv sync --locked --no-install-project --no-dev
+
+ # Copy source code selectively
+ COPY src/api ./src/api
+ COPY src/config.py ./src/config.py
+ COPY src/infrastructure/qdrant ./src/infrastructure/qdrant
+ COPY src/models ./src/models
+ COPY src/utils ./src/utils
+
+ # Also copy README.md, pyproject.toml and uv.lock for the final sync
+ COPY pyproject.toml uv.lock README.md ./
+
+ # Install project dependencies into virtualenv
+ RUN --mount=type=cache,target=/root/.cache/uv \
+     uv sync --locked --no-dev
+
+
+ # ---------- Runtime Stage ----------
+ FROM python:3.12-slim-bookworm
+
+ # Copy built application and virtualenv from builder
+ COPY --from=builder /app /app
+
+ # Install runtime tools used by HEALTHCHECK
+ RUN apt-get update && apt-get install -y --no-install-recommends \
+     curl \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Set Python path and environment variables
+ ENV PATH="/app/.venv/bin:$PATH"
+ ENV PYTHONPATH=/app
+ ENV HF_HOME=/tmp/huggingface
+ ENV FASTEMBED_CACHE=/tmp/fastembed_cache
+ ENV PORT=8080
+
+ # Create cache directories
+ RUN mkdir -p $HF_HOME $FASTEMBED_CACHE && chmod -R 755 $HF_HOME $FASTEMBED_CACHE
+
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+     CMD curl -f http://localhost:$PORT/health || exit 1
+
+ # Expose Cloud Run port
+ EXPOSE $PORT
+
+ # Run FastAPI with uvicorn
+ CMD ["uvicorn", "src.api.main:app", "--host", "0.0.0.0", "--port", "8080", "--workers", "1", "--loop", "uvloop"]
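For local testing of this image, a typical invocation might look like the following; the image tag is illustrative and `.env` is assumed to contain the variables from `.env.example`:

```bash
# Build the image (tag name is illustrative)
docker build -t substack-search-api .

# Run it locally, exposing the Cloud Run-style port 8080 configured above
docker run --rm -p 8080:8080 --env-file .env substack-search-api

# The container's HEALTHCHECK probes this endpoint
curl -f http://localhost:8080/health
```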
Makefile ADDED
@@ -0,0 +1,200 @@
+ # Makefile
+
+ # Check if .env exists
+ ifeq (,$(wildcard .env))
+ $(error .env file is missing at .env. Please create one based on .env.example)
+ endif
+
+ # Load environment variables from .env
+ include .env
+
+ .PHONY: tests mypy clean help ruff-check ruff-check-fix ruff-format ruff-format-fix all-check all-fix
+
+ #################################################################################
+ ## Supabase Commands
+ #################################################################################
+
+ supabase-create: ## Create Supabase database
+ 	@echo "Creating Supabase database..."
+ 	uv run python src/infrastructure/supabase/create_db.py
+
+ supabase-delete: ## Delete Supabase database
+ 	@echo "Deleting Supabase database..."
+ 	uv run python src/infrastructure/supabase/delete_db.py
+
+ #################################################################################
+ ## Qdrant Commands
+ #################################################################################
+
+ qdrant-create-collection: ## Create Qdrant collection
+ 	@echo "Creating Qdrant collection..."
+ 	uv run python src/infrastructure/qdrant/create_collection.py
+
+ qdrant-delete-collection: ## Delete Qdrant collection
+ 	@echo "Deleting Qdrant collection..."
+ 	uv run python src/infrastructure/qdrant/delete_collection.py
+
+ qdrant-create-index: ## Create Qdrant index
+ 	@echo "Updating HNSW and creating Qdrant indexes..."
+ 	uv run python src/infrastructure/qdrant/create_indexes.py
+
+ qdrant-ingest-from-sql: ## Ingest data from SQL to Qdrant
+ 	@echo "Ingesting data from SQL to Qdrant..."
+ 	uv run python src/infrastructure/qdrant/ingest_from_sql.py
+ 	@echo "Data ingestion complete."
+
+ #################################################################################
+ ## Prefect Flow Commands
+ #################################################################################
+
+ ingest-rss-articles-flow: ## Ingest RSS articles flow
+ 	@echo "Running ingest RSS articles flow..."
+ 	uv run python src/pipelines/flows/rss_ingestion_flow.py
+ 	@echo "Ingest RSS articles flow completed."
+
+ ingest-embeddings-flow: ## Ingest embeddings flow
+ 	@echo "Running ingest embeddings flow..."
+ 	$(if $(FROM_DATE), \
+ 		uv run python src/pipelines/flows/embeddings_ingestion_flow.py --from-date $(FROM_DATE), \
+ 		uv run python src/pipelines/flows/embeddings_ingestion_flow.py)
+ 	@echo "Ingest embeddings flow completed."
+
+ #################################################################################
+ ## Prefect Deployment Commands
+ #################################################################################
+ deploy-cloud-flows: ## Deploy Prefect flows to Prefect Cloud
+ 	@echo "Deploying Prefect flows to Prefect Cloud..."
+ 	prefect deploy --prefect-file prefect-cloud.yaml
+ 	@echo "Prefect Cloud deployment complete."
+
+ deploy-local-flows: ## Deploy Prefect flows to Prefect Local Server
+ 	@echo "Deploying Prefect flows to Prefect Local Server..."
+ 	prefect deploy --prefect-file prefect-local.yaml
+ 	@echo "Prefect Local deployment complete."
+
+ #################################################################################
+ ## Recreate Commands
+ #################################################################################
+
+ recreate-supabase: supabase-delete supabase-create ## Recreate Supabase resources
+
+ recreate-qdrant: qdrant-delete-collection qdrant-create-collection ## Recreate Qdrant resources
+
+ recreate-all: supabase-delete qdrant-delete-collection supabase-create qdrant-create-collection ## Recreate Qdrant and Supabase resources
+
+ #################################################################################
+ ## FastAPI Commands
+ #################################################################################
+
+ run-api: ## Run FastAPI application
+ 	@echo "Starting FastAPI application..."
+ 	uv run src/api/main.py
+ 	@echo "FastAPI application stopped."
+
+ #################################################################################
+ ## Gradio Commands
+ #################################################################################
+
+ run-gradio: ## Run Gradio application
+ 	@echo "Starting Gradio application..."
+ 	uv run frontend/app.py
+ 	@echo "Gradio application stopped."
+
+ #################################################################################
+ ## Testing Commands
+ #################################################################################
+
+ unit-tests: ## Run all unit tests
+ 	@echo "Running all unit tests..."
+ 	uv run pytest tests/unit
+ 	@echo "All unit tests completed."
+
+ integration-tests: ## Run all integration tests
+ 	@echo "Running all integration tests..."
+ 	uv run pytest tests/integration
+ 	@echo "All integration tests completed."
+
+ all-tests: ## Run all tests
+ 	@echo "Running all tests..."
+ 	uv run pytest
+ 	@echo "All tests completed."
+
+ ################################################################################
+ ## Pre-commit Commands
+ ################################################################################
+
+ pre-commit-run: ## Run pre-commit hooks
+ 	@echo "Running pre-commit hooks..."
+ 	pre-commit run --all-files
+ 	@echo "Pre-commit checks complete."
+
+ ################################################################################
+ ## Linting
+ ################################################################################
+
+ # Linting (just checks)
+ ruff-check: ## Check code lint violations (--diff to show possible changes)
+ 	@echo "Checking Ruff lint violations..."
+ 	uv run ruff check .
+ 	@echo "Ruff lint checks complete."
+
+ ruff-check-fix: ## Auto-fix lint violations using Ruff
+ 	@echo "Fixing lint violations with Ruff..."
+ 	uv run ruff check . --fix --exit-non-zero-on-fix
+ 	@echo "Lint fixes complete."
+
+ ################################################################################
+ ## Formatting
+ ################################################################################
+
+ # Formatting (just checks)
+ ruff-format: ## Check code format violations (--diff to show possible changes)
+ 	@echo "Checking Ruff formatting..."
+ 	uv run ruff format . --check
+ 	@echo "Ruff format checks complete."
+
+ ruff-format-fix: ## Auto-format code using Ruff
+ 	@echo "Formatting code with Ruff..."
+ 	uv run ruff format .
+ 	@echo "Formatting complete."
+
+ #################################################################################
+ ## Static Type Checking
+ #################################################################################
+
+ mypy: ## Run MyPy static type checker
+ 	@echo "Running MyPy static type checker..."
+ 	uv run mypy
+ 	@echo "MyPy static type checker complete."
+
+ ################################################################################
+ ## Cleanup
+ ################################################################################
+
+ clean: ## Clean up cached generated files
+ 	@echo "Cleaning up generated files..."
+ 	find . -type d -name "__pycache__" -exec rm -rf {} +
+ 	find . -type d -name ".pytest_cache" -exec rm -rf {} +
+ 	find . -type d -name ".ruff_cache" -exec rm -rf {} +
+ 	find . -type d -name ".mypy_cache" -exec rm -rf {} +
+ 	find . -type f -name "*.pyc" -delete
+ 	@echo "Cleanup complete."
+
+ ################################################################################
+ ## Composite Commands
+ ################################################################################
+
+ all-check: ruff-format ruff-check clean ## Run all checks: formatting and linting
+
+ all-fix: ruff-format-fix ruff-check-fix mypy clean ## Run all fixes: auto-formatting, lint fixes and type checking
+
+ ################################################################################
+ ## Help
+ ################################################################################
+
+ help: ## Display this help message
+ 	@echo "Default target: $(.DEFAULT_GOAL)"
+ 	@echo "Available targets:"
+ 	@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
+
+ .DEFAULT_GOAL := help
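Assuming a populated `.env`, a typical local sequence with these targets might be:

```bash
make help                       # list all targets
make qdrant-create-collection   # set up the Qdrant collection
make ingest-rss-articles-flow   # pull RSS articles into Supabase
make ingest-embeddings-flow     # chunk, embed and upsert into Qdrant
make run-api                    # start the FastAPI backend
```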
README.md ADDED
@@ -0,0 +1,74 @@
+ # Articles Search Engine
+
+ A compact, production-style RAG pipeline. It ingests Substack RSS articles, stores them in Postgres (Supabase), creates dense/sparse embeddings in Qdrant, and exposes search and answer endpoints via FastAPI with a simple Gradio UI.
+
+ ## How it works (brief)
+ - Ingest RSS → Supabase:
+   - Prefect flow (`src/pipelines/flows/rss_ingestion_flow.py`) reads feeds from `src/configs/feeds_rss.yaml`, parses articles, and writes them to Postgres using SQLAlchemy models.
+ - Embed + index in Qdrant:
+   - Content is chunked, embedded (e.g., BAAI bge models), and upserted to a Qdrant collection with payload indexes for filtering and hybrid search.
+   - Collection and indexes are created via utilities in `src/infrastructure/qdrant/`.
+ - Search + generate:
+   - FastAPI (`src/api/main.py`) exposes search endpoints (keyword, semantic, hybrid) and assembles answers with citations.
+   - LLM providers are pluggable with fallback (OpenRouter, OpenAI, Hugging Face).
+ - UI + deploy:
+   - Gradio app for quick local search (`frontend/app.py`).
+   - Containerization with Docker and optional deploy to Google Cloud Run.
+
+ ## Tech stack
+ - Python 3.12, FastAPI, Prefect, SQLAlchemy
+ - Supabase (Postgres) for articles
+ - Qdrant for vector search (dense + sparse/hybrid)
+ - OpenRouter / OpenAI / Hugging Face for LLM completion
+ - Gradio UI, Docker, Google Cloud Run
+ - Config via Pydantic Settings, `uv` or `pip` for deps
+
+ ## Run locally (minimal)
+ 1) Configure environment (either `.env` or shell). Key variables (Pydantic nested with `__`):
+ - Supabase: `SUPABASE_DB__HOST`, `SUPABASE_DB__PORT`, `SUPABASE_DB__NAME`, `SUPABASE_DB__USER`, `SUPABASE_DB__PASSWORD`
+ - Qdrant: `QDRANT__URL`, `QDRANT__API_KEY`
+ - LLM (choose one): `OPENROUTER__API_KEY` or `OPENAI__API_KEY` or `HUGGING_FACE__API_KEY`
+ - Optional CORS: `ALLOWED_ORIGINS`
+
+ 2) Install dependencies:
+ ```bash
+ # with uv
+ uv venv && source .venv/bin/activate
+ uv pip install -r requirements.txt
+
+ # or with pip
+ python -m venv .venv && source .venv/bin/activate
+ pip install -r requirements.txt
+ ```
+
+ 3) Initialize storage:
+ ```bash
+ python src/infrastructure/supabase/create_db.py
+ python src/infrastructure/qdrant/create_collection.py
+ python src/infrastructure/qdrant/create_indexes.py
+ ```
+
+ 4) Ingest and embed:
+ ```bash
+ python src/pipelines/flows/rss_ingestion_flow.py
+ python src/pipelines/flows/embeddings_ingestion_flow.py
+ ```
+
+ 5) Start services:
+ ```bash
+ # REST API
+ uvicorn src.api.main:app --reload
+
+ # Gradio UI (optional)
+ python frontend/app.py
+ ```
+
+ ## Project structure (high-level)
+ - `src/api/` — FastAPI app, routes, middleware, exceptions
+ - `src/infrastructure/supabase/` — DB init and sessions
+ - `src/infrastructure/qdrant/` — Vector store and collection utilities
+ - `src/pipelines/` — Prefect flows and tasks for ingestion/embeddings
+ - `src/models/` — SQL and vector models
+ - `frontend/` — Gradio UI
+ - `src/configs/` — RSS feeds config
cloudbuild_fastapi.yaml ADDED
@@ -0,0 +1,12 @@
+ steps:
+   - name: 'gcr.io/cloud-builders/docker'
+     entrypoint: 'bash'
+     args:
+       - '-c'
+       - |
+         export DOCKER_BUILDKIT=1
+         docker build -t gcr.io/${PROJECT_ID}/${_SERVICE_NAME} -f Dockerfile .
+ substitutions:
+   _SERVICE_NAME: "substack-pipeline-fastapi"
+ images:
+   - "gcr.io/${PROJECT_ID}/${_SERVICE_NAME}"
deploy_fastapi.sh ADDED
@@ -0,0 +1,105 @@
+ #!/bin/bash
+ # -----------------------
+ # FastAPI Backend Deployment to Cloud Run
+ # -----------------------
+
+ # Exit immediately if a command exits with a non-zero status
+ set -e
+
+ #-----------------------
+ # Load environment variables
+ #-----------------------
+
+ if [ ! -f .env ]; then
+     echo "❌ .env file not found!"
+     exit 1
+ fi
+
+ # Load environment variables from .env file
+ set -o allexport
+ source .env
+ set +o allexport
+
+ echo "✅ Environment variables loaded."
+
+ # -----------------------
+ # Configuration
+ # -----------------------
+ PROJECT_ID="personal-projects-477710"
+ SERVICE_NAME="substack-pipeline-fastapi"
+ REGION="asia-south2" #europe-west1 "europe-west6"
+ IMAGE_NAME="gcr.io/$PROJECT_ID/$SERVICE_NAME"
+
+ # -----------------------
+ # Set project
+ # -----------------------
+ echo "🔧 Setting GCP project to $PROJECT_ID..."
+ gcloud config set project "$PROJECT_ID"
+
+
+ # -----------------------
+ # Enable required APIs
+ # -----------------------
+ echo "🔧 Enabling required GCP services..."
+ gcloud services enable \
+     cloudbuild.googleapis.com \
+     run.googleapis.com \
+     containerregistry.googleapis.com
+
+ # -----------------------
+ # Build and push Docker image
+ # -----------------------
+ echo "🐳 Building and pushing Docker image..."
+ gcloud builds submit --config cloudbuild_fastapi.yaml \
+     --substitutions=_SERVICE_NAME=$SERVICE_NAME
+
+ # -----------------------
+ # Deploy to Cloud Run
+ # -----------------------
+ echo "🚀 Deploying $SERVICE_NAME to Cloud Run..."
+ gcloud run deploy "$SERVICE_NAME" \
+     --image "$IMAGE_NAME" \
+     --platform managed \
+     --region "$REGION" \
+     --allow-unauthenticated \
+     --memory 2.5Gi \
+     --cpu 1 \
+     --timeout 180 \
+     --concurrency 2 \
+     --min-instances 0 \
+     --max-instances 2 \
+     --execution-environment gen2 \
+     --cpu-boost \
+     --set-env-vars HF_HOME=/tmp/huggingface \
+     --set-env-vars HUGGING_FACE__API_KEY=$HUGGING_FACE__API_KEY \
+     --set-env-vars QDRANT__API_KEY=$QDRANT__API_KEY \
+     --set-env-vars QDRANT__URL=$QDRANT__URL \
+     --set-env-vars QDRANT__COLLECTION_NAME=$QDRANT__COLLECTION_NAME \
+     --set-env-vars QDRANT__DENSE_MODEL_NAME=$QDRANT__DENSE_MODEL_NAME \
+     --set-env-vars QDRANT__SPARSE_MODEL_NAME=$QDRANT__SPARSE_MODEL_NAME \
+     --set-env-vars OPENROUTER__API_KEY=$OPENROUTER__API_KEY \
+     --set-env-vars OPIK__API_KEY=$OPIK__API_KEY \
+     --set-env-vars OPIK__PROJECT_NAME=$OPIK__PROJECT_NAME \
+     --set-env-vars "^@^ALLOWED_ORIGINS=$ALLOWED_ORIGINS@"
+
+ # Log the allowed origins
+ echo "✅ Allowed origins set to: $ALLOWED_ORIGINS"
+
+ # -----------------------
+ # Capture the deployed service URL and update BACKEND_URL
+ #-----------------------
+ SERVICE_URL=$(gcloud run services describe $SERVICE_NAME --region=$REGION --format='value(status.url)')
+ echo "Deployment complete!"
+ echo "Service URL: $SERVICE_URL"
+
+
+
+ # # -----------------------
+ # # Update BACKEND_URL dynamically
+ # # -----------------------
+ # echo "🔄 Updating BACKEND_URL to $SERVICE_URL..."
+ # gcloud run services update "$SERVICE_NAME" \
+ #     --region "$REGION" \
+ #     --update-env-vars BACKEND_URL="$SERVICE_URL"
+
+ # echo "✅ BACKEND_URL updated successfully."
frontend/__init__.py ADDED
File without changes
frontend/app.py ADDED
@@ -0,0 +1,560 @@
+ import os
+
+ import gradio as gr
+ import markdown
+ import requests
+ import yaml
+ from dotenv import load_dotenv
+
+ try:
+     from src.api.models.provider_models import MODEL_REGISTRY
+ except ImportError as e:
+     raise ImportError(
+         "Could not import MODEL_REGISTRY from src.api.models.provider_models. "
+         "Check the path and file existence."
+     ) from e
+
+ # Initialize environment variables
+ load_dotenv()
+
+ BACKEND_URL = os.getenv("BACKEND_URL", "http://localhost:8080")
+ API_BASE_URL = f"{BACKEND_URL}/search"
+
+
+ # Load feeds from YAML
+ def load_feeds():
+     """Load feeds from the YAML configuration file.
+     Returns:
+         list: List of feeds with their details.
+     """
+     feeds_path = os.path.join(os.path.dirname(__file__), "../src/configs/feeds_rss.yaml")
+     with open(feeds_path) as f:
+         feeds_yaml = yaml.safe_load(f)
+     return feeds_yaml.get("feeds", [])
+
+
+ feeds = load_feeds()
+ feed_names = [f["name"] for f in feeds]
+ feed_authors = [f["author"] for f in feeds]
+
+
+ # -----------------------
+ # API helpers
+ # -----------------------
+ def fetch_unique_titles(payload):
+     """
+     Fetch unique article titles based on the search criteria.
+
+     Args:
+         payload (dict): The search criteria including query_text, feed_author,
+             feed_name, limit, and optional title_keywords.
+     Returns:
+         list: A list of articles matching the criteria.
+     Raises:
+         Exception: If the API request fails.
+     """
+     try:
+         resp = requests.post(f"{API_BASE_URL}/unique-titles", json=payload)
+         resp.raise_for_status()
+         return resp.json().get("results", [])
+     except Exception as e:
+         raise Exception(f"Failed to fetch titles: {str(e)}") from e
+
+
+ def call_ai(payload, streaming=True):
+     """
+     Call the AI endpoint with the given payload.
+     Args:
+         payload (dict): The payload to send to the AI endpoint.
+         streaming (bool): Whether to use streaming or non-streaming endpoint.
+     Yields:
+         tuple: A tuple containing the type of response and the response text.
+     """
+     endpoint = f"{API_BASE_URL}/ask/stream" if streaming else f"{API_BASE_URL}/ask"
+     answer_text = ""
+     try:
+         if streaming:
+             with requests.post(endpoint, json=payload, stream=True) as r:
+                 r.raise_for_status()
+                 for chunk in r.iter_content(chunk_size=None, decode_unicode=True):
+                     if not chunk:
+                         continue
+                     if chunk.startswith("__model_used__:"):
+                         yield "model", chunk.replace("__model_used__:", "").strip()
+                     elif chunk.startswith("__error__"):
+                         yield "error", "Request failed. Please try again later."
+                         break
+                     elif chunk.startswith("__truncated__"):
+                         yield "truncated", "AI response truncated due to token limit."
+                     else:
+                         answer_text += chunk
+                         yield "text", answer_text
+         else:
+             resp = requests.post(endpoint, json=payload)
+             resp.raise_for_status()
+             data = resp.json()
+             answer_text = data.get("answer", "")
+             yield "text", answer_text
+             if data.get("finish_reason") == "length":
+                 yield "truncated", "AI response truncated due to token limit."
+     except Exception as e:
+         yield "error", f"Request failed: {str(e)}"
+
+
+ def get_models_for_provider(provider):
+     """
+     Get available models for a provider
+
+     Args:
+         provider (str): The name of the provider (e.g., "openrouter", "openai")
+     Returns:
+         list: List of model names available for the provider
+     """
+     provider_key = provider.lower()
+     try:
+         config = MODEL_REGISTRY.get_config(provider_key)
+         return (
+             ["Automatic Model Selection (Model Routing)"]
+             + ([config.primary_model] if config.primary_model else [])
+             + list(config.candidate_models)
+         )
+     except Exception:
+         return ["Automatic Model Selection (Model Routing)"]
+
+
+ # -----------------------
+ # Gradio interface functions
+ # -----------------------
+ def handle_search_articles(query_text, feed_name, feed_author, title_keywords, limit):
+     """
+     Handle article search
+
+     Args:
+         query_text (str): The text to search for in article titles.
+         feed_name (str): The name of the feed to filter articles by.
+         feed_author (str): The author of the feed to filter articles by.
+         title_keywords (str): Keywords to search for in article titles.
+         limit (int): The maximum number of articles to return.
+     Returns:
+         str: HTML formatted string of search results or error message.
+     Raises:
+         Exception: If the API request fails.
+     """
+     if not query_text.strip():
+         return "Please enter a query text."
+
+     payload = {
+         "query_text": query_text.strip().lower(),
+         "feed_author": feed_author.strip() if feed_author else "",
+         "feed_name": feed_name.strip() if feed_name else "",
+         "limit": limit,
+         "title_keywords": title_keywords.strip().lower() if title_keywords else None,
+     }
+
+     try:
+         results = fetch_unique_titles(payload)
+         if not results:
+             return "No results found."
+
+         html_output = ""
+         for item in results:
+             html_output += (
+                 f"<div style='background-color:#F0F8FF; padding:20px; "
+                 f"border-radius:10px; font-size:18px; margin-bottom:15px;'>\n"
+                 f" <h2 style='font-size:22px; color:#1f4e79; margin-top:0;'>"
+                 f"{item.get('title', 'No title')}</h2>\n"
+                 f" <p style='margin:5px 0;'>"
+                 f"<b>Newsletter:</b> {item.get('feed_name', 'N/A')}"
+                 f"</p>\n"
+                 f" <p style='margin:5px 0;'>"
+                 f"<b>Author:</b> {item.get('feed_author', 'N/A')}"
+                 f"</p>\n"
+                 f" <p style='margin:5px 0;'><b>Article Authors:</b> "
+                 f"{', '.join(item.get('article_author') or ['N/A'])}</p>\n"
+                 f" <p style='margin:5px 0;'><b>URL:</b> "
+                 f"<a href='{item.get('url', '#')}' target='_blank' style='color:#0066cc;'>"
+                 f"{item.get('url', 'No URL')}</a></p>\n"
+                 f"</div>\n"
+             )
+         return html_output
+
+     except Exception as e:
+         return f"<div style='color:red; padding:10px;'>Error: {str(e)}</div>"
+
+
+ def handle_ai_question_streaming(
+     query_text,
+     feed_name,
+     feed_author,
+     limit,
+     provider,
+     model,
+ ):
+     """
+     Handle AI question with streaming
+
+     Args:
+         query_text (str): The question to ask the AI.
+         feed_name (str): The name of the feed to filter articles by.
+         feed_author (str): The author of the feed to filter articles by.
+         limit (int): The maximum number of articles to consider.
+         provider (str): The LLM provider to use.
+         model (str): The specific model to use from the provider.
+     Yields:
+         tuple: (HTML formatted answer string, model info string)
+     """
+     if not query_text.strip():
+         yield "Please enter a query text.", ""
+         return
+
+     if not provider or not model:
+         yield "Please select provider and model.", ""
+         return
+
+     payload = {
+         "query_text": query_text.strip().lower(),
+         "feed_author": feed_author.strip() if feed_author else "",
+         "feed_name": feed_name.strip() if feed_name else "",
+         "limit": limit,
+         "provider": provider.lower(),
+     }
+
+     if model != "Automatic Model Selection (Model Routing)":
+         payload["model"] = model
+
+     try:
+         answer_html = ""
+         model_info = f"Provider: {provider}"
+
+         for _, (event_type, content) in enumerate(call_ai(payload, streaming=True)):
+             if event_type == "text":
+                 # Convert markdown to HTML
+                 html_content = markdown.markdown(content, extensions=["tables"])
+                 answer_html = (
+                     f"\n"
+                     f"<div style='background-color:#E8F0FE; "
+                     f"padding:15px; border-radius:10px; font-size:16px;'>\n"
+                     f" {html_content}\n"
+                     f"</div>\n"
+                 )
+                 yield answer_html, model_info
+
+             elif event_type == "model":
+                 model_info = f"Provider: {provider} | Model: {content}"
+                 yield answer_html, model_info
+
+             elif event_type == "truncated":
+                 answer_html += (
+                     f"<div style='color:#ff6600; padding:10px; font-weight:bold;'>⚠️ {content}</div>"
+                 )
+                 yield answer_html, model_info
+
+             elif event_type == "error":
+                 error_html = (
+                     f"<div style='color:red; padding:10px; font-weight:bold;'>❌ {content}</div>"
+                 )
+                 yield error_html, model_info
+                 break
+
+     except Exception as e:
+         error_html = f"<div style='color:red; padding:10px;'>Error: {str(e)}</div>"
+         yield error_html, model_info
+
+
+ def handle_ai_question_non_streaming(query_text, feed_name, feed_author, limit, provider, model):
+     """
+     Handle AI question without streaming
+
+     Args:
+         query_text (str): The question to ask the AI.
+         feed_name (str): The name of the feed to filter articles by.
+         feed_author (str): The author of the feed to filter articles by.
+         limit (int): The maximum number of articles to consider.
+         provider (str): The LLM provider to use.
+         model (str): The specific model to use from the provider.
+
+     Returns:
+         tuple: (HTML formatted answer string, model info string)
+     """
+     if not query_text.strip():
+         return "Please enter a query text.", ""
+
+     if not provider or not model:
+         return "Please select provider and model.", ""
+
+     payload = {
+         "query_text": query_text.strip().lower(),
+         "feed_author": feed_author.strip() if feed_author else "",
+         "feed_name": feed_name.strip() if feed_name else "",
+         "limit": limit,
+         "provider": provider.lower(),
+     }
+
+     if model != "Automatic Model Selection (Model Routing)":
+         payload["model"] = model
+
+     try:
+         answer_html = ""
+         model_info = f"Provider: {provider}"
+
+         for event_type, content in call_ai(payload, streaming=False):
+             if event_type == "text":
+                 html_content = markdown.markdown(content, extensions=["tables"])
+                 answer_html = (
+                     "<div style='background-color:#E8F0FE; "
+                     "padding:15px; border-radius:10px; font-size:16px;'>\n"
+                     f"{html_content}\n"
+                     "</div>\n"
+                 )
+             elif event_type == "model":
+                 model_info = f"Provider: {provider} | Model: {content}"
+             elif event_type == "truncated":
+                 answer_html += (
+                     f"<div style='color:#ff6600; padding:10px; font-weight:bold;'>⚠️ {content}</div>"
+                 )
+             elif event_type == "error":
+                 return (
+                     f"<div style='color:red; padding:10px; font-weight:bold;'>❌ {content}</div>",
+                     model_info,
+                 )
+
+         return answer_html, model_info
+
+     except Exception as e:
+         return (
+             f"<div style='color:red; padding:10px;'>Error: {str(e)}</div>",
+             f"Provider: {provider}",
+         )
+
+
+ def update_model_choices(provider):
+     """
+     Update model choices based on selected provider
+     Args:
+         provider (str): The selected LLM provider
+     Returns:
+         gr.Dropdown: Updated model dropdown component
+     """
+     models = get_models_for_provider(provider)
+     return gr.Dropdown(choices=models, value=models[0] if models else "")
+
+
+ # -----------------------
+ # Gradio UI
+ # -----------------------
+ with gr.Blocks(title="Substack Articles LLM Engine", theme=gr.themes.Soft()) as demo:
+     # Header
+     gr.HTML(
+         "<div style='background-color:#ff6719; padding:20px; border-radius:12px; "
+         "text-align:center; margin-bottom:20px;'>\n"
+         " <h1 style='color:white; font-size:42px; font-family:serif; margin:0;'>\n"
+         " 📰 Substack Articles LLM Engine\n"
+         " </h1>\n"
+         "</div>\n"
+     )
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             # Search Mode Selection
+             gr.Markdown("## 🔍 Select Search Mode")
+             search_type = gr.Radio(
+                 choices=["Search Articles", "Ask the AI"],
+                 value="Search Articles",
+                 label="Search Mode",
+                 info="Choose between searching for articles or asking AI questions",
+             )
+
+             # Common filters
+             gr.Markdown("### Filters")
+             query_text = gr.Textbox(label="Query", placeholder="Type your query here...", lines=3)
+             feed_author = gr.Dropdown(
+                 choices=[""] + feed_authors, label="Author (optional)", value=""
+             )
+             feed_name = gr.Dropdown(
+                 choices=[""] + feed_names, label="Newsletter (optional)", value=""
+             )
+
+             # Conditional fields based on search type
+             title_keywords = gr.Textbox(
+                 label="Title Keywords (optional)",
+                 placeholder="Filter by words in the title",
+                 visible=True,
+             )
+
+             limit = gr.Slider(
+                 minimum=1, maximum=20, step=1, label="Number of results", value=5, visible=True
+             )
+
+             # LLM Options (only visible for AI mode)
+             with gr.Group(visible=False) as llm_options:
+                 gr.Markdown("### ⚙️ LLM Options")
+                 provider = gr.Dropdown(
+                     choices=["OpenRouter", "HuggingFace", "OpenAI"],
+                     label="Select LLM Provider",
+                     value="OpenRouter",
+                 )
+                 model = gr.Dropdown(
+                     choices=get_models_for_provider("OpenRouter"),
+                     label="Select Model",
+                     value="Automatic Model Selection (Model Routing)",
+                 )
+                 streaming_mode = gr.Radio(
+                     choices=["Streaming", "Non-Streaming"],
+                     value="Streaming",
+                     label="Answer Mode",
+                     info="Streaming shows results as they're generated",
+                 )
+
+             # Submit button
+             submit_btn = gr.Button("🔎 Search / Ask AI", variant="primary", size="lg")
+
+         with gr.Column(scale=2):
+             # Output area
+             output_html = gr.HTML(label="Results")
+             model_info = gr.HTML(visible=False)
+
+     # Event handlers
+     def toggle_visibility(search_type):
+         """
+         Toggle visibility of components based on search type
+
+         Args:
+             search_type (str): The selected search type
+         Returns:
+             tuple: Visibility states for (llm_options, title_keywords, model_info)
+         """
+
+         show_title_keywords = search_type == "Search Articles"
+         show_llm_options = search_type == "Ask the AI"
+         show_model_info = search_type == "Ask the AI"
+         show_limit_slider = search_type == "Search Articles"
+
+         return (
+             gr.Group(visible=show_llm_options),  # llm_options
+             gr.Textbox(visible=show_title_keywords),  # title_keywords
+             gr.HTML(visible=show_model_info),  # model_info
+             gr.Slider(visible=show_limit_slider),  # limit
+         )
+
+     search_type.change(
+         fn=toggle_visibility,
+         inputs=[search_type],
+         outputs=[llm_options, title_keywords, model_info, limit],
+     )
+
+     # Update model dropdown when provider changes
+     provider.change(fn=update_model_choices, inputs=[provider], outputs=[model])
+
+     # Unified submission handler
+     def handle_submission(
+         search_type,
+         streaming_mode,
+         query_text,
+         feed_name,
+         feed_author,
+         title_keywords,
+         limit,
+         provider,
+         model,
+     ):
+         """
+         Handle submission based on search type and streaming mode
+         Args:
+             search_type (str): The selected search type
+             streaming_mode (str): The selected streaming mode
+             query_text (str): The query text
+             feed_name (str): The selected feed name
+             feed_author (str): The selected feed author
+             title_keywords (str): The title keywords (if applicable)
+             limit (int): The number of results to return
+             provider (str): The selected LLM provider (if applicable)
+             model (str): The selected model (if applicable)
+         Returns:
+             tuple: (HTML formatted answer string, model info string)
+         """
+         if search_type == "Search Articles":
+             result = handle_search_articles(
+                 query_text, feed_name, feed_author, title_keywords, limit
+             )
+             return result, ""  # Always return two values
+         else:  # Ask the AI
+             if streaming_mode == "Non-Streaming":
+                 return handle_ai_question_non_streaming(
+                     query_text, feed_name, feed_author, limit, provider, model
+                 )
+             else:
+                 # For streaming, we'll use a separate handler
+                 return "", ""
+
+     # Streaming handler
+     def handle_streaming_submission(
+         search_type,
+         streaming_mode,
+         query_text,
+         feed_name,
+         feed_author,
+         title_keywords,
+         limit,
+         provider,
+         model,
+     ):
+         """
+         Handle submission with streaming support
+         Args:
+             search_type (str): The selected search type
+             streaming_mode (str): The selected streaming mode
+             query_text (str): The query text
+             feed_name (str): The selected feed name
+             feed_author (str): The selected feed author
+             title_keywords (str): The title keywords (if applicable)
+             limit (int): The number of results to return
+             provider (str): The selected LLM provider (if applicable)
+             model (str): The selected model (if applicable)
+         Yields:
+             tuple: (HTML formatted answer string, model info string)
+         """
+         if search_type == "Ask the AI" and streaming_mode == "Streaming":
+             yield from handle_ai_question_streaming(
+                 query_text, feed_name, feed_author, limit, provider, model
+             )
+         else:
+             # For non-streaming cases, just return the regular result
+             if search_type == "Search Articles":
+                 result = handle_search_articles(
+                     query_text, feed_name, feed_author, title_keywords, limit
+                 )
+                 yield result, ""
+             else:
+                 result_html, model_info_text = handle_ai_question_non_streaming(
+                     query_text, feed_name, feed_author, limit, provider, model
+                 )
+                 yield result_html, model_info_text
+
+     # Single click handler that routes based on mode
+     submit_btn.click(
+         fn=handle_streaming_submission,
+         inputs=[
+             search_type,
+             streaming_mode,
+             query_text,
+             feed_name,
+             feed_author,
+             title_keywords,
+             limit,
+             provider,
+             model,
+         ],
+         outputs=[output_html, model_info],
+         show_progress=True,
+     )
+
+ # For local testing
+ if __name__ == "__main__":
+     demo.launch()
+
+ # # For Google Cloud Run deployment
+ # if __name__ == "__main__":
+ #     demo.launch(
+ #         server_name="0.0.0.0",
+ #         server_port=int(os.environ.get("PORT", 8080))
+ #     )
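For reference, the payloads built in `frontend/app.py` can also be sent straight to the backend. A hypothetical local call, assuming the API is serving on port 8080 as configured in the Dockerfile and that the routes accept exactly the fields shown above:

```bash
# Search for matching article titles (payload fields mirror those built in frontend/app.py)
curl -X POST http://localhost:8080/search/unique-titles \
  -H "Content-Type: application/json" \
  -d '{"query_text": "vector databases", "feed_author": "", "feed_name": "", "limit": 5, "title_keywords": null}'

# Ask the AI (non-streaming endpoint)
curl -X POST http://localhost:8080/search/ask \
  -H "Content-Type: application/json" \
  -d '{"query_text": "what is hybrid search?", "feed_author": "", "feed_name": "", "limit": 5, "provider": "openrouter"}'
```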
prefect-cloud.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pull:
2
+ - prefect.deployments.steps.git_clone:
3
+ id: clone-step
4
+ repository: https://github.com/Indraneel99/substack-newsletters-search-course
5
+ credentials: "{{ prefect.blocks.github-credentials.my-gh-creds }}"
6
+
7
+ - prefect.deployments.steps.run_shell_script:
8
+ id: install-build-tools
9
+ script: |
10
+ apt-get update -y
11
+ apt-get install -y --no-install-recommends build-essential g++
12
+
13
+ - prefect.deployments.steps.pip_install_requirements:
14
+ directory: "{{ clone-step.directory }}"
15
+ requirements_file: requirements.txt
16
+ stream_output: true
17
+
18
+ deployments:
19
+ - name: rss-ingest
20
+ entrypoint: src/pipelines/flows/rss_ingestion_flow.py:rss_ingest_flow
21
+ work_pool:
22
+ name: default-work-pool
23
+ job_variables:
24
+ env:
25
+ SUPABASE_DB__TABLE_NAME: "{{ prefect.blocks.secret.supabase-db--table-name }}"
26
+ SUPABASE_DB__HOST: "{{ prefect.blocks.secret.supabase-db--host }}"
27
+ SUPABASE_DB__NAME: "{{ prefect.blocks.secret.supabase-db--name }}"
28
+ SUPABASE_DB__USER: "{{ prefect.blocks.secret.supabase-db--user }}"
29
+ SUPABASE_DB__PASSWORD: "{{ prefect.blocks.secret.supabase-db--password }}"
30
+ SUPABASE_DB__PORT: "{{ prefect.blocks.secret.supabase-db--port }}"
31
+
32
+ schedule:
33
+ cron: "0 0 * * 7"
34
+
35
+ - name: qdrant-embeddings
36
+ entrypoint: src/pipelines/flows/embeddings_ingestion_flow.py:qdrant_ingest_flow
37
+ work_pool:
38
+ name: default-work-pool
39
+ job_variables:
40
+ env:
41
+ SUPABASE_DB__TABLE_NAME: "{{ prefect.blocks.secret.supabase-db--table-name }}"
42
+ SUPABASE_DB__HOST: "{{ prefect.blocks.secret.supabase-db--host }}"
43
+ SUPABASE_DB__NAME: "{{ prefect.blocks.secret.supabase-db--name }}"
44
+ SUPABASE_DB__USER: "{{ prefect.blocks.secret.supabase-db--user }}"
45
+ SUPABASE_DB__PASSWORD: "{{ prefect.blocks.secret.supabase-db--password }}"
46
+ SUPABASE_DB__PORT: "{{ prefect.blocks.secret.supabase-db--port }}"
47
+ QDRANT__API_KEY: "{{ prefect.blocks.secret.qdrant--api-key }}"
48
+ QDRANT__URL: "{{ prefect.blocks.secret.qdrant--url }}"
49
+ QDRANT__COLLECTION_NAME: "{{ prefect.blocks.secret.qdrant--collection-name }}"
50
+
51
+ schedule:
52
+ cron: "0 0 * * 7"
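The "{{ prefect.blocks.secret.* }}" placeholders above only resolve if Secret blocks with those slugs already exist in the Prefect workspace. A hedged sketch of creating them from Python (block names mirror the YAML; the values are placeholders, not real credentials):

from prefect.blocks.system import Secret

# Block slugs must match the names referenced in the deployment YAML.
secrets = {
    "supabase-db--table-name": "<table-name>",
    "supabase-db--host": "<db-host>",
    "supabase-db--password": "<db-password>",
    "qdrant--api-key": "<qdrant-api-key>",
    "qdrant--url": "<qdrant-url>",
}

for name, value in secrets.items():
    Secret(value=value).save(name=name, overwrite=True)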
prefect-local.yaml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pull:
2
+ - prefect.deployments.steps.git_clone:
3
+ id: clone-step
4
+ repository: https://github.com/Indraneel99/substack-newsletters-search-course
5
+ credentials: "{{ prefect.blocks.github-credentials.my-gh-creds }}"
6
+
7
+ # This function ensures pip is installed in the environment (Only needed for Prefect Server)
8
+ - prefect.deployments.steps.run_shell_script:
9
+ id: install-pip
10
+ directory: "{{ clone-step.directory }}"
11
+ script: |
12
+ python -m ensurepip --upgrade
13
+
14
+ - prefect.deployments.steps.pip_install_requirements:
15
+ directory: "{{ clone-step.directory }}"
16
+ requirements_file: requirements.txt
17
+ stream_output: true
18
+
19
+ deployments:
20
+ - name: rss-ingest
21
+ entrypoint: src/pipelines/flows/rss_ingestion_flow.py:rss_ingest_flow
22
+ work_pool:
23
+ name: default-work-pool
24
+ job_variables:
25
+ env:
26
+ SUPABASE_DB__TABLE_NAME: "{{ prefect.blocks.secret.supabase-db--table-name }}"
27
+ SUPABASE_DB__HOST: "{{ prefect.blocks.secret.supabase-db--host }}"
28
+ SUPABASE_DB__NAME: "{{ prefect.blocks.secret.supabase-db--name }}"
29
+ SUPABASE_DB__USER: "{{ prefect.blocks.secret.supabase-db--user }}"
30
+ SUPABASE_DB__PASSWORD: "{{ prefect.blocks.secret.supabase-db--password }}"
31
+ SUPABASE_DB__PORT: "{{ prefect.blocks.secret.supabase-db--port }}"
32
+
33
+ schedule:
34
+ cron: "0 0 * * 7"
35
+
36
+ - name: qdrant-embeddings
37
+ entrypoint: src/pipelines/flows/embeddings_ingestion_flow.py:qdrant_ingest_flow
38
+ work_pool:
39
+ name: default-work-pool
40
+ job_variables:
41
+ env:
42
+ SUPABASE_DB__TABLE_NAME: "{{ prefect.blocks.secret.supabase-db--table-name }}"
43
+ SUPABASE_DB__HOST: "{{ prefect.blocks.secret.supabase-db--host }}"
44
+ SUPABASE_DB__NAME: "{{ prefect.blocks.secret.supabase-db--name }}"
45
+ SUPABASE_DB__USER: "{{ prefect.blocks.secret.supabase-db--user }}"
46
+ SUPABASE_DB__PASSWORD: "{{ prefect.blocks.secret.supabase-db--password }}"
47
+ SUPABASE_DB__PORT: "{{ prefect.blocks.secret.supabase-db--port }}"
48
+ QDRANT__API_KEY: "{{ prefect.blocks.secret.qdrant--api-key }}"
49
+ QDRANT__URL: "{{ prefect.blocks.secret.qdrant--url }}"
50
+ QDRANT__COLLECTION_NAME: "{{ prefect.blocks.secret.qdrant--collection-name }}"
51
+
52
+ schedule:
53
+ cron: "0 0 * * 7"
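Both files register the same two deployments; once applied to a work pool with a running worker, a run can also be triggered ad hoc from Python. A minimal sketch, assuming Prefect's default flow-name derivation from the rss_ingest_flow function (the "rss-ingest-flow" name is an assumption):

from prefect.deployments import run_deployment

# Kick off the weekly RSS ingestion outside its cron schedule.
flow_run = run_deployment(name="rss-ingest-flow/rss-ingest")
print(flow_run.id, flow_run.state_name)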
pyproject.toml ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "substack-newsletters-search-course"
3
+ version = "1.0.0"
4
+ description = "A pipeline to retrieve Newsletters from Substack"
5
+ readme = "README.md"
6
+ authors = [
7
+ {name = "Benito Martin"}
8
+ ]
9
+ license = {text = "MIT License"}
10
+ requires-python = ">=3.12"
11
+
12
+ dependencies = [
13
+ "aiohttp>=3.12.15",
14
+ "beautifulsoup4>=4.13.5",
15
+ "fastapi[standard]>=0.116.1",
16
+ "fastembed>=0.7.2",
17
+ "langchain>=0.3.27",
18
+ "langchain-text-splitters>=0.3.9",
19
+ "loguru>=0.7.3",
20
+ "lxml>=5.4.0",
21
+ "openai>=1.103.0",
22
+ "opik>=1.8.29",
23
+ "prefect>=3.4.14",
24
+ "psutil>=7.0.0",
25
+ "psycopg2-binary>=2.9.10",
26
+ "pydantic>=2.11.7",
27
+ "pydantic-settings>=2.10.1",
28
+ "qdrant-client>=1.15.1",
29
+ "sqlalchemy>=2.0.43",
30
+ "supabase>=2.18.1",
31
+ "uvloop>=0.21.0",
32
+ "gradio>=5.45.0",
33
+ "markdown>=3.9",
34
+ "python-dotenv>=1.1.1",
35
+ "markdownify>=1.2.0",
36
+ "prefect-github>=0.3.1",
37
+ "requests>=2.32.5",
38
+ ]
39
+
40
+ # [[tool.uv.index]]
41
+ # name = "pytorch-cpu"
42
+ # url = "https://download.pytorch.org/whl/cpu"
43
+ # explicit = true
44
+
45
+
46
+ [dependency-groups]
47
+ dev = [
48
+ "pre-commit>=4.3.0",
49
+ "types-python-dateutil>=2.9.0.20250822",
50
+ "types-pyyaml>=6.0.12.20250822",
51
+ "types-requests>=2.32.4.20250809",
52
+ ]
53
+ lint = [
54
+ "mypy>=1.17.1",
55
+ "ruff>=0.12.10",
56
+ "types-markdown>=3.9.0.20250906",
57
+ "types-python-dateutil>=2.9.0.20250822",
58
+ "types-pyyaml>=6.0.12.20250822",
59
+ "types-requests>=2.32.4.20250809",
60
+ ]
61
+ test = [
62
+ "pytest>=8.4.1",
63
+ "pytest-asyncio>=1.1.0",
64
+ "responses>=0.25.8",
65
+ ]
66
+
67
+ [build-system]
68
+ requires = ["hatchling"]
69
+ build-backend = "hatchling.build"
70
+
71
+ [tool.hatch.build]
72
+ packages = ["src"]
73
+
74
+ ######################################
75
+ # --- Linting & Formatting Tools --- #
76
+ ######################################
77
+
78
+ [tool.ruff]
79
+ # Assume Python 3.12
80
+ target-version = "py312"
81
+
82
+ # Same as Black.
83
+ line-length = 100
84
+ indent-width = 4
85
+
86
+ # Exclude a variety of commonly ignored directories.
87
+ exclude = [
88
+ ".bzr",
89
+ ".direnv",
90
+ ".eggs",
91
+ ".git",
92
+ ".git-rewrite",
93
+ ".hg",
94
+ ".mypy_cache",
95
+ ".nox",
96
+ ".pants.d",
97
+ ".pytype",
98
+ ".ruff_cache",
99
+ ".svn",
100
+ ".tox",
101
+ ".venv",
102
+ "__pypackages__",
103
+ "_build",
104
+ "buck-out",
105
+ "build",
106
+ "dist",
107
+ "node_modules",
108
+ "venv",
109
+ ]
110
+
111
+ # Whether to show an enumeration of all fixed lint violations
112
+ show-fixes = true
113
+
114
+ # Enable common lint rules.
115
+ lint.select = [
116
+ "B", # flake8-bugbear
117
+ "E", # pycodestyle
118
+ "F", # Pyflakes1
119
+ "I", # isort
120
+ "SIM", # similarity
121
+ "UP", # pyupgrade
122
+ "D102", # docstring method
123
+ "D103", # docstring function
124
+ "D414", # docstring missing section
125
+ "D419", # empty docstring
126
+ # "D101", # docstring missing class
127
+ ]
128
+
129
+ lint.ignore = []
130
+
131
+ # Allow autofix for all enabled rules (when `--fix`) is provided.
132
+ lint.fixable = ["ALL"]
133
+ lint.unfixable = []
134
+
135
+ [tool.ruff.lint.mccabe]
136
+ # Maximum allowed McCabe complexity.
137
+ max-complexity = 10
138
+
139
+
140
+ #########################
141
+ # --- Static Typing --- #
142
+ #########################
143
+
144
+ [tool.mypy]
145
+ # Use `packages` to specify the package root
146
+ packages = ["src"]
147
+ explicit_package_bases = true
148
+
149
+ # All other configurations
150
+ ignore_missing_imports = true
151
+ disallow_untyped_defs = false
152
+ check_untyped_defs = true
153
+ # warn_redundant_casts = true
154
+ warn_unused_ignores = false
155
+ warn_return_any = false
156
+ strict_optional = true
157
+
158
+ # [tool.mypy]
159
+ # # Only check src directory, with src as the package root
160
+ # files = ["src"] # Check from project root instead of just src
161
+ # mypy_path = ["src"] # Set mypy path to project root
162
+
163
+ #########################
164
+ # --- Testing Tools --- #
165
+ #########################
166
+
167
+ [tool.pytest.ini_options]
168
+ testpaths = [ "tests" ]
169
+ python_files = [ "test_*.py" ]
170
+ addopts = "-ra -v -s"
171
+ filterwarnings = [
172
+ "ignore::DeprecationWarning",
173
+ "ignore::UserWarning"
174
+ ]
requirements.txt ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp
2
+ beautifulsoup4
3
+ fastapi[standard]
4
+ fastembed
5
+ langchain
6
+ langchain-text-splitters
7
+ loguru
8
+ lxml
9
+ openai
10
+ opik
11
+ prefect
12
+ psutil
13
+ psycopg2-binary
14
+ pydantic
15
+ pydantic-settings
16
+ qdrant-client
17
+ sqlalchemy
18
+ supabase
19
+ uvloop
20
+ gradio
21
+ markdown
22
+ python-dotenv
23
+ markdownify
src/__init__.py ADDED
File without changes
src/api/__init__.py ADDED
File without changes
src/api/exceptions/__init__.py ADDED
File without changes
src/api/exceptions/exception_handlers.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import Request
2
+ from fastapi.exceptions import RequestValidationError
3
+ from fastapi.responses import JSONResponse
4
+ from qdrant_client.http.exceptions import UnexpectedResponse
5
+
6
+ from src.utils.logger_util import setup_logging
7
+
8
+ logger = setup_logging()
9
+
10
+
11
+ async def validation_exception_handler(request: Request, exc: Exception) -> JSONResponse:
12
+ """Handle FastAPI request validation errors.
13
+
14
+ Args:
15
+ request (Request): The incoming request that caused the validation error.
16
+ exc (Exception): The exception instance.
17
+
18
+ Returns:
19
+ JSONResponse: A JSON response with status code 422 and error details.
20
+
21
+ """
22
+ if isinstance(exc, RequestValidationError):
23
+ logger.warning(f"Validation error on {request.url}: {exc.errors()}")
24
+ return JSONResponse(
25
+ status_code=422,
26
+ content={
27
+ "type": "validation_error",
28
+ "message": "Invalid request",
29
+ "details": exc.errors(),
30
+ },
31
+ )
32
+
33
+ logger.exception(f"Unexpected exception on {request.url}: {exc}")
34
+ return JSONResponse(
35
+ status_code=500,
36
+ content={
37
+ "type": "internal_error",
38
+ "message": "Internal server error",
39
+ "details": str(exc),
40
+ },
41
+ )
42
+
43
+
44
+ async def qdrant_exception_handler(request: Request, exc: Exception) -> JSONResponse:
45
+ """Handle unexpected responses from Qdrant.
46
+
47
+ Args:
48
+ request (Request): The incoming request that caused the error.
49
+ exc (Exception): The exception instance.
50
+
51
+ Returns:
52
+ JSONResponse: A JSON response with status code 500 and error details.
53
+
54
+ """
55
+ if isinstance(exc, UnexpectedResponse):
56
+ logger.error(f"Qdrant error on {request.url}: {exc}")
57
+ return JSONResponse(
58
+ status_code=500,
59
+ content={
60
+ "type": "qdrant_error",
61
+ "message": "Vector store error",
62
+ "details": str(exc),
63
+ },
64
+ )
65
+
66
+ # Fallback to general internal error if exception is not UnexpectedResponse
67
+ logger.exception(f"Unexpected exception on {request.url}: {exc}")
68
+ return JSONResponse(
69
+ status_code=500,
70
+ content={
71
+ "type": "internal_error",
72
+ "message": "Internal server error",
73
+ "details": str(exc),
74
+ },
75
+ )
76
+
77
+
78
+ async def general_exception_handler(request: Request, exc: Exception) -> JSONResponse:
79
+ """Handle all uncaught exceptions in FastAPI.
80
+
81
+ Args:
82
+ request (Request): The incoming request that caused the error.
83
+ exc (Exception): The exception instance.
84
+
85
+ Returns:
86
+ JSONResponse: A JSON response with status code 500 and error details.
87
+
88
+ """
89
+ logger.exception(f"Unhandled exception on {request.url}: {exc}")
90
+ return JSONResponse(
91
+ status_code=500,
92
+ content={
93
+ "type": "internal_error",
94
+ "message": "Internal server error",
95
+ "details": str(exc),
96
+ },
97
+ )
src/api/main.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from contextlib import asynccontextmanager
3
+
4
+ import dotenv
5
+ from fastapi import FastAPI
6
+ from fastapi.exceptions import RequestValidationError
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+ from qdrant_client.http.exceptions import UnexpectedResponse
9
+
10
+ from src.api.exceptions.exception_handlers import (
11
+ general_exception_handler,
12
+ qdrant_exception_handler,
13
+ validation_exception_handler,
14
+ )
15
+ from src.api.middleware.logging_middleware import LoggingMiddleware
16
+ from src.api.routes.health_routes import router as health_router
17
+ from src.api.routes.search_routes import router as search_router
18
+ from src.infrastructure.qdrant.qdrant_vectorstore import AsyncQdrantVectorStore
19
+ from src.utils.logger_util import setup_logging
20
+
21
+ # Load environment variables from .env file
22
+ dotenv.load_dotenv()
23
+
24
+ # -----------------------
25
+ # Logger setup
26
+ # -----------------------
27
+ logger = setup_logging()
28
+
29
+
30
+ # -----------------------
31
+ # Lifespan
32
+ # -----------------------
33
+ @asynccontextmanager
34
+ async def lifespan(app: FastAPI):
35
+ """
36
+ Lifespan context manager to handle startup and shutdown events.
37
+ Initializes the Qdrant vector store on startup and ensures proper cleanup on shutdown.
38
+
39
+ Args:
40
+ app (FastAPI): The FastAPI application instance.
41
+ Yields:
42
+ None
43
+
44
+ Exceptions:
45
+ Raises exceptions if initialization or cleanup fails.
46
+ """
47
+ ## Ensure the cache directory exists and is writable (HF downloads the models here)
48
+ cache_dir = "/tmp/fastembed_cache"
49
+ os.makedirs(cache_dir, exist_ok=True) # Ensure directory exists
50
+ # Force the Hugging Face cache into /tmp in Google Cloud so that it is writable.
51
+ # Hugging Face defaults to ~/.cache/huggingface,
52
+ # which is read-only in Google Cloud,
53
+ # so model downloads would fail without this override.
54
+ logger.info(f"HF_HOME: {os.environ.get('HF_HOME', 'Not set')}")
55
+ logger.info(f"Cache dir: {cache_dir}, Writable: {os.access(cache_dir, os.W_OK)}")
56
+ cache_contents = os.listdir(cache_dir) if os.path.exists(cache_dir) else "Empty"
57
+ logger.info(f"Cache contents before: {cache_contents}")
58
+ try:
59
+ # creates Qdrant client internally
60
+ app.state.vectorstore = AsyncQdrantVectorStore(cache_dir=cache_dir)
61
+ except Exception as e:
62
+ logger.exception("Failed to initialize QdrantVectorStore")
63
+ raise e
64
+ yield
65
+ try:
66
+ await app.state.vectorstore.client.close()
67
+ except Exception:
68
+ logger.exception("Failed to close Qdrant client")
69
+
70
+
71
+ # -----------------------
72
+ # FastAPI application
73
+ # -----------------------
74
+
75
+ app = FastAPI(
76
+ title="Substack RAG API",
77
+ version="1.0",
78
+ description="API for Substack Retrieval-Augmented Generation (RAG) system",
79
+ lifespan=lifespan,
80
+ # root_path=root_path,
81
+ )
82
+
83
+
84
+ # -----------------------
85
+ # Middleware
86
+ # -----------------------
87
+
88
+
89
+ # Log the allowed origins
90
+ allowed_origins = os.getenv("ALLOWED_ORIGINS", "").split(",")
91
+ logger.info(f"CORS allowed origins: {allowed_origins}")
92
+
93
+ app.add_middleware(
94
+ CORSMiddleware,
95
+ allow_origins=allowed_origins, # ["*"], # allowed_origins,
96
+ allow_credentials=True,
97
+ allow_methods=["GET", "POST", "OPTIONS"], # only the methods the app uses
98
+ allow_headers=["Authorization", "Content-Type"], # only headers needed
99
+ )
100
+
101
+ app.add_middleware(LoggingMiddleware)
102
+
103
+
104
+ # -----------------------
105
+ # Exception Handlers
106
+ # -----------------------
107
+ app.add_exception_handler(RequestValidationError, validation_exception_handler)
108
+ app.add_exception_handler(UnexpectedResponse, qdrant_exception_handler)
109
+ app.add_exception_handler(Exception, general_exception_handler)
110
+
111
+
112
+ # -----------------------
113
+ # Routers
114
+ # -----------------------
115
+ app.include_router(search_router, prefix="/search", tags=["search"])
116
+ app.include_router(health_router, tags=["health"])
117
+
118
+ # For Cloud Run, run the app directly
119
+ if __name__ == "__main__":
120
+ import uvicorn
121
+
122
+ port = int(os.environ.get("PORT", 8080)) # Cloud Run provides PORT env var
123
+
124
+ uvicorn.run(
125
+ "src.api.main:app",
126
+ host="0.0.0.0",
127
+ port=port,
128
+ log_level="info",
129
+ reload=True, # Enable auto-reload for development
130
+ )
131
+
132
+ # config = uvicorn.Config(
133
+ # app,
134
+ # port=port,
135
+ # log_level="info",
136
+ # # loop="uvloop",
137
+ # # workers=1,
138
+ # reload=True
139
+ # )
140
+ # server = uvicorn.Server(config)
141
+
142
+ # server.run()
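With the service running locally (for example via uvicorn src.api.main:app --port 8080), the health routes registered above can be smoke-tested with the requests dependency already declared in pyproject.toml; the base URL and port below are assumptions:

import requests

BASE_URL = "http://localhost:8080"  # assumed local port

# Liveness and readiness checks exposed by health_routes.py
print(requests.get(f"{BASE_URL}/health").json())  # {"status": "ok", "uptime_seconds": ...}
print(requests.get(f"{BASE_URL}/ready").json())   # {"status": "ready"} once Qdrant is reachable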
src/api/middleware/__init__.py ADDED
File without changes
src/api/middleware/logging_middleware.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+
3
+ from fastapi import Request
4
+ from starlette.middleware.base import BaseHTTPMiddleware
5
+
6
+ from src.utils.logger_util import setup_logging
7
+
8
+ logger = setup_logging()
9
+
10
+
11
+ class LoggingMiddleware(BaseHTTPMiddleware):
12
+ """Middleware for logging incoming HTTP requests and their responses.
13
+
14
+ Logs the request method, URL, client IP, and headers
15
+ (excluding sensitive headers such as Authorization and Cookie),
16
+ as well as the response status code and request duration in milliseconds.
17
+ Exceptions raised during request processing are logged with the full traceback.
18
+
19
+ Usage:
20
+ Add this middleware to your FastAPI app:
21
+ app.add_middleware(LoggingMiddleware)
22
+
23
+ Attributes:
24
+ logger: Configured logger from `setup_logging`.
25
+
26
+ """
27
+
28
+ async def dispatch(self, request: Request, call_next):
29
+ """Process the incoming request, log its details, and measure execution time.
30
+
31
+ Args:
32
+ request (Request): The incoming FastAPI request.
33
+ call_next: Callable to invoke the next middleware or route handler.
34
+
35
+ Returns:
36
+ Response: The HTTP response returned by the next middleware or route handler.
37
+
38
+ Raises:
39
+ Exception: Propagates any exceptions raised by downstream handlers after logging them.
40
+
41
+ """
42
+ start_time = time.time()
43
+ client_host = request.client.host if request.client else "unknown"
44
+
45
+ # logger.debug(f"Request headers: {request.headers}")
46
+ # logger.debug(f"Request cookies: {request.cookies}")
47
+
48
+ # Exclude sensitive headers from logging
49
+ safe_headers = {
50
+ k: v for k, v in request.headers.items() if k.lower() not in {"authorization", "cookie"}
51
+ }
52
+
53
+ logger.info(
54
+ f"Incoming request: {request.method} {request.url} from {client_host} "
55
+ f"headers={safe_headers}"
56
+ )
57
+
58
+ try:
59
+ response = await call_next(request)
60
+ except Exception:
61
+ duration = (time.time() - start_time) * 1000
62
+ logger.exception(
63
+ f"Request failed: {request.method} {request.url} from {client_host} "
64
+ f"duration={duration:.2f}ms"
65
+ )
66
+ raise
67
+
68
+ duration = (time.time() - start_time) * 1000
69
+ logger.info(
70
+ f"Completed request: {request.method} {request.url} from {client_host} "
71
+ f"status_code={response.status_code} duration={duration:.2f}ms"
72
+ )
73
+ return response
src/api/models/__init__.py ADDED
File without changes
src/api/models/api_models.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+
3
+
4
+ # -----------------------
5
+ # Core search result model
6
+ # -----------------------
7
+ class SearchResult(BaseModel):
8
+ title: str = Field(default="", description="Title of the article")
9
+ feed_author: str | None = Field(default=None, description="Author of the article")
10
+ feed_name: str | None = Field(default=None, description="Name of the feed/newsletter")
11
+ article_author: list[str] | None = Field(default=None, description="List of article authors")
12
+ url: str | None = Field(default=None, description="URL of the article")
13
+ chunk_text: str | None = Field(default=None, description="Text content of the article chunk")
14
+ score: float = Field(default=0.0, description="Relevance score of the article")
15
+
16
+
17
+ # -----------------------
18
+ # Unique titles request/response
19
+ # -----------------------
20
+ class UniqueTitleRequest(BaseModel):
21
+ query_text: str = Field(default="", description="The user query text")
22
+ feed_author: str | None = Field(default=None, description="Filter by author name")
23
+ feed_name: str | None = Field(default=None, description="Filter by feed/newsletter name")
24
+ article_author: list[str] | None = Field(default=None, description="List of article authors")
25
+ title_keywords: str | None = Field(
26
+ default=None, description="Keywords or phrase to match in title"
27
+ )
28
+ limit: int = Field(default=5, description="Number of results to return")
29
+
30
+
31
+ class UniqueTitleResponse(BaseModel):
32
+ results: list[SearchResult] = Field(
33
+ default_factory=list, description="List of unique title search results"
34
+ )
35
+
36
+
37
+ # -----------------------
38
+ # Ask request model
39
+ # -----------------------
40
+ class AskRequest(BaseModel):
41
+ query_text: str = Field(default="", description="The user query text")
42
+ feed_author: str | None = Field(default=None, description="Filter by author name")
43
+ feed_name: str | None = Field(default=None, description="Filter by feed/newsletter name")
44
+ article_author: list[str] | None = Field(default=None, description="List of article authors")
45
+ title_keywords: str | None = Field(
46
+ default=None, description="Keywords or phrase to match in title"
47
+ )
48
+ limit: int = Field(default=5, description="Number of results to return")
49
+ provider: str = Field(default="OpenRouter", description="The provider to use for the query")
50
+ model: str | None = Field(
51
+ default=None, description="The specific model to use for the provider, if applicable"
52
+ )
53
+
54
+
55
+ # -----------------------
56
+ # Ask response model
57
+ # -----------------------
58
+ class AskResponse(BaseModel):
59
+ query: str = Field(default="", description="The original query text")
60
+ provider: str = Field(default="", description="The LLM provider used for generation")
61
+ answer: str = Field(default="", description="Generated answer from the LLM")
62
+ sources: list[SearchResult] = Field(
63
+ default_factory=list, description="List of source documents used in generation"
64
+ )
65
+ model: str | None = Field(
66
+ default=None, description="The specific model used by the provider, if available"
67
+ )
68
+ finish_reason: str | None = Field(
69
+ default=None, description="The reason why the generation finished, if available"
70
+ )
71
+
72
+
73
+ # -----------------------
74
+ # Streaming "response" documentation
75
+ # -----------------------
76
+ class AskStreamingChunk(BaseModel):
77
+ delta: str = Field(default="", description="Partial text generated by the LLM")
78
+
79
+
80
+ class AskStreamingResponse(BaseModel):
81
+ query: str = Field(default="", description="The original query text")
82
+ provider: str = Field(default="", description="The LLM provider used for generation")
83
+ chunks: list[AskStreamingChunk] = Field(
84
+ default_factory=list, description="Streamed chunks of generated text"
85
+ )
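For illustration, the JSON body that /search/ask and /search/ask/stream expect can be built from AskRequest directly; the field values below are placeholders:

from src.api.models.api_models import AskRequest

payload = AskRequest(
    query_text="What are common evaluation metrics for RAG systems?",
    feed_name="Example Newsletter",  # optional filter
    limit=5,
    provider="OpenRouter",
)

# Serialize to the JSON body of a POST request.
print(payload.model_dump_json(indent=2))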
src/api/models/provider_models.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from enum import Enum
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+
6
+ # OpenRouter priority sort options
7
+ class ProviderSort(str, Enum):
8
+ latency = "latency"
9
+
10
+
11
+ class ModelConfig(BaseModel):
12
+ # The "entry point" model — required by OpenRouter API
13
+ primary_model: str = Field(default="", description="The initial model requested")
14
+ # Optional fallback / routing models
15
+ candidate_models: list[str] = Field(
16
+ default_factory=list, description="List of candidate models for fallback or routing"
17
+ )
18
+ provider_sort: ProviderSort = Field(
19
+ default=ProviderSort.latency, description="How OpenRouter should sort providers when routing"
20
+ )
21
+ stream: bool = Field(default=False, description="Whether to stream responses")
22
+ max_completion_tokens: int = Field(
23
+ default=5000, description="Maximum number of tokens for completion"
24
+ )
25
+ temperature: float = Field(default=0.0, description="Sampling temperature")
26
+
27
+
28
+ class ModelRegistry(BaseModel):
29
+ models: dict[str, ModelConfig] = Field(default_factory=dict)
30
+
31
+ def get_config(self, provider: str) -> ModelConfig:
32
+ """Retrieve the ModelConfig for the specified provider.
33
+
34
+ Args:
35
+ provider (str): The name of the provider.
36
+
37
+ Returns:
38
+ ModelConfig: The ModelConfig instance for the specified provider.
39
+
40
+ Raises:
41
+ ValueError: If the provider is not found in the registry.
42
+ """
43
+ provider_lower = provider.lower()
44
+ if provider_lower not in self.models:
45
+ raise ValueError(f"ModelConfig not found for provider: {provider}")
46
+ return self.models[provider_lower]
47
+
48
+
49
+ # -----------------------
50
+ # Default registry
51
+ # -----------------------
52
+
53
+ # Default ModelConfigs for models
54
+ # OpenRouter models show low latency and are highly ranked by OpenRouter
55
+
56
+ MODEL_REGISTRY = ModelRegistry(
57
+ models={
58
+ "openrouter": ModelConfig(
59
+ primary_model="openai/gpt-oss-20b:free",
60
+ candidate_models=[
61
+ # "meta-llama/llama-4-scout:free",
62
+ "cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
63
+ # "meta-llama/llama-3.3-8b-instruct:free",
64
+ # "openai/gpt-oss-20b:free",
65
+ # "openai/gpt-oss-120b:free",
66
+ "nvidia/nemotron-nano-9b-v2:free",
67
+ ],
68
+ ),
69
+ # "openai": ModelConfig(primary_model="gpt-4o-mini"),
70
+ "huggingface": ModelConfig(primary_model="deepseek-ai/DeepSeek-R1"),
71
+ }
72
+ )
73
+
74
+ # MODELS WITH LOGPROBS SUPPORT
75
+
76
+ # deepseek/deepseek-r1-0528-qwen3-8b:free
77
+ # mistralai/mistral-small-3.2-24b-instruct:free
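Lookups on the registry are case-insensitive and fail loudly for unregistered providers; a short usage sketch:

from src.api.models.provider_models import MODEL_REGISTRY

config = MODEL_REGISTRY.get_config("OpenRouter")  # provider name is lower-cased internally
print(config.primary_model, config.provider_sort.value, config.max_completion_tokens)

try:
    MODEL_REGISTRY.get_config("anthropic")  # not registered above
except ValueError as err:
    print(err)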
src/api/routes/__init__.py ADDED
File without changes
src/api/routes/health_routes.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+
3
+ from fastapi import APIRouter, Request
4
+ from qdrant_client.http.exceptions import UnexpectedResponse
5
+
6
+ router = APIRouter()
7
+
8
+ start_time = time.time()
9
+
10
+
11
+ @router.get("/")
12
+ async def root():
13
+ """Root endpoint.
14
+
15
+ Returns a simple JSON response indicating that the API is running.
16
+
17
+ Returns:
18
+ dict: {"message": "Hello! API is running."}
19
+
20
+ """
21
+ return {"message": "Hello! API is running."}
22
+
23
+
24
+ @router.get("/health")
25
+ async def health_check():
26
+ """Liveness check endpoint.
27
+
28
+ Returns basic service status and uptime in seconds.
29
+ """
30
+ uptime = int(time.time() - start_time)
31
+ return {
32
+ "status": "ok",
33
+ "uptime_seconds": uptime,
34
+ }
35
+
36
+
37
+ @router.get("/ready")
38
+ async def readiness_check(request: Request):
39
+ """Readiness check endpoint.
40
+
41
+ Verifies whether the service is ready to handle requests by
42
+ checking connectivity to Qdrant.
43
+ """
44
+ try:
45
+ vectorstore = request.app.state.vectorstore
46
+ # a lightweight check: get_collections is cheap
47
+ await vectorstore.client.get_collections()
48
+ return {"status": "ready"}
49
+ except UnexpectedResponse:
50
+ return {"status": "not ready", "reason": "Qdrant unexpected response"}
51
+ except Exception as e:
52
+ return {"status": "not ready", "reason": str(e)}
src/api/routes/search_routes.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+
3
+ from fastapi import APIRouter, Request
4
+ from fastapi.responses import StreamingResponse
5
+
6
+ from src.api.models.api_models import (
7
+ AskRequest,
8
+ AskResponse,
9
+ AskStreamingResponse,
10
+ SearchResult,
11
+ UniqueTitleRequest,
12
+ UniqueTitleResponse,
13
+ )
14
+ from src.api.services.generation_service import generate_answer, get_streaming_function
15
+ from src.api.services.search_service import query_unique_titles, query_with_filters
16
+
17
+ router = APIRouter()
18
+
19
+
20
+ @router.post("/unique-titles", response_model=UniqueTitleResponse)
21
+ async def search_unique(request: Request, params: UniqueTitleRequest):
22
+ """Returns unique article titles based on a query and optional filters.
23
+
24
+ Deduplicates results by article title.
25
+
26
+ Args:
27
+ request: FastAPI request object.
28
+ params: UniqueTitleRequest with search parameters.
29
+
30
+ Returns:
31
+ UniqueTitleResponse: List of unique titles.
32
+
33
+ """
34
+ results = await query_unique_titles(
35
+ request=request,
36
+ query_text=params.query_text,
37
+ feed_author=params.feed_author,
38
+ feed_name=params.feed_name,
39
+ title_keywords=params.title_keywords,
40
+ limit=params.limit,
41
+ )
42
+ return {"results": results}
43
+
44
+
45
+ @router.post("/ask", response_model=AskResponse)
46
+ async def ask_with_generation(request: Request, ask: AskRequest):
47
+ """Non-streaming question-answering endpoint using vector search and LLM.
48
+
49
+ Workflow:
50
+ 1. Retrieve relevant documents (possibly duplicate titles for richer context).
51
+ 2. Generate an answer with the selected LLM provider.
52
+
53
+ Args:
54
+ request: FastAPI request object.
55
+ ask: AskRequest with query, provider, and limit.
56
+
57
+ Returns:
58
+ AskResponse: Generated answer and source documents.
59
+
60
+ """
61
+ # Step 1: Retrieve relevant documents with filters
62
+ results: list[SearchResult] = await query_with_filters(
63
+ request,
64
+ query_text=ask.query_text,
65
+ feed_author=ask.feed_author,
66
+ feed_name=ask.feed_name,
67
+ title_keywords=ask.title_keywords,
68
+ limit=ask.limit,
69
+ )
70
+
71
+ # Step 2: Generate an answer
72
+ answer_data = await generate_answer(
73
+ query=ask.query_text, contexts=results, provider=ask.provider, selected_model=ask.model
74
+ )
75
+
76
+ return AskResponse(
77
+ query=ask.query_text,
78
+ provider=ask.provider,
79
+ answer=answer_data["answer"],
80
+ sources=results,
81
+ model=answer_data.get("model", None),
82
+ finish_reason=answer_data.get("finish_reason", None),
83
+ )
84
+
85
+
86
+ @router.post("/ask/stream", response_model=AskStreamingResponse)
87
+ async def ask_with_generation_stream(request: Request, ask: AskRequest):
88
+ """Streaming question-answering endpoint using vector search and LLM.
89
+
90
+ Workflow:
91
+ 1. Retrieve relevant documents (possibly duplicate titles for richer context).
92
+ 2. Stream generated answer with the selected LLM provider.
93
+
94
+ Args:
95
+ request: FastAPI request object.
96
+ ask: AskRequest with query, provider, and limit.
97
+
98
+ Returns:
99
+ StreamingResponse: Yields text chunks as plain text.
100
+
101
+ """
102
+ # Step 1: Retrieve relevant documents with filters
103
+ results: list[SearchResult] = await query_with_filters(
104
+ request,
105
+ query_text=ask.query_text,
106
+ feed_author=ask.feed_author,
107
+ feed_name=ask.feed_name,
108
+ title_keywords=ask.title_keywords,
109
+ limit=ask.limit,
110
+ )
111
+
112
+ # Step 2: Get the streaming generator
113
+ stream_func = get_streaming_function(
114
+ provider=ask.provider, query=ask.query_text, contexts=results, selected_model=ask.model
115
+ )
116
+
117
+ # Step 3: Wrap streaming generator
118
+ async def stream_generator():
119
+ async for delta in stream_func():
120
+ yield delta
121
+ await asyncio.sleep(0) # allow event loop to handle other tasks
122
+
123
+ return StreamingResponse(stream_generator(), media_type="text/plain")
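Because /search/ask/stream returns plain-text chunks, a client can consume the answer incrementally; a minimal sketch with requests (URL, port, and payload values are assumptions):

import requests

resp = requests.post(
    "http://localhost:8080/search/ask/stream",  # assumed local port
    json={
        "query_text": "Summarize recent posts about vector databases",
        "limit": 5,
        "provider": "OpenRouter",
    },
    stream=True,
)

# Print each streamed chunk as it arrives.
for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
    if chunk:
        print(chunk, end="", flush=True)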
src/api/services/__init__.py ADDED
File without changes
src/api/services/generation_service.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections.abc import AsyncGenerator, Callable
2
+
3
+ import opik
4
+
5
+ from src.api.models.api_models import SearchResult
6
+ from src.api.models.provider_models import MODEL_REGISTRY
7
+ from src.api.services.providers.huggingface_service import generate_huggingface, stream_huggingface
8
+ from src.api.services.providers.openai_service import generate_openai, stream_openai
9
+ from src.api.services.providers.openrouter_service import generate_openrouter, stream_openrouter
10
+ from src.api.services.providers.utils.evaluation_metrics import evaluate_metrics
11
+ from src.api.services.providers.utils.prompts import build_research_prompt
12
+ from src.utils.logger_util import setup_logging
13
+
14
+ logger = setup_logging()
15
+
16
+
17
+ # -----------------------
18
+ # Non-streaming answer generator
19
+ # -----------------------
20
+ @opik.track(name="generate_answer")
21
+ async def generate_answer(
22
+ query: str,
23
+ contexts: list[SearchResult],
24
+ provider: str = "openrouter",
25
+ selected_model: str | None = None,
26
+ ) -> dict:
27
+ """Generate a non-streaming answer using the specified LLM provider.
28
+
29
+ Args:
30
+ query (str): The user's research query.
31
+ contexts (list[SearchResult]): List of context documents with metadata.
32
+ provider (str): The LLM provider to use ("openai", "openrouter", "huggingface").
33
+
34
+ Returns:
35
+ dict: {"answer": str, "sources": list[str], "model": Optional[str]}
36
+
37
+ """
38
+ prompt = build_research_prompt(contexts, query=query)
39
+ model_used: str | None = None
40
+ finish_reason: str | None = None
41
+
42
+ provider_lower = provider.lower()
43
+
44
+ config = MODEL_REGISTRY.get_config(provider_lower)
45
+
46
+ if provider_lower == "openai":
47
+ answer, model_used = await generate_openai(prompt, config=config)
48
+ elif provider_lower == "openrouter":
49
+ try:
50
+ answer, model_used, finish_reason = await generate_openrouter(
51
+ prompt, config=config, selected_model=selected_model
52
+ )
53
+ metrics_results = await evaluate_metrics(answer, prompt)
54
+ logger.info(f"G-Eval Faithfulness → {metrics_results}")
55
+ except Exception as e:
56
+ logger.error(f"Error occurred while generating answer from {provider_lower}: {e}")
57
+ raise
58
+
59
+ elif provider_lower == "huggingface":
60
+ answer, model_used = await generate_huggingface(prompt, config=config)
61
+ else:
62
+ raise ValueError(f"Unknown provider: {provider}")
63
+
64
+ return {
65
+ "answer": answer,
66
+ "sources": [r.url for r in contexts],
67
+ "model": model_used,
68
+ "finish_reason": finish_reason,
69
+ }
70
+
71
+
72
+ # -----------------------
73
+ # Streaming answer generator
74
+ # -----------------------
75
+ @opik.track(name="get_streaming_function")
76
+ def get_streaming_function(
77
+ provider: str,
78
+ query: str,
79
+ contexts: list[SearchResult],
80
+ selected_model: str | None = None,
81
+ ) -> Callable[[], AsyncGenerator[str, None]]:
82
+ """Get a streaming function for the specified LLM provider.
83
+
84
+ Args:
85
+ provider (str): The LLM provider to use ("openai", "openrouter", "huggingface").
86
+ query (str): The user's research query.
87
+ contexts (list[SearchResult]): List of context documents with metadata.
88
+
89
+ Returns:
90
+ Callable[[], AsyncGenerator[str, None]]: A function that returns an async generator yielding
91
+ response chunks.
92
+
93
+ """
94
+ prompt = build_research_prompt(contexts, query=query)
95
+ provider_lower = provider.lower()
96
+ config = MODEL_REGISTRY.get_config(provider_lower)
97
+ logger.info(f"Using model config: {config}")
98
+
99
+ async def stream_gen() -> AsyncGenerator[str, None]:
100
+ """Asynchronous generator that streams response chunks from the specified provider.
101
+
102
+ Yields:
103
+ str: The next chunk of the response.
104
+
105
+ """
106
+ buffer = [] # collect all chunks here
107
+
108
+ if provider_lower == "openai":
109
+ async for chunk in stream_openai(prompt, config=config):
110
+ buffer.append(chunk)
111
+ yield chunk
112
+
113
+ elif provider_lower == "openrouter":
114
+ try:
115
+ async for chunk in stream_openrouter(
116
+ prompt, config=config, selected_model=selected_model
117
+ ):
118
+ buffer.append(chunk)
119
+ yield chunk
120
+
121
+ full_output = "".join(buffer)
122
+ metrics_results = await evaluate_metrics(full_output, prompt)
123
+ logger.info(f"Metrics results: {metrics_results}")
124
+
125
+ except Exception as e:
126
+ logger.error(f"Error occurred while streaming from {provider}: {e}")
127
+ yield "__error__"
128
+
129
+ elif provider_lower == "huggingface":
130
+ async for chunk in stream_huggingface(prompt, config=config):
131
+ buffer.append(chunk)
132
+ yield chunk
133
+
134
+ else:
135
+ raise ValueError(f"Unknown provider: {provider}")
136
+
137
+ return stream_gen
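A hedged sketch of calling generate_answer outside the API with a hand-built context; it still requires the provider API keys configured in src/config.py (and Opik credentials, since the call is tracked):

import asyncio

from src.api.models.api_models import SearchResult
from src.api.services.generation_service import generate_answer

contexts = [
    SearchResult(
        title="Example article",
        url="https://example.com/post",
        chunk_text="Vector databases index embeddings for similarity search.",
        score=0.87,
    )
]


async def main():
    result = await generate_answer(
        query="What do vector databases do?",
        contexts=contexts,
        provider="openrouter",
    )
    print(result["model"], result["finish_reason"])
    print(result["answer"])


asyncio.run(main())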
src/api/services/providers/__init__.py ADDED
File without changes
src/api/services/providers/huggingface_service.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections.abc import AsyncGenerator
2
+
3
+ from huggingface_hub import AsyncInferenceClient
4
+
5
+ from src.api.models.provider_models import ModelConfig
6
+ from src.api.services.providers.utils.messages import build_messages
7
+ from src.config import settings
8
+ from src.utils.logger_util import setup_logging
9
+
10
+ logger = setup_logging()
11
+
12
+ # -----------------------
13
+ # Hugging Face client
14
+ # -----------------------
15
+ hf_key = settings.hugging_face.api_key
16
+ hf_client = AsyncInferenceClient(provider="auto", api_key=hf_key)
17
+
18
+
19
+ async def generate_huggingface(prompt: str, config: ModelConfig) -> tuple[str, None]:
20
+ """Generate a response from Hugging Face for a given prompt and model configuration.
21
+
22
+ Args:
23
+ prompt (str): The input prompt.
24
+ config (ModelConfig): The model configuration.
25
+
26
+ Returns:
27
+ tuple[str, None]: The generated response and None for model and finish reason.
28
+
29
+ """
30
+ resp = await hf_client.chat.completions.create(
31
+ model=config.primary_model,
32
+ messages=build_messages(prompt),
33
+ temperature=config.temperature,
34
+ max_tokens=config.max_completion_tokens,
35
+ )
36
+ return resp.choices[0].message.content or "", None
37
+
38
+
39
+ def stream_huggingface(prompt: str, config: ModelConfig) -> AsyncGenerator[str, None]:
40
+ """Stream a response from Hugging Face for a given prompt and model configuration.
41
+
42
+ Args:
43
+ prompt (str): The input prompt.
44
+ config (ModelConfig): The model configuration.
45
+
46
+ Returns:
47
+ AsyncGenerator[str, None]: An asynchronous generator yielding response chunks.
48
+
49
+ """
50
+
51
+ async def gen() -> AsyncGenerator[str, None]:
52
+ stream = await hf_client.chat.completions.create(
53
+ model=config.primary_model,
54
+ messages=build_messages(prompt),
55
+ temperature=config.temperature,
56
+ max_tokens=config.max_completion_tokens,
57
+ stream=True,
58
+ )
59
+ async for chunk in stream:
60
+ delta_text = getattr(chunk.choices[0].delta, "content", None)
61
+ if delta_text:
62
+ yield delta_text
63
+
64
+ return gen()
src/api/services/providers/openai_service.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from collections.abc import AsyncGenerator
3
+
4
+ from openai import AsyncOpenAI
5
+ from opik.integrations.openai import track_openai
6
+
7
+ from src.api.models.provider_models import ModelConfig
8
+ from src.api.services.providers.utils.messages import build_messages
9
+ from src.config import settings
10
+ from src.utils.logger_util import setup_logging
11
+
12
+ logger = setup_logging()
13
+
14
+ # -----------------------
15
+ # OpenAI client
16
+ # -----------------------
17
+ openai_key = settings.openai.api_key
18
+ async_openai_client = AsyncOpenAI(api_key=openai_key)
19
+
20
+ # -----------------------
21
+ # Opik Observability
22
+ # -----------------------
23
+
24
+ os.environ["OPIK_API_KEY"] = settings.opik.api_key
25
+ os.environ["OPIK_PROJECT_NAME"] = settings.opik.project_name
26
+
27
+ async_openai_client = track_openai(async_openai_client)
28
+
29
+
30
+ async def generate_openai(prompt: str, config: ModelConfig) -> tuple[str, None]:
31
+ """Generate a response from OpenAI for a given prompt and model configuration.
32
+
33
+ Args:
34
+ prompt (str): The input prompt.
35
+ config (ModelConfig): The model configuration.
36
+
37
+ Returns:
38
+ tuple[str, None]: The generated response and None for model and finish reason.
39
+
40
+ """
41
+ ### NOTES ON PARAMETERS
42
+ # logprobs: Include the log probabilities on the logprobs most likely tokens,
43
+ # as well the chosen tokens.
44
+ # temperature: 0.0 (more deterministic) to 1.0 (more creative)
45
+ # top_p: 0.0 to 1.0, nucleus sampling, 1.0 means no nucleus sampling
46
+ # 0.1 means only the tokens comprising the top 10% probability mass are considered.
47
+ # presence_penalty: -2.0 to 2.0, positive values penalize new tokens based
48
+ # on whether they appear in the text so far
49
+ # (Encourages model to use more context from other chunks)
50
+ # frequency_penalty: -2.0 to 2.0, positive values penalize new tokens based
51
+ # on their existing frequency in the text so far (helpful if context chunks overlap.)
52
+
53
+ resp = await async_openai_client.chat.completions.create(
54
+ model="gpt-4o-mini",
55
+ messages=build_messages(prompt),
56
+ temperature=config.temperature,
57
+ max_completion_tokens=config.max_completion_tokens,
58
+ # logprobs=True,
59
+ # top_logprobs=3,
60
+ # top_p=1.0,
61
+ # presence_penalty=0.3,
62
+ # frequency_penalty=0.3,
63
+ )
64
+
65
+ return resp.choices[0].message.content or "", None
66
+
67
+
68
+ def stream_openai(prompt: str, config: ModelConfig) -> AsyncGenerator[str, None]:
69
+ """Stream a response from OpenAI for a given prompt and model configuration.
70
+
71
+ Args:
72
+ prompt (str): The input prompt.
73
+ config (ModelConfig): The model configuration.
74
+
75
+ Returns:
76
+ AsyncGenerator[str, None]: An asynchronous generator yielding response chunks.
77
+
78
+ """
79
+
80
+ async def gen() -> AsyncGenerator[str, None]:
81
+ stream = await async_openai_client.chat.completions.create(
82
+ model=config.primary_model,
83
+ messages=build_messages(prompt),
84
+ temperature=config.temperature,
85
+ max_completion_tokens=config.max_completion_tokens,
86
+ stream=True,
87
+ )
88
+
89
+ last_finish_reason = None
90
+ async for chunk in stream:
91
+ delta_text = getattr(chunk.choices[0].delta, "content", None)
92
+ if delta_text:
93
+ yield delta_text
94
+
95
+ # Reasons: tool_calls, stop, length, content_filter, error
96
+ finish_reason = getattr(chunk.choices[0], "finish_reason", None)
97
+
98
+ if finish_reason:
99
+ last_finish_reason = finish_reason
100
+
101
+ logger.warning(f"Final finish_reason: {last_finish_reason}")
102
+
103
+ # Yield a chunk to trigger truncation warning in UI
104
+ if last_finish_reason == "length":
105
+ yield "__truncated__"
106
+
107
+ return gen()
108
+
109
+
110
+ # -----------------------
111
+ # Log Probs Parameter Experiment
112
+ # -----------------------
113
+
114
+ # import math
115
+
116
+ # async def generate_openai(prompt: str, config: ModelConfig) -> str:
117
+ # """
118
+ # Generate a response from OpenAI for a given prompt and model configuration,
119
+ # and calculate the average log probability of the generated tokens.
120
+
121
+ # Returns:
122
+ # tuple[str, float | None]: Generated response and average log probability
123
+ # """
124
+ # resp = await async_openai_client.chat.completions.create(
125
+ # model="gpt-4o-mini",
126
+ # messages=build_messages(prompt),
127
+ # temperature=config.temperature,
128
+ # max_completion_tokens=config.max_completion_tokens,
129
+ # logprobs=True, # include token log probabilities
130
+ # top_logprobs=3, # top 3 alternatives for each token
131
+ # top_p=1.0,
132
+ # presence_penalty=0.3,
133
+ # frequency_penalty=0.3,
134
+ # )
135
+
136
+
137
+ # content = resp.choices[0].message.content or ""
138
+ # token_logprobs_list = resp.choices[0].logprobs
139
+
140
+ # tokens_logprobs = []
141
+ # token_probs = []
142
+
143
+ # if (
144
+ # token_logprobs_list is not None
145
+ # and hasattr(token_logprobs_list, "content")
146
+ # and isinstance(token_logprobs_list.content, list)
147
+ # and len(token_logprobs_list.content) > 0
148
+ # ):
149
+ # for token_info in token_logprobs_list.content:
150
+ # if token_info is not None and hasattr(token_info, "logprob") \
151
+ # and hasattr(token_info, "token"):
152
+ # tokens_logprobs.append(token_info.logprob)
153
+ # token_probs.append((token_info.token, math.exp(token_info.logprob)))
154
+
155
+ # if tokens_logprobs:
156
+ # avg_logprob = sum(tokens_logprobs) / len(tokens_logprobs)
157
+ # avg_prob = math.exp(avg_logprob)
158
+
159
+ # # Sort by probability
160
+ # most_confident = sorted(token_probs, key=lambda x: x[1], reverse=True)[:5]
161
+ # least_confident = sorted(token_probs, key=lambda x: x[1])[:5]
162
+
163
+ # logger.info(f"Temperature: {config.temperature}")
164
+ # logger.info(f"Max completion tokens: {config.max_completion_tokens}")
165
+ # logger.info(f"Average log probability: {avg_logprob:.4f} "
166
+ # f"(≈ {avg_prob:.2%} avg token prob)")
167
+
168
+ # logger.info("Top 5 most confident tokens:")
169
+ # for tok, prob in most_confident:
170
+ # logger.info(f" '{tok}' → {prob:.2%}")
171
+
172
+ # logger.info("Top 5 least confident tokens:")
173
+ # for tok, prob in least_confident:
174
+ # logger.info(f" '{tok}' → {prob:.2%}")
175
+
176
+ # else:
177
+ # logger.warning("No logprob information found in response.")
178
+
179
+ # breakpoint()
180
+
181
+ # return content
src/api/services/providers/openrouter_service.py ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from collections.abc import AsyncGenerator
3
+ from typing import Any
4
+
5
+ import opik
6
+ from openai import AsyncOpenAI
7
+ from opik.integrations.openai import track_openai
8
+
9
+ from src.api.models.provider_models import ModelConfig
10
+ from src.api.services.providers.utils.messages import build_messages
11
+ from src.config import settings
12
+ from src.utils.logger_util import setup_logging
13
+
14
+ logger = setup_logging()
15
+
16
+
17
+ # -----------------------
18
+ # OpenRouter client
19
+ # -----------------------
20
+
21
+ openrouter_key = settings.openrouter.api_key
22
+ openrouter_url = settings.openrouter.api_url
23
+ async_openrouter_client = AsyncOpenAI(base_url=openrouter_url, api_key=openrouter_key)
24
+
25
+ # -----------------------
26
+ # Opik Observability
27
+ # -----------------------
28
+
29
+ os.environ["OPIK_API_KEY"] = settings.opik.api_key
30
+ os.environ["OPIK_PROJECT_NAME"] = settings.opik.project_name
31
+
32
+ async_openrouter_client = track_openai(async_openrouter_client)
33
+
34
+ # -----------------------
35
+ # Helper to build extra body for OpenRouter
36
+ # -----------------------
37
+
38
+
39
+ @opik.track(name="build_openrouter_extra")
40
+ def build_openrouter_extra(config: ModelConfig) -> dict[str, Any]:
41
+ """Build the extra body for OpenRouter API requests based on the ModelConfig.
42
+
43
+ Args:
44
+ config (ModelConfig): The model configuration.
45
+
46
+ Returns:
47
+ dict[str, Any]: The extra body for OpenRouter API requests.
48
+
49
+ """
50
+ body = {"provider": {"sort": config.provider_sort.value}}
51
+ if config.candidate_models:
52
+ body["models"] = list(config.candidate_models) # type: ignore
53
+ return body
54
+
55
+
56
+ # -----------------------
57
+ # Core OpenRouter functions
58
+ # -----------------------
59
+
60
+
61
+ @opik.track(name="generate_openrouter")
62
+ async def generate_openrouter(
63
+ prompt: str,
64
+ config: ModelConfig,
65
+ selected_model: str | None = None,
66
+ ) -> tuple[str, str | None, str | None]:
67
+ """Generate a response from OpenRouter for a given prompt and model configuration.
68
+
69
+ Args:
70
+ prompt (str): The input prompt.
71
+ config (ModelConfig): The model configuration.
72
+ selected_model (str | None): Optional specific model to use.
73
+
74
+ Returns:
75
+ tuple[str, str | None, str | None]: The generated response, model used, and finish reason.
76
+
77
+ """
78
+
79
+ model_to_use = selected_model or config.primary_model
80
+
81
+ resp = await async_openrouter_client.chat.completions.create(
82
+ model=model_to_use,
83
+ messages=build_messages(prompt),
84
+ temperature=config.temperature,
85
+ max_completion_tokens=config.max_completion_tokens,
86
+ extra_body=build_openrouter_extra(config),
87
+ )
88
+ answer = resp.choices[0].message.content or ""
89
+
90
+ # Reasons: tool_calls, stop, length, content_filter, error
91
+ finish_reason = getattr(resp.choices[0], "native_finish_reason", None)
92
+ model_used = getattr(resp.choices[0], "model", None) or getattr(resp, "model", None)
93
+
94
+ logger.info(f"OpenRouter non-stream finish_reason: {finish_reason}")
95
+ if finish_reason == "length":
96
+ logger.warning("Response was truncated by token limit.")
97
+
98
+ model_used = getattr(resp.choices[0], "model", None) or getattr(resp, "model", None)
99
+ logger.info(f"OpenRouter non-stream finished. Model used: {model_used}")
100
+
101
+ return answer, model_used, finish_reason
102
+
103
+
104
+ @opik.track(name="stream_openrouter")
105
+ def stream_openrouter(
106
+ prompt: str,
107
+ config: ModelConfig,
108
+ selected_model: str | None = None,
109
+ ) -> AsyncGenerator[str, None]:
110
+ """Stream a response from OpenRouter for a given prompt and model configuration.
111
+
112
+ Args:
113
+ prompt (str): The input prompt.
114
+ config (ModelConfig): The model configuration.
115
+ selected_model (str | None): Optional specific model to use.
116
+
117
+ Returns:
118
+ AsyncGenerator[str, None]: An asynchronous generator yielding response chunks.
119
+
120
+ """
121
+
122
+ async def gen() -> AsyncGenerator[str, None]:
123
+ """Generate response chunks from OpenRouter.
124
+
125
+ Yields:
126
+ AsyncGenerator[str, None]: Response chunks.
127
+
128
+ """
129
+
130
+ model_to_use = selected_model or config.primary_model
131
+
132
+ stream = await async_openrouter_client.chat.completions.create(
133
+ model=model_to_use,
134
+ messages=build_messages(prompt),
135
+ temperature=config.temperature,
136
+ max_completion_tokens=config.max_completion_tokens,
137
+ extra_body=build_openrouter_extra(config),
138
+ stream=True,
139
+ )
140
+ try:
141
+ first_chunk = await stream.__anext__()
142
+ model_used = getattr(first_chunk, "model", None)
143
+ if model_used:
144
+ yield f"__model_used__:{model_used}"
145
+ delta_text = getattr(first_chunk.choices[0].delta, "content", None)
146
+ if delta_text:
147
+ yield delta_text
148
+ except StopAsyncIteration:
149
+ return
150
+
151
+ last_finish_reason = None
152
+ async for chunk in stream:
153
+ delta_text = getattr(chunk.choices[0].delta, "content", None)
154
+ if delta_text:
155
+ yield delta_text
156
+
157
+ # Reasons: tool_calls, stop, length, content_filter, error
158
+ finish_reason = getattr(chunk.choices[0], "finish_reason", None)
159
+
160
+ if finish_reason:
161
+ last_finish_reason = finish_reason
162
+
163
+ logger.info(f"OpenRouter stream finished. Model used: {model_used}")
164
+ logger.warning(f"Final finish_reason: {last_finish_reason}")
165
+
166
+ # Yield a chunk to trigger truncation warning in UI
167
+ if last_finish_reason == "length":
168
+ yield "__truncated__"
169
+
170
+ return gen()
171
+
172
+
173
+ # ---------------------------------------
174
+ # Test Log Probs and Confidence Visualization
175
+ # ---------------------------------------
176
+
177
+
178
+ # import math
179
+
180
+
181
+ # def visualize_token_confidence(token_probs: list[tuple[str, float]]):
182
+ # """Print token probabilities as ASCII bars in the terminal."""
183
+ # for tok, prob in token_probs:
184
+ # bar_length = int(prob * 40) # scale bar to 40 chars max
185
+ # bar = "#" * bar_length
186
+ # print(f"{tok:>12}: [{bar:<40}] {prob:.2%}")
187
+
188
+ # async def generate_openrouter(
189
+ # prompt: str,
190
+ # config: ModelConfig,
191
+ # max_tokens: int | None = None) -> tuple[str, str | None, str | None]:
192
+ # """Generate a response from OpenRouter
193
+ # and log token-level statistics with confidence evolution."""
194
+
195
+ # resp = await async_openrouter_client.chat.completions.create(
196
+ # model=config.primary_model,
197
+ # messages=build_messages(prompt),
198
+ # temperature=config.temperature,
199
+ # max_completion_tokens=max_tokens or config.max_completion_tokens,
200
+ # extra_body={**build_openrouter_extra(config), "logprobs": True, "top_logprobs": 3},
201
+ # )
202
+
203
+ # choice = resp.choices[0]
204
+ # content = choice.message.content or ""
205
+ # finish_reason = getattr(choice, "native_finish_reason", None)
206
+ # model_used = getattr(choice, "model", None) or getattr(resp, "model", None)
207
+
208
+ # logger.info(f"OpenRouter non-stream finish_reason: {finish_reason}")
209
+ # if finish_reason == "length":
210
+ # logger.warning("Response was truncated by token limit.")
211
+
212
+ # # Extract logprobs
213
+ # token_logprobs_list = choice.logprobs
214
+ # tokens_logprobs = []
215
+ # token_probs = []
216
+
217
+ # if token_logprobs_list and hasattr(token_logprobs_list, "content"):
218
+ # for token_info in token_logprobs_list.content:
219
+ # tok = token_info.token
220
+ # logprob = token_info.logprob
221
+ # prob = math.exp(logprob)
222
+
223
+ # tokens_logprobs.append(logprob)
224
+ # token_probs.append((tok, prob))
225
+
226
+
227
+ # if tokens_logprobs:
228
+ # avg_logprob = sum(tokens_logprobs) / len(tokens_logprobs)
229
+ # avg_prob = math.exp(avg_logprob)
230
+
231
+ # most_confident = sorted(token_probs, key=lambda x: x[1], reverse=True)[:5]
232
+ # least_confident = sorted(token_probs, key=lambda x: x[1])[:5]
233
+
234
+ # logger.info(f"Temperature: {config.temperature}")
235
+ # logger.info(f"Max completion tokens: {config.max_completion_tokens}")
236
+ # logger.info(f"Average log probability: {avg_logprob:.4f} "
237
+ # f"(≈ {avg_prob:.2%} avg token prob)")"
238
+
239
+ # logger.info("Top 5 most confident tokens:")
240
+ # for tok, prob in most_confident:
241
+ # logger.info(f" '{tok}' → {prob:.2%}")
242
+
243
+ # logger.info("Top 5 least confident tokens:")
244
+ # for tok, prob in least_confident:
245
+ # logger.info(f" '{tok}' → {prob:.2%}")
246
+
247
+ # # Terminal visualization
248
+ # print("\nToken confidence evolution:")
249
+ # visualize_token_confidence(token_probs)
250
+
251
+ # else:
252
+ # logger.warning("No logprob information found in response.")
253
+
254
+ # return content, model_used, finish_reason
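A minimal consumer sketch for the streaming helper above (illustrative only, not part of the committed files): `stream_answer` is a placeholder name for the provider function whose tail is shown, and the handling mirrors the `__model_used__:` and `__truncated__` sentinels yielded by `gen()`.

async def render_stream(stream_answer, prompt, config):
    """Collect streamed chunks, splitting out the sentinel markers."""
    parts: list[str] = []
    model_used: str | None = None
    truncated = False
    gen = await stream_answer(prompt, config)  # the provider returns gen() as shown above
    async for chunk in gen:
        if chunk.startswith("__model_used__:"):
            model_used = chunk.removeprefix("__model_used__:")
        elif chunk == "__truncated__":
            truncated = True  # lets the UI render a truncation warning
        else:
            parts.append(chunk)
    return "".join(parts), model_used, truncated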
src/api/services/providers/utils/__init__.py ADDED
File without changes
src/api/services/providers/utils/evaluation_metrics.py ADDED
@@ -0,0 +1,110 @@
1
+ from opik.evaluation import models
2
+ from opik.evaluation.metrics import GEval
3
+
4
+ from src.config import settings
5
+ from src.utils.logger_util import setup_logging
6
+
7
+ logger = setup_logging()
8
+
9
+ # -----------------------
10
+ # Evaluation helper
11
+ # -----------------------
12
+
13
+
14
+ async def evaluate_metrics(output: str, context: str) -> dict:
15
+ """Evaluate multiple metrics for a given LLM output.
16
+ Metrics included: faithfulness, coherence, completeness.
17
+
18
+ Args:
19
+ output (str): The LLM-generated output to evaluate.
20
+ context (str): The context used to generate the output.
21
+
22
+ Returns:
23
+ dict: A dictionary with metric names as keys and their evaluation results as values.
24
+
25
+ """
26
+ # NOTE: clearing the key here forces the "no API key" branch below, so the G-Eval
+ # metrics are skipped.
+ settings.openai.api_key = None
27
+ logger.info(f"OpenAI key is not set: {settings.openai.api_key is None}")
28
+
29
+ if not output.strip():
30
+ logger.warning("Output is empty. Skipping evaluation.")
31
+ return {
32
+ "faithfulness": {"score": 0.0, "reason": "Empty output", "failed": True},
33
+ "coherence": {"score": 0.0, "reason": "Empty output", "failed": True},
34
+ "completeness": {"score": 0.0, "reason": "Empty output", "failed": True},
35
+ }
36
+
37
+ if not getattr(settings.openai, "api_key", None):
38
+ logger.info("OpenAI API key not set. Skipping metrics evaluation.")
39
+ return {
40
+ "faithfulness": {"score": None, "reason": "Skipped – no API key", "failed": True},
41
+ "coherence": {"score": None, "reason": "Skipped – no API key", "failed": True},
42
+ "completeness": {"score": None, "reason": "Skipped – no API key", "failed": True},
43
+ }
44
+
45
+ judge_model = models.LiteLLMChatModel(
46
+ model_name="gpt-4o", # gpt-4o, gpt-5-mini
47
+ api_key=settings.openai.api_key,
48
+ )
49
+
50
+ metric_configs = {
51
+ "faithfulness": (
52
+ (
53
+ "You are an expert judge tasked with evaluating whether an AI-generated answer is "
54
+ "faithful to the provided Substack excerpts."
55
+ ),
56
+ (
57
+ "The OUTPUT must not introduce new information and beyond "
58
+ "what is contained in the CONTEXT. "
59
+ "All claims in the OUTPUT should be directly supported by the CONTEXT."
60
+ ),
61
+ ),
62
+ "coherence": (
63
+ (
64
+ "You are an expert judge tasked with evaluating whether an AI-generated answer is "
65
+ "logically coherent."
66
+ ),
67
+ "The answer should be well-structured, readable, and maintain consistent reasoning.",
68
+ ),
69
+ "completeness": (
70
+ (
71
+ "You are an expert judge tasked with evaluating whether an AI-generated answer "
72
+ "covers all relevant aspects of the query."
73
+ ),
74
+ (
75
+ "The answer should include all major points from the CONTEXT "
76
+ "and address the user's "
77
+ "query "
78
+ "fully."
79
+ ),
80
+ ),
81
+ }
82
+
83
+ results = {}
84
+ for name, (task_intro, eval_criteria) in metric_configs.items():
85
+ try:
86
+ metric = GEval(
87
+ task_introduction=task_intro,
88
+ evaluation_criteria=eval_criteria,
89
+ model=judge_model,
90
+ name=f"G-Eval {name.capitalize()}",
91
+ )
92
+
93
+ eval_input = f"""
94
+ OUTPUT: {output}
95
+ CONTEXT: {context}
96
+ """
97
+
98
+ score_result = await metric.ascore(eval_input)
99
+
100
+ results[name] = {
101
+ "score": score_result.value,
102
+ "reason": score_result.reason,
103
+ "failed": score_result.scoring_failed,
104
+ }
105
+
106
+ except Exception as e:
107
+ logger.warning(f"G-Eval {name} failed: {e}")
108
+ results[name] = {"score": 0.0, "reason": str(e), "failed": True}
109
+
110
+ return results
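A hedged usage sketch for `evaluate_metrics` (not part of the commit; the output and context strings are placeholders for the generated answer and the retrieved snippets):

import asyncio

async def demo_evaluation() -> None:
    # Placeholder strings; in the API these come from the LLM answer and the context.
    scores = await evaluate_metrics(
        output="RAG combines retrieval with generation ...",
        context="- Snippet: RAG combines retrieval with generation ...",
    )
    for metric, result in scores.items():
        label = "failed" if result["failed"] else f"{result['score']:.2f}"
        print(f"{metric}: {label} - {result['reason']}")

# asyncio.run(demo_evaluation())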
src/api/services/providers/utils/messages.py ADDED
@@ -0,0 +1,18 @@
1
+ from openai.types.chat import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam
2
+
3
+
4
+ def build_messages(
5
+ prompt: str,
6
+ ) -> list[ChatCompletionSystemMessageParam | ChatCompletionUserMessageParam]:
7
+ """Build a list of messages for the OpenAI chat API.
8
+
9
+ Args:
10
+ prompt (str): The user prompt.
11
+
12
+ Returns:
13
+ list[ChatCompletionSystemMessageParam | ChatCompletionUserMessageParam]: A list of messages.
14
+
15
+ """
16
+ return [
17
+ ChatCompletionUserMessageParam(role="user", content=prompt),
18
+ ]
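The helper imports `ChatCompletionSystemMessageParam` but currently emits only a user message. A hypothetical variant (not in the commit) that prepends an optional system prompt could look like this:

def build_messages_with_system(
    prompt: str,
    system_prompt: str | None = None,
) -> list[ChatCompletionSystemMessageParam | ChatCompletionUserMessageParam]:
    """Like build_messages, but optionally prepends a system message."""
    messages: list[ChatCompletionSystemMessageParam | ChatCompletionUserMessageParam] = []
    if system_prompt:
        messages.append(ChatCompletionSystemMessageParam(role="system", content=system_prompt))
    messages.append(ChatCompletionUserMessageParam(role="user", content=prompt))
    return messages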
src/api/services/providers/utils/prompts.py ADDED
@@ -0,0 +1,77 @@
1
+ import opik
2
+
3
+ from src.api.models.api_models import SearchResult
4
+ from src.api.models.provider_models import ModelConfig
5
+
6
+ config = ModelConfig()
7
+
8
+ PROMPT = """
9
+ You are a skilled research assistant specialized in analyzing Substack newsletters.
10
+ Respond to the user's query using only the provided context from these articles,
+ which is retrieved from a vector database; do not rely on outside knowledge or assumptions.
12
+
13
+
14
+ ### Output Rules:
15
+ - Write a detailed, structured answer using **Markdown** (headings, bullet points,
16
+ short or long paragraphs as appropriate).
17
+ - Use up to **{tokens} tokens** without exceeding this limit.
18
+ - Only include facts from the provided context from the articles.
19
+ - Attribute each fact to the correct author(s) and source, and include **clickable links**.
20
+ - If the article author and feed author differ, mention both.
21
+ - There is no need to mention that you based your answer on the provided context.
22
+ - If no relevant information exists, however, clearly state this and provide a fallback suggestion.
23
+ - At the very end, include a **funny quote** and wish the user a great day.
24
+
25
+ ### Query:
26
+ {query}
27
+
28
+ ### Context Articles:
29
+ {context_texts}
30
+
31
+ ### Final Answer:
32
+ """
33
+
34
+
35
+ # Create a new prompt
36
+ prompt = opik.Prompt(
37
+ name="substack_research_assistant", prompt=PROMPT, metadata={"environment": "development"}
38
+ )
39
+
40
+
41
+ def build_research_prompt(
42
+ contexts: list[SearchResult],
43
+ query: str = "",
44
+ tokens: int = config.max_completion_tokens,
45
+ ) -> str:
46
+ """Construct a research-focused LLM prompt using the given query
47
+ and supporting context documents.
48
+
49
+ The prompt enforces Markdown formatting, citations, and strict length guidance.
50
+
51
+ Args:
52
+ contexts (list[SearchResult]): List of context documents with metadata.
53
+ query (str): The user's research query.
54
+ tokens (int): Maximum number of tokens for the LLM response.
55
+
56
+ Returns:
57
+ str: The formatted prompt ready for LLM consumption.
58
+
59
+ """
60
+ # Join all retrieved contexts into a readable format
61
+ context_texts = "\n\n".join(
62
+ (
63
+ f"- Feed Name: {r.feed_name}\n"
64
+ f" Article Title: {r.title}\n"
65
+ f" Article Author(s): {r.article_author}\n"
66
+ f" Feed Author: {r.feed_author}\n"
67
+ f" URL: {r.url}\n"
68
+ f" Snippet: {r.chunk_text}"
69
+ )
70
+ for r in contexts
71
+ )
72
+
73
+ return PROMPT.format(
74
+ query=query,
75
+ context_texts=context_texts,
76
+ tokens=tokens,
77
+ )
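An illustrative call to `build_research_prompt` (field values are invented; the keyword arguments mirror how `SearchResult` is constructed in the search service):

sample_contexts = [
    SearchResult(
        title="Hybrid Search in Practice",
        feed_author="Jane Doe",
        feed_name="Example Newsletter",
        article_author="Jane Doe",
        url="https://example.substack.com/p/hybrid-search",
        chunk_text="Dense and sparse retrieval can be fused with reciprocal rank fusion ...",
        score=0.42,
    ),
]
prompt_text = build_research_prompt(sample_contexts, query="How does hybrid search work?")
print(prompt_text[:400])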
src/api/services/search_service.py ADDED
@@ -0,0 +1,188 @@
1
+ import opik
2
+ from fastapi import Request
3
+ from qdrant_client.models import (
4
+ FieldCondition,
5
+ Filter,
6
+ Fusion,
7
+ FusionQuery,
8
+ MatchText,
9
+ MatchValue,
10
+ Prefetch,
11
+ )
12
+
13
+ from src.api.models.api_models import SearchResult
14
+ from src.infrastructure.qdrant.qdrant_vectorstore import AsyncQdrantVectorStore
15
+ from src.utils.logger_util import setup_logging
16
+
17
+ logger = setup_logging()
18
+
19
+
20
+ @opik.track(name="query_with_filters")
21
+ async def query_with_filters(
22
+ request: Request,
23
+ query_text: str = "",
24
+ feed_author: str | None = None,
25
+ feed_name: str | None = None,
26
+ title_keywords: str | None = None,
27
+ limit: int = 5,
28
+ ) -> list[SearchResult]:
29
+ """Query the vector store with optional filters and return search results.
30
+
31
+ Performs a hybrid dense + sparse search on Qdrant and applies filters based
32
+ on feed author, feed name, and title keywords. Results are deduplicated by point ID.
33
+
34
+ Args:
35
+ request (Request): FastAPI request object containing the vector store in app.state.
36
+ query_text (str): Text query to search for.
37
+ feed_author (str | None): Optional filter for the feed author.
38
+ feed_name (str | None): Optional filter for the feed name.
39
+ title_keywords (str | None): Optional filter for title keywords.
40
+ limit (int): Maximum number of results to return.
41
+
42
+ Returns:
43
+ list[SearchResult]:
44
+ List of search results containing title, feed info, URL, chunk text, and score.
45
+
46
+ """
47
+ vectorstore: AsyncQdrantVectorStore = request.app.state.vectorstore
48
+ dense_vector = vectorstore.dense_vectors([query_text])[0]
49
+ sparse_vector = vectorstore.sparse_vectors([query_text])[0]
50
+
51
+ # Build filter conditions
52
+ conditions: list[FieldCondition] = []
53
+ if feed_author:
54
+ conditions.append(FieldCondition(key="feed_author", match=MatchValue(value=feed_author)))
55
+ if feed_name:
56
+ conditions.append(FieldCondition(key="feed_name", match=MatchValue(value=feed_name)))
57
+ if title_keywords:
58
+ conditions.append(
59
+ FieldCondition(key="title", match=MatchText(text=title_keywords.strip().lower()))
60
+ )
61
+
62
+ query_filter = Filter(must=conditions) if conditions else None # type: ignore
63
+
64
+ fetch_limit = max(1, limit) * 100
65
+ logger.info(f"Fetching up to {fetch_limit} points for unique Ids.")
66
+
67
+ response = await vectorstore.client.query_points(
68
+ collection_name=vectorstore.collection_name,
69
+ query=FusionQuery(fusion=Fusion.RRF),
70
+ prefetch=[
71
+ Prefetch(query=dense_vector, using="Dense", limit=fetch_limit, filter=query_filter),
72
+ Prefetch(query=sparse_vector, using="Sparse", limit=fetch_limit, filter=query_filter),
73
+ ],
74
+ query_filter=query_filter,
75
+ limit=fetch_limit,
76
+ )
77
+
78
+ # Deduplicate by point ID
79
+ seen_ids: set[str] = set()
80
+ results: list[SearchResult] = []
81
+ for point in response.points:
82
+ if point.id in seen_ids:
83
+ continue
84
+ seen_ids.add(point.id) # type: ignore
85
+ payload = point.payload or {}
86
+ results.append(
87
+ SearchResult(
88
+ title=payload.get("title", ""),
89
+ feed_author=payload.get("feed_author"),
90
+ feed_name=payload.get("feed_name"),
91
+ article_author=payload.get("article_authors"),
92
+ url=payload.get("url"),
93
+ chunk_text=payload.get("chunk_text"),
94
+ score=point.score,
95
+ )
96
+ )
97
+
98
+ results = results[:limit]
99
+ logger.info(f"Returning {len(results)} results for matching query '{query_text}'")
100
+ return results
101
+
102
+
103
+ @opik.track(name="query_unique_titles")
104
+ async def query_unique_titles(
105
+ request: Request,
106
+ query_text: str,
107
+ feed_author: str | None = None,
108
+ feed_name: str | None = None,
109
+ title_keywords: str | None = None,
110
+ limit: int = 5,
111
+ ) -> list[SearchResult]:
112
+ """Query the vector store and return only unique titles.
113
+
114
+ Performs a hybrid dense + sparse search with optional filters and dynamically
115
+ increases the fetch limit to account for duplicates. Deduplicates results
116
+ by article title.
117
+
118
+ Args:
119
+ request (Request): FastAPI request object containing the vector store in app.state.
120
+ query_text (str): Text query to search for.
121
+ feed_author (str | None): Optional filter for the feed author.
122
+ feed_name (str | None): Optional filter for the feed name.
123
+ title_keywords (str | None): Optional filter for title keywords.
124
+ limit (int): Maximum number of unique results to return.
125
+
126
+ Returns:
127
+ list[SearchResult]:
128
+ List of unique search results containing title, feed info, URL, chunk text, and score.
129
+
130
+ """
131
+ vectorstore: AsyncQdrantVectorStore = request.app.state.vectorstore
132
+ dense_vector = vectorstore.dense_vectors([query_text])[0]
133
+ sparse_vector = vectorstore.sparse_vectors([query_text])[0]
134
+
135
+ # Build filter conditions
136
+ conditions: list[FieldCondition] = []
137
+ if feed_author:
138
+ conditions.append(FieldCondition(key="feed_author", match=MatchValue(value=feed_author)))
139
+ if feed_name:
140
+ conditions.append(FieldCondition(key="feed_name", match=MatchValue(value=feed_name)))
141
+ if title_keywords:
142
+ conditions.append(
143
+ FieldCondition(key="title", match=MatchText(text=title_keywords.strip().lower()))
144
+ )
145
+
146
+ query_filter = Filter(must=conditions) if conditions else None # type: ignore
147
+
148
+ fetch_limit = max(1, limit) * 280
149
+ logger.info(f"Fetching up to {fetch_limit} points for unique titles.")
150
+
151
+ response = await vectorstore.client.query_points(
152
+ collection_name=vectorstore.collection_name,
153
+ query=FusionQuery(fusion=Fusion.RRF),
154
+ prefetch=[
155
+ Prefetch(query=dense_vector, using="Dense", limit=fetch_limit, filter=query_filter),
156
+ Prefetch(query=sparse_vector, using="Sparse", limit=fetch_limit, filter=query_filter),
157
+ ],
158
+ query_filter=query_filter,
159
+ limit=fetch_limit,
160
+ )
161
+
162
+ # Deduplicate by title
163
+ seen_titles: set[str] = set()
164
+ results: list[SearchResult] = []
165
+ for point in response.points:
166
+ payload = point.payload or {}
167
+ title = payload.get("title")
168
+ if not title or title in seen_titles:
169
+ continue
170
+ seen_titles.add(title)
171
+ results.append(
172
+ SearchResult(
173
+ title=title,
174
+ feed_author=payload.get("feed_author"),
175
+ feed_name=payload.get("feed_name"),
176
+ article_author=payload.get("article_authors"),
177
+ url=payload.get("url"),
178
+ chunk_text=payload.get("chunk_text"),
179
+ score=point.score,
180
+ )
181
+ )
182
+ if len(results) >= limit:
183
+ break
184
+
185
+ logger.info(f"Returning {len(results)} unique title results for matching query '{query_text}'")
186
+
187
+ # logger.info(f"results: {results}")
188
+ return results
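Sketch of the calling convention these functions imply (the real wiring lives in src/api/routes/search_routes.py and may differ): both expect the vector store on `request.app.state.vectorstore` and are awaited from a route handler.

from fastapi import APIRouter, Request

router = APIRouter()

@router.get("/search")
async def search(request: Request, q: str, limit: int = 5) -> list[SearchResult]:
    # request.app.state.vectorstore must hold an AsyncQdrantVectorStore instance,
    # set up during application startup.
    return await query_with_filters(request, query_text=q, limit=limit)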
src/config.py ADDED
@@ -0,0 +1,202 @@
1
+ import os
2
+ from typing import ClassVar
3
+
4
+ import yaml
5
+ from pydantic import BaseModel, Field, SecretStr, model_validator
6
+ from pydantic_settings import BaseSettings, SettingsConfigDict
7
+
8
+ from src.models.article_models import FeedItem
9
+
10
+
11
+ # -----------------------------
12
+ # Supabase database settings
13
+ # -----------------------------
14
+ class SupabaseDBSettings(BaseModel):
15
+ table_name: str = Field(default="substack_articles", description="Supabase table name")
16
+ host: str = Field(default="localhost", description="Database host")
17
+ name: str = Field(default="postgres", description="Database name")
18
+ user: str = Field(default="postgres", description="Database user")
19
+ password: SecretStr = Field(default=SecretStr("password"), description="Database password")
20
+ port: int = Field(default=6543, description="Database port")
21
+ test_database: str = Field(default="substack_test", description="Test database name")
22
+
23
+
24
+ # -----------------------------
25
+ # RSS settings
26
+ # -----------------------------
27
+ class RSSSettings(BaseModel):
28
+ feeds: list[FeedItem] = Field(
29
+ default_factory=list, description="List of RSS feed items"
30
+ )
31
+ default_start_date: str = Field(default="2025-09-15", description="Default cutoff date")
32
+ batch_size: int = Field(
33
+ default=5, description="Number of articles to parse and ingest in a batch"
34
+ )
35
+
36
+
37
+ # -----------------------------
38
+ # Qdrant settings
39
+ # -----------------------------
40
+ # BAAI/bge-large-en-v1.5 (1024), BAAI/bge-base-en-v1.5 (HF, 768). BAAI/bge-base-en (Fastembed, 768)
41
+ class QdrantSettings(BaseModel):
42
+ url: str = Field(default="", description="Qdrant API URL")
43
+ api_key: str = Field(default="", description="Qdrant API key")
44
+ timeout: int = Field(default=30, description="Qdrant client timeout")
45
+ collection_name: str = Field(
46
+ default="substack_collection", description="Qdrant collection name"
47
+ )
48
+ dense_model_name: str = Field(default="BAAI/bge-base-en", description="Dense model name")
49
+ sparse_model_name: str = Field(
50
+ default="Qdrant/bm25", description="Sparse model name"
51
+ ) # prithivida/Splade_PP_en_v1 (larger)
52
+ vector_dim: int = Field(
53
+ default=768,
54
+ description="Vector dimension", # 768, 1024 with Jina or large HF
55
+ )
56
+ article_batch_size: int = Field(
57
+ default=5, description="Number of articles to parse and ingest in a batch"
58
+ )
59
+ sparse_batch_size: int = Field(default=32, description="Sparse batch size")
60
+ embed_batch_size: int = Field(default=50, description="Dense embedding batch")
61
+ upsert_batch_size: int = Field(default=25, description="Batch size for Qdrant upsert")
62
+ max_concurrent: int = Field(default=2, description="Maximum number of concurrent tasks")
63
+
64
+
65
+ # -----------------------------
66
+ # Text splitting
67
+ # -----------------------------
68
+ class TextSplitterSettings(BaseModel):
69
+ chunk_size: int = Field(default=4000, description="Size of text chunks")
70
+ chunk_overlap: int = Field(default=200, description="Overlap between consecutive text chunks")
71
+ separators: list[str] = Field(
72
+ default_factory=lambda: [
73
+ "\n---\n",
74
+ "\n\n",
75
+ "\n```\n",
76
+ "\n## ",
77
+ "\n# ",
78
+ "\n**",
79
+ "\n",
80
+ ". ",
81
+ "! ",
82
+ "? ",
83
+ " ",
84
+ "",
85
+ ],
86
+ description="List of separators for text splitting. The order or separators matter",
87
+ )
88
+
89
+
90
+ # -----------------------------
91
+ # Jina Settings
92
+ # -----------------------------
93
+ class JinaSettings(BaseModel):
94
+ api_key: str = Field(default="", description="Jina API key")
95
+ url: str = Field(default="https://api.jina.ai/v1/embeddings", description="Jina API URL")
96
+ model: str = Field(default="jina-embeddings-v3", description="Jina model name") # 1024
97
+
98
+
99
+ # -----------------------------
100
+ # Hugging Face Settings
101
+ # -----------------------------
102
+ # BAAI/bge-large-en-v1.5 (1024), BAAI/bge-base-en-v1.5 (768)
103
+ class HuggingFaceSettings(BaseModel):
104
+ api_key: str = Field(default="", description="Hugging Face API key")
105
+ model: str = Field(default="BAAI/bge-base-en-v1.5", description="Hugging Face model name")
106
+
107
+
108
+ # -----------------------------
109
+ # Openai Settings
110
+ # -----------------------------
111
+ class OpenAISettings(BaseModel):
112
+ api_key: str | None = Field(default="", description="OpenAI API key")
113
+ # model: str = Field(default="gpt-4o-mini", description="OpenAI model name")
114
+
115
+
116
+ # -----------------------------
117
+ # OpenRouter Settings
118
+ # -----------------------------
119
+ class OpenRouterSettings(BaseModel):
120
+ api_key: str = Field(default="", description="OpenRouter API key")
121
+ api_url: str = Field(default="https://openrouter.ai/api/v1", description="OpenRouter API URL")
122
+
123
+
124
+ # -----------------------------
125
+ # Opik Observability Settings
126
+ # -----------------------------
127
+ class OpikObservabilitySettings(BaseModel):
128
+ api_key: str = Field(default="", description="Opik Observability API key")
129
+ project_name: str = Field(default="substack-pipeline", description="Opik project name")
130
+
131
+
132
+ # -----------------------------
133
+ # YAML loader
134
+ # -----------------------------
135
+ def load_yaml_feeds(path: str) -> list[FeedItem]:
136
+ """
137
+ Load RSS feed items from a YAML file.
138
+ If the file does not exist or is empty, returns an empty list.
139
+
140
+ Args:
141
+ path (str): Path to the YAML file.
142
+
143
+ Returns:
144
+ list[FeedItem]: List of FeedItem instances loaded from the file.
145
+ """
146
+ if not os.path.exists(path):
147
+ return []
148
+ with open(path, encoding="utf-8") as f:
149
+ data = yaml.safe_load(f)
150
+ feed_list = data.get("feeds", [])
151
+ return [FeedItem(**feed) for feed in feed_list]
152
+
153
+
154
+ # -----------------------------
155
+ # Main Settings
156
+ # -----------------------------
157
+ class Settings(BaseSettings):
158
+ supabase_db: SupabaseDBSettings = Field(default_factory=SupabaseDBSettings)
159
+ qdrant: QdrantSettings = Field(default_factory=QdrantSettings)
160
+ rss: RSSSettings = Field(default_factory=RSSSettings)
161
+ text_splitter: TextSplitterSettings = Field(default_factory=TextSplitterSettings)
162
+
163
+ jina: JinaSettings = Field(default_factory=JinaSettings)
164
+ hugging_face: HuggingFaceSettings = Field(default_factory=HuggingFaceSettings)
165
+ openai: OpenAISettings = Field(default_factory=OpenAISettings)
166
+ openrouter: OpenRouterSettings = Field(default_factory=OpenRouterSettings)
167
+ opik: OpikObservabilitySettings = Field(default_factory=OpikObservabilitySettings)
168
+
169
+ rss_config_yaml_path: str = "src/configs/feeds_rss.yaml"
170
+
171
+ # Pydantic v2 model config
172
+ model_config: ClassVar[SettingsConfigDict] = SettingsConfigDict(
173
+ env_file=[".env"],
174
+ env_file_encoding="utf-8",
175
+ extra="ignore",
176
+ env_nested_delimiter="__",
177
+ case_sensitive=False,
178
+ frozen=True,
179
+ )
180
+
181
+ @model_validator(mode="after")
182
+ def load_yaml_rss_feeds(self) -> "Settings":
183
+ """
184
+ Load RSS feeds from a YAML file after model initialization.
185
+ If the file does not exist or is empty, the feeds list remains unchanged.
186
+
187
+ Args:
188
+ self (Settings): The settings instance.
189
+
190
+ Returns:
191
+ Settings: The updated settings instance.
192
+ """
193
+ yaml_feeds = load_yaml_feeds(self.rss_config_yaml_path)
194
+ if yaml_feeds:
195
+ self.rss.feeds = yaml_feeds
196
+ return self
197
+
198
+
199
+ # -----------------------------
200
+ # Instantiate settings
201
+ # -----------------------------
202
+ settings = Settings()
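With `env_nested_delimiter="__"`, flat environment variables map onto the nested settings models; the variable names and values below are placeholders:

#   QDRANT__URL=https://my-cluster.example.cloud   -> settings.qdrant.url
#   QDRANT__API_KEY=...                            -> settings.qdrant.api_key
#   SUPABASE_DB__PORT=6543                         -> settings.supabase_db.port
import os

os.environ["QDRANT__COLLECTION_NAME"] = "substack_collection_dev"
print(Settings().qdrant.collection_name)  # -> substack_collection_dev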
src/configs/feeds_rss.yaml ADDED
@@ -0,0 +1,91 @@
1
+ feeds:
2
+ - name: "AI Echoes"
3
+ author: "Benito Martin"
4
+ url: "https://aiechoes.substack.com/feed"
5
+ - name: "The Neural Maze"
6
+ author: "Miguel Otero"
7
+ url: "https://theneuralmaze.substack.com/feed"
8
+ - name: "Decoding ML"
9
+ author: "Paul Iusztin"
10
+ url: "https://decodingml.substack.com/feed"
11
+ - name: "Swirl AI Newsletter"
12
+ author: "Aurimas Griciūnas"
13
+ url: "https://www.newsletter.swirlai.com/feed"
14
+ - name: "Marvelous MLOps Substack"
15
+ author: "Başak Tuğçe Eskili and Maria Vechtomova"
16
+ url: "https://marvelousmlops.substack.com/feed"
17
+ - name: "Jam with AI"
18
+ author: "Shirin Khosravi Jam and Shantanu Ladhwe"
19
+ url: "https://jamwithai.substack.com/feed"
20
+ - name: "Hamel's Substack"
21
+ author: "Hamel Husain"
22
+ url: "https://hamelhusain.substack.com/feed"
23
+ - name: "Neural Bits"
24
+ author: "Alex Razvant"
25
+ url: "https://multimodalai.substack.com/feed"
26
+ - name: "DiamantAI"
27
+ author: "Nir Diamant"
28
+ url: "https://diamantai.substack.com/feed"
29
+ - name: "ByteByteGo Newsletter"
30
+ author: "Alex Xu"
31
+ url: "https://blog.bytebytego.com/feed"
32
+ - name: "Latent.Space"
33
+ author: "Latent.Space"
34
+ url: "https://www.latent.space/feed"
35
+ - name: "Adaline Labs"
36
+ author: "Adaline"
37
+ url: "https://labs.adaline.ai/feed"
38
+ - name: "Gradient Ascent"
39
+ author: "Sairam Sundaresan"
40
+ url: "https://newsletter.artofsaience.com/feed"
41
+ - name: "Daily Dose of Data Science"
42
+ author: "Avi Chawla"
43
+ url: "https://blog.dailydoseofds.com/feed"
44
+ - name: "Generative AI for Everyone"
45
+ author: "Hamza Farooq"
46
+ url: "https://boringbot.substack.com/feed"
47
+ - name: "Vizuara's AI Newsletter"
48
+ author: "Vizuara AI Labs"
49
+ url: "https://www.vizuaranewsletter.com/feed"
50
+ - name: "Deep (Learning) Focus"
51
+ author: "Cameron R. Wolfe, Ph.D."
52
+ url: "https://cameronrwolfe.substack.com/feed"
53
+ - name: "Language Models & Co."
54
+ author: "Jay Alammar"
55
+ url: "https://newsletter.languagemodels.co/feed"
56
+ - name: "Exploring Language Models"
57
+ author: "Maarten Grootendorst"
58
+ url: "https://newsletter.maartengrootendorst.com/feed"
59
+ - name: "Hyperplane"
60
+ author: "Cube Digital"
61
+ url: "https://thehyperplane.substack.com/feed"
62
+ - name: "ModelCraft"
63
+ author: "Abi Aryan"
64
+ url: "https://modelcraft.substack.com/feed"
65
+ - name: "NeoSage"
66
+ author: "Shivani Virdi"
67
+ url: "https://blog.neosage.io/feed"
68
+ - name: "Nnitiwe's AI Blog"
69
+ author: "Samuel Theophilus"
70
+ url: "https://blog.nnitiwe.io/feed"
71
+ - name: "The Palindrome"
72
+ author: "Tivadar Danka"
73
+ url: "https://thepalindrome.org/feed"
74
+ - name: "Python & Chill"
75
+ author: "Banias Baabe"
76
+ url: "https://pythonandchill.substack.com/feed"
77
+ - name: "Rami's Data Newsletter"
78
+ author: "Rami Krispin"
79
+ url: "https://ramikrispin.substack.com/feed"
80
+ - name: "To Data & Beyond"
81
+ author: "Youssef Hosni"
82
+ url: "https://youssefh.substack.com/feed"
83
+ - name: "Vanishing Gradients"
84
+ author: "Hugo Bowne-Anderson"
85
+ url: "https://hugobowne.substack.com/feed"
86
+ - name: "When Engineers meet AI"
87
+ author: "Kannan Kalidasan"
88
+ url: "https://engineersmeetai.substack.com/feed"
89
+ - name: "slys.dev"
90
+ author: "Anna & Jakub Slys"
91
+ url: "https://iam.slys.dev/feed"
src/infrastructure/__init__.py ADDED
File without changes
src/infrastructure/qdrant/__init__.py ADDED
File without changes
src/infrastructure/qdrant/create_collection.py ADDED
@@ -0,0 +1,47 @@
1
+ import asyncio
2
+
3
+ from src.infrastructure.qdrant.qdrant_vectorstore import AsyncQdrantVectorStore
4
+ from src.utils.logger_util import setup_logging
5
+
6
+ logger = setup_logging()
7
+
8
+
9
+ async def main() -> None:
10
+ """Create a Qdrant collection asynchronously using AsyncQdrantVectorStore.
11
+
12
+ This function initializes an AsyncQdrantVectorStore instance and calls its
13
+ create_collection method to set up a Qdrant collection for vector storage.
14
+ Errors during collection creation are logged
15
+ and handled gracefully.
16
+
17
+ Args:
18
+ None
19
+
20
+ Returns:
21
+ None
22
+
23
+ Raises:
24
+ RuntimeError: If an error occurs during Qdrant collection creation.
25
+ Exception: For unexpected errors during execution.
26
+
27
+ """
28
+ # Initialize the logger
29
+ logger.info("Creating Qdrant collection")
30
+
31
+ try:
32
+ # Initialize the AsyncQdrantVectorStore instance
33
+ vectorstore = AsyncQdrantVectorStore()
34
+ # Create the Qdrant collection asynchronously
35
+ await vectorstore.create_collection()
36
+ logger.info("Qdrant collection created successfully")
37
+
38
+ except RuntimeError as e:
39
+ logger.error(f"Failed to create Qdrant collection: {e}")
40
+ raise RuntimeError("Error creating Qdrant collection") from e
41
+ except Exception as e:
42
+ logger.error(f"Unexpected error during Qdrant collection creation: {e}")
43
+ raise
44
+
45
+
46
+ if __name__ == "__main__":
47
+ asyncio.run(main())
src/infrastructure/qdrant/create_indexes.py ADDED
@@ -0,0 +1,44 @@
1
+ import asyncio
2
+
3
+ from src.infrastructure.qdrant.qdrant_vectorstore import AsyncQdrantVectorStore
4
+ from src.utils.logger_util import setup_logging
5
+
6
+ logger = setup_logging()
7
+
8
+
9
+ async def main() -> None:
10
+ """Create necessary indexes for the Qdrant vector store.
11
+
12
+ Initializes an AsyncQdrantVectorStore and creates HNSW, title, article authors,
13
+ feed author, and feed name indexes. Logs errors and ensures proper execution.
14
+
15
+ Args:
16
+ None
17
+
18
+ Returns:
19
+ None
20
+
21
+ Raises:
22
+ RuntimeError: If an error occurs during index creation.
23
+ Exception: For unexpected errors during execution.
24
+
25
+ """
26
+ logger.info("Creating Qdrant indexes")
27
+ try:
28
+ vectorstore = AsyncQdrantVectorStore()
29
+ await vectorstore.enable_hnsw()
30
+ await vectorstore.create_title_index()
31
+ await vectorstore.create_article_authors_index()
32
+ await vectorstore.create_feed_author_index()
33
+ await vectorstore.create_article_feed_name_index()
34
+ logger.info("Qdrant indexes created successfully")
35
+ except RuntimeError as e:
36
+ logger.error(f"Failed to create Qdrant indexes: {e}")
37
+ raise RuntimeError("Error creating Qdrant indexes") from e
38
+ except Exception as e:
39
+ logger.error(f"Unexpected error creating Qdrant indexes: {e}")
40
+ raise
41
+
42
+
43
+ if __name__ == "__main__":
44
+ asyncio.run(main())
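These two bootstrap scripts are typically run once, in order, before ingestion: `python -m src.infrastructure.qdrant.create_collection` to create the collection, then `python -m src.infrastructure.qdrant.create_indexes` to add the HNSW configuration and payload indexes.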