Spaces: Running on Zero

jedick committed · 7320bf1
Parent(s): d6be5fa

Download embedding model during app startup

Browse files:
- app.py +10 -1
- graph.py +2 -1
- retriever.py +22 -6
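The commit moves the embedding model download to module import time, mirroring what app.py already does for the chat model checkpoints. A minimal standalone sketch of the pattern (assuming huggingface_hub is installed; variable names are illustrative):

# Sketch: pre-fetch model checkpoints once at startup, then pass the
# local directory to downstream loaders instead of a hub model id.
from huggingface_hub import snapshot_download

embedding_model_id = "nomic-ai/nomic-embed-text-v1.5"

# Downloads on the first run, resolves to the local cache on later runs,
# so no network access is needed inside the request path.
embedding_ckpt_dir = snapshot_download(
    embedding_model_id, local_dir_use_symlinks=False
)
print(f"Using embedding checkpoints from {embedding_ckpt_dir}")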
app.py CHANGED

@@ -27,6 +27,11 @@ print(f"Downloading/loading checkpoints for {model_id}...")
 ckpt_dir = snapshot_download(model_id, local_dir_use_symlinks=False)
 print(f"Using checkpoints from {ckpt_dir}")
 
+embedding_model_id = "nomic-ai/nomic-embed-text-v1.5"
+print(f"Downloading/loading checkpoints for {embedding_model_id}...")
+embedding_ckpt_dir = snapshot_download(embedding_model_id, local_dir_use_symlinks=False)
+print(f"Using embedding checkpoints from {embedding_ckpt_dir}")
+
 # Global setting for search type
 search_type = "hybrid"

@@ -92,7 +97,11 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
     # Get the chat model and build the graph
     chat_model = GetChatModel(compute_mode, ckpt_dir)
     graph_builder = BuildGraph(
-        chat_model, compute_mode, search_type, think_answer=True
+        chat_model,
+        compute_mode,
+        search_type,
+        think_answer=True,
+        embedding_ckpt_dir=embedding_ckpt_dir,
     )
     # Compile the graph with an in-memory checkpointer
     memory = MemorySaver()
graph.py CHANGED

@@ -95,6 +95,7 @@ def BuildGraph(
     top_k=6,
     think_query=False,
     think_answer=False,
+    embedding_ckpt_dir=None,
 ):
     """
     Build conversational RAG graph for email retrieval and answering with citations.

@@ -157,7 +158,7 @@ def BuildGraph(
         end_year: Ending year for emails (optional)
         """
         retriever = BuildRetriever(
-            compute_mode, search_type, top_k, start_year, end_year
+            compute_mode, search_type, top_k, start_year, end_year, embedding_ckpt_dir
         )
         # For now, just add the months to the search query
         if months:
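Because the new embedding_ckpt_dir parameter defaults to None at every level, existing call sites keep working unchanged. Roughly (the path below is hypothetical):

# Old call sites still work: with embedding_ckpt_dir=None, the retriever
# falls back to downloading "nomic-ai/nomic-embed-text-v1.5" by hub id.
graph = BuildGraph(chat_model, compute_mode, search_type)

# New call site: reuse checkpoints already downloaded at app startup.
graph = BuildGraph(
    chat_model,
    compute_mode,
    search_type,
    think_answer=True,
    embedding_ckpt_dir="/path/to/local/checkpoints",  # hypothetical path
)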
retriever.py CHANGED

@@ -36,6 +36,7 @@ def BuildRetriever(
     top_k=6,
     start_year=None,
     end_year=None,
+    embedding_ckpt_dir=None,
 ):
     """
     Build retriever instance.

@@ -51,10 +52,14 @@ def BuildRetriever(
     if search_type == "dense":
         if not (start_year or end_year):
             # No year filtering, so directly use base retriever
-            return BuildRetrieverDense(compute_mode, top_k=top_k)
+            return BuildRetrieverDense(
+                compute_mode, top_k=top_k, embedding_ckpt_dir=embedding_ckpt_dir
+            )
         else:
             # Get 1000 documents then keep top_k filtered by year
-            base_retriever = BuildRetrieverDense(compute_mode, top_k=1000)
+            base_retriever = BuildRetrieverDense(
+                compute_mode, top_k=1000, embedding_ckpt_dir=embedding_ckpt_dir
+            )
             return TopKRetriever(
                 base_retriever=base_retriever,
                 top_k=top_k,

@@ -78,10 +83,20 @@ def BuildRetriever(
     # Use floor (top_k // 2) and ceiling -(top_k // -2) to divide odd values of top_k
     # https://stackoverflow.com/questions/14822184/is-there-a-ceiling-equivalent-of-operator-in-python
     dense_retriever = BuildRetriever(
-        compute_mode, "dense", (top_k // 2), start_year, end_year
+        compute_mode,
+        "dense",
+        (top_k // 2),
+        start_year,
+        end_year,
+        embedding_ckpt_dir,
     )
     sparse_retriever = BuildRetriever(
-        compute_mode, "sparse", -(top_k // -2), start_year, end_year
+        compute_mode,
+        "sparse",
+        -(top_k // -2),
+        start_year,
+        end_year,
+        embedding_ckpt_dir,
     )
     ensemble_retriever = EnsembleRetriever(
         retrievers=[dense_retriever, sparse_retriever], weights=[1, 1]

@@ -111,7 +126,7 @@ def BuildRetrieverSparse(top_k=6):
     return retriever


-def BuildRetrieverDense(compute_mode: str, top_k=6):
+def BuildRetrieverDense(compute_mode: str, top_k=6, embedding_ckpt_dir=None):
     """
     Build dense retriever instance with ChromaDB vectorstore

@@ -131,13 +146,14 @@ def BuildRetrieverDense(compute_mode: str, top_k=6):
     # embedding_function = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5", show_progress=True)
     # https://python.langchain.com/api_reference/community/embeddings/langchain_community.embeddings.huggingface.HuggingFaceBgeEmbeddings.html
     model_name = "nomic-ai/nomic-embed-text-v1.5"
+    id_or_dir = embedding_ckpt_dir if embedding_ckpt_dir else model_name
     model_kwargs = {
         "device": "cuda",
         "trust_remote_code": True,
     }
     encode_kwargs = {"normalize_embeddings": True}
     embedding_function = HuggingFaceBgeEmbeddings(
-        model_name=model_name,
+        model_name=id_or_dir,
         model_kwargs=model_kwargs,
         encode_kwargs=encode_kwargs,
         query_instruction="search_query:",
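The core of the retriever change is that HuggingFaceBgeEmbeddings (via the underlying sentence-transformers loader) accepts a local checkpoint directory in place of a hub model id. A condensed sketch of the fallback, using the same import as retriever.py; the load_embeddings helper is hypothetical:

from langchain_community.embeddings import HuggingFaceBgeEmbeddings

def load_embeddings(embedding_ckpt_dir=None):
    """Load the Nomic embedding model from a local checkpoint directory if
    given, otherwise let sentence-transformers download it by hub id."""
    model_name = "nomic-ai/nomic-embed-text-v1.5"
    # Prefer the pre-downloaded directory; fall back to the hub id.
    id_or_dir = embedding_ckpt_dir if embedding_ckpt_dir else model_name
    return HuggingFaceBgeEmbeddings(
        model_name=id_or_dir,
        model_kwargs={"device": "cuda", "trust_remote_code": True},
        encode_kwargs={"normalize_embeddings": True},
        query_instruction="search_query:",
    )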