Spaces:

MedSwin
/

MedicalDiagnosisSystem

Runtime error

App Files Files Community

dylanglenister committed on Oct 19, 2025

Commit

833527f

1 Parent(s): 3d81965

FEAT: Reranker file.

Browse files

Uses an NVIDIA reranker model to reorder retrieved information chunks by relevance to the query.

Files changed (2) hide show

src/config/settings.py +3 -0
src/services/reranker.py +70 -0

src/config/settings.py CHANGED Viewed

@@ -1,6 +1,7 @@
 # src/config/settings.py
 import os
 class Settings:
 	"""Application-wide settings."""
 	# Memory settings
@@ -9,6 +10,8 @@ class Settings:
 	SEMANTIC_CONTEXT_SIZE: int = 17
 	SIMILARITY_THRESHOLD: float = 0.15
 	EMBEDDING_MODEL_NAME: str = "MedEmbed-large-v0.1"
 	# Safety Guard settings
 	SAFETY_GUARD_ENABLED: bool = os.getenv("SAFETY_GUARD_ENABLED", "true").lower() == "true"

 # src/config/settings.py
 import os
 class Settings:
 	"""Application-wide settings."""
 	# Memory settings
 	SEMANTIC_CONTEXT_SIZE: int = 17
 	SIMILARITY_THRESHOLD: float = 0.15
 	EMBEDDING_MODEL_NAME: str = "MedEmbed-large-v0.1"
+	NVIDIA_RERANKER_MODEL: str = "rerank-qa-mistral-4b"
+	NVIDIA_RERANKER_ENDPOINT: str = "" # TODO
 	# Safety Guard settings
 	SAFETY_GUARD_ENABLED: bool = os.getenv("SAFETY_GUARD_ENABLED", "true").lower() == "true"

src/services/reranker.py ADDED Viewed

	@@ -0,0 +1,70 @@

+# src/services/reranker.py
+from src.config.settings import settings
+from src.models.information import InfoChunk
+from src.utils.logger import logger
+from src.utils.rotator import APIKeyRotator, robust_post_json
+async def rerank_documents(
+	query: str,
+	documents: list[InfoChunk],
+	rotator: APIKeyRotator,
+	top_k: int = 3,
+) -> list[InfoChunk]:
+	"""
+	Reranks a list of documents based on a query using the NVIDIA Rerank API.
+	Args:
+		query: The user's query string.
+		documents: A list of InfoChunk objects retrieved from the initial search.
+		rotator: The API key rotator for NVIDIA services.
+		top_k: The final number of documents to return after reranking.
+	Returns:
+		A sorted list of the top_k most relevant InfoChunk objects.
+		Returns the original list sliced to top_k if reranking fails.
+	"""
+	if not documents:
+		return []
+	headers = {
+		"Authorization": f"Bearer {rotator.get_key() or ''}",
+		"Accept": "application/json",
+		"Content-Type": "application/json",
+	}
+	passages = [doc.content for doc in documents]
+	payload = {
+		"model": settings.NVIDIA_RERANKER_MODEL,
+		"query": query,
+		"passages": passages,
+		"top_n": top_k,
+	}
+	try:
+		# Use the existing robust helper for consistency
+		data = await robust_post_json(settings.NVIDIA_RERANKER_ENDPOINT, headers, payload, rotator)
+		results = data.get("results", [])
+		if not results:
+			logger().warning("Reranking returned no results, falling back to original order.")
+			return documents[:top_k]
+		# Create a mapping of original document content to the document object
+		doc_map = {doc.content: doc for doc in documents}
+		# Reconstruct the sorted list of documents based on rerank results
+		reranked_docs = []
+		for result in sorted(results, key=lambda x: x["rank"]):
+			if result["passage"] in doc_map:
+				reranked_docs.append(doc_map[result["passage"]])
+		return reranked_docs
+	except Exception as e:
+		logger().error(f"An unexpected error occurred during reranking: {e}")
+	# Fallback: return the top_k documents from the original list
+	return documents[:top_k]