Commit 6f113af
1 Parent(s): 5567b75
add files
Files changed:
- .gitattributes +1 -1
- .gitignore +11 -0
- .python-version +1 -0
- Dockerfile +21 -0
- README.md +0 -11
- app.py +48 -0
- pyproject.toml +22 -0
- reportanalysis.py +132 -0
- requirements.txt +14 -0
- runtime.txt +1 -0
- uv.lock +0 -0
.gitattributes
CHANGED
@@ -32,4 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,11 @@
+# Python-generated files
+__pycache__/
+*.py[oc]
+build/
+dist/
+wheels/
+*.egg-info
+
+# Virtual environments
+.venv
+.env
.python-version
ADDED
@@ -0,0 +1 @@
+3.12
Dockerfile
ADDED
@@ -0,0 +1,21 @@
+FROM python:3.9
+
+# The container starts as root by default, so install system packages here
+RUN apt-get update && apt-get install -y libgl1-mesa-glx libglib2.0-0
+
+# Now add a non-root user and switch to it
+RUN useradd -m -u 1000 user
+USER user
+ENV PATH="/home/user/.local/bin:$PATH"
+
+WORKDIR /app
+
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+RUN python -m spacy download en_core_web_lg
+RUN python -c "from doctr.models import ocr_predictor; ocr_predictor(pretrained=True)"
+
+COPY --chown=user . /app
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md
CHANGED
@@ -1,11 +0,0 @@
----
-title: Text Amon API
-emoji: 📉
-colorFrom: gray
-colorTo: indigo
-sdk: docker
-pinned: false
-license: apache-2.0
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,48 @@
+from fastapi import FastAPI, UploadFile, File
+from fastapi.middleware.cors import CORSMiddleware
+from agents import Runner
+from reportanalysis import Report_Agent, extract_text
+from logging import getLogger
+
+app = FastAPI()
+log = getLogger()
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+@app.get("/")
+async def root():
+    return {"message": "Welcome to the Medical Report Analysis API"}
+
+@app.post("/upload")
+async def upload_file(file: UploadFile = File(...)):
+    try:
+        pdf = file.filename.lower().endswith('.pdf')
+        doc = file.filename.lower().endswith('.docx')
+        content = await file.read()
+        text = extract_text(content, pdf, doc)
+        result = await Runner.run(
+            Report_Agent,
+            f"""Please analyze the extracted text of the uploaded medical report:
+
+            {text}
+
+            Do not provide any analysis before calling the tool.
+
+            Once the text is extracted, continue with step-by-step medical analysis and return the final output strictly in JSON format.
+            """,
+            context=content,
+        )
+        print(result.final_output)
+        return {"result": result.final_output.model_dump()}
+    except Exception as e:
+        return {"error": str(e)}
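For local testing, the /upload endpoint can be exercised with a short client script. This is a minimal sketch, not part of the commit: it assumes the API is already running on http://localhost:7860 (the port in the Dockerfile CMD) and that a sample file named report.pdf sits next to the script; httpx is pulled in transitively by fastapi[standard].

import httpx

# Hypothetical smoke test for the /upload endpoint (not part of this commit).
# Assumes the API is running locally on port 7860 and report.pdf exists.
with open("report.pdf", "rb") as f:
    response = httpx.post(
        "http://localhost:7860/upload",
        files={"file": ("report.pdf", f, "application/pdf")},
        timeout=120,  # OCR and the Gemini call can take a while
    )
print(response.json())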
pyproject.toml
ADDED
@@ -0,0 +1,22 @@
+[project]
+name = "backend"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "eval-type-backport>=0.2.2",
+    "fastapi[standard]>=0.116.1",
+    "numpy>=2.3.1",
+    "openai-agents>=0.2.3",
+    "pillow>=11.3.0",
+    "presidio-analyzer>=2.2.359",
+    "presidio-anonymizer>=2.2.359",
+    "pypdf2>=3.0.1",
+    "python-doctr>=1.0.0",
+    "python-docx>=1.2.0",
+    "python-dotenv>=1.1.1",
+    "torch>=2.7.1",
+    "torchvision>=0.22.1",
+    "uvicorn>=0.35.0",
+]
reportanalysis.py
ADDED
@@ -0,0 +1,132 @@
+import io
+import re
+import os
+import docx
+from PyPDF2 import PdfReader
+import numpy as np
+from PIL import Image
+from doctr.models import ocr_predictor
+from presidio_analyzer import AnalyzerEngine
+from presidio_anonymizer import AnonymizerEngine
+from dotenv import load_dotenv
+from pydantic import BaseModel, Field
+from typing import List, Literal
+from agents import (
+    Agent,
+    AsyncOpenAI,
+    OpenAIChatCompletionsModel,
+    AgentOutputSchemaBase,
+    enable_verbose_stdout_logging,
+    set_default_openai_key
+)
+enable_verbose_stdout_logging()
+
+load_dotenv()
+model = ocr_predictor(pretrained=True)
+analyzer = AnalyzerEngine()
+anonymizer = AnonymizerEngine()
+
+API = os.getenv("GEM_API_KEY")
+
+class TestItem(BaseModel):
+    name: str
+    user_value: str
+    normal_range: str
+    analysis: str | None = None
+    flag: Literal["Red", "Yellow", "Green"]
+
+class ReportSection(BaseModel):
+    title: str
+    tests: List[TestItem]
+    section_summary: str
+
+class AiTipSection(BaseModel):
+    title: str
+    risk: str
+    tips: str
+    action: str
+    diet_suggestion: List[str] = Field(default_factory=list)
+    life_style: List[str] = Field(default_factory=list)
+
+class ReportSchema(BaseModel):
+    report_summary_title: str
+    ai_tip_title: str
+    report_sections: List[ReportSection]
+    ai_tip_sections: List[AiTipSection]
+
+client = AsyncOpenAI(
+    api_key=API,
+    base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
+)
+
+agent_model = OpenAIChatCompletionsModel(
+    model="gemini-2.0-flash",
+    openai_client=client,
+)
+
+def format_json(result):
+    analyzer_results = analyzer.analyze(text=result, language='en')
+    anonymized_text = anonymizer.anonymize(text=result, analyzer_results=analyzer_results)
+    result_text = anonymized_text.text
+    pattern = r'(<PERSON>\s+[\w\s\-]+)'
+    hospital_pattern = r'(?i)\b(?:[A-Z][a-zA-Z]+(?:\s+|,|&)?){1,6}(hospital|lab|clinic|diagnostic|medical|centre|pathology)\b'
+    result_text = re.sub(r'[,.()\'"-]', ' ', result_text).strip()
+    result_text = re.sub(pattern, r'<NAME>', result_text)
+    result_text = re.sub(hospital_pattern, r'<HOSPITAL>', result_text)
+    print(result_text)
+    return result_text
+
+def extract_text(content, pdf, doc) -> str:
+    if pdf:
+        reader = PdfReader(io.BytesIO(content))
+        text = ''
+        for page in reader.pages:
+            text += page.extract_text() + '\n'
+        print(text)
+        return text.strip()
+    elif doc:
+        doc = docx.Document(io.BytesIO(content))
+        text = ''
+        for para in doc.paragraphs:
+            text += para.text + '\n'
+        print(text)
+        return text.strip()
+    else:
+        image = Image.open(io.BytesIO(content)).convert("RGB")
+        npImg = np.ascontiguousarray(np.array(image, dtype='uint8'))
+        OCRresult = model([npImg])
+        clean_json = format_json(OCRresult.render())
+        print(clean_json)
+        return clean_json
+
+Report_Agent = Agent(
+    name = "Report_Analysis_Agent",
+    instructions = """You are a Medical Report Analysis Agent.
+
+Your role is to analyze uploaded medical test reports and generate clear, accurate health advice in structured JSON format.
+
+Your Main Task:
+1. Analyze the extracted medical text carefully.
+2. Identify each test name, its result (user value), and the normal reference range.
+3. Assign a flag to each test based on the result:
+   - Red: Critical or abnormal
+   - Yellow: Slightly out of range or borderline
+   - Green: Normal or safe
+4. Provide a clear summary of the findings.
+5. Offer relevant AI-driven health tips, highlight potential risks, and suggest dietary and lifestyle improvements.
+
+Return your final response STRICTLY in the JSON structure.
+""",
+    model = agent_model,
+    output_type = ReportSchema,
+)
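extract_text can also be checked outside the API. A minimal sketch under the assumption that a local file named sample.pdf exists (the file name is hypothetical) and that the module-level doctr and presidio objects load successfully on import:

from reportanalysis import extract_text

# Hypothetical offline check: run the same extraction path app.py uses for PDFs.
with open("sample.pdf", "rb") as f:
    content = f.read()

text = extract_text(content, pdf=True, doc=False)
print(text[:500])  # inspect the extracted text before it is sent to the agent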
requirements.txt
ADDED
@@ -0,0 +1,14 @@
+fastapi[standard]
+numpy
+openai-agents
+uvicorn
+pillow
+presidio-analyzer
+presidio-anonymizer
+pypdf2
+python-doctr
+python-docx
+python-dotenv
+eval_type_backport
+torch
+torchvision
runtime.txt
ADDED
@@ -0,0 +1 @@
+python-3.10.12
uv.lock
ADDED
The diff for this file is too large to render. See raw diff.