Spaces:

infly
/

Infinity-Parser-Demo

Running

App Files Files Community

GiantPandas committed 4 days ago

Commit

cc96b3f

verified ·

1 Parent(s): 39b2b5c

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -25

app.py CHANGED Viewed

@@ -54,6 +54,29 @@ preset_prompts = [
     "Reformat this document as Markdown with clear sections and lists.",
 ]
 def send_pdf_to_parse(file_path, server_ip, port, route="/upload", api_key=None):
     url = f"{openai_api_base}{route}"
@@ -66,9 +89,6 @@ def send_pdf_to_parse(file_path, server_ip, port, route="/upload", api_key=None)
         response = requests.post(url, files=files, headers=headers)
     return response
 async def send_pdf_async_aiohttp(file_path, server_ip, route="/upload", Authorization=None):
     """使用aiohttp异步发送PDF"""
     url = f"{server_ip}{route}"
@@ -95,32 +115,16 @@ def extract_makrdown(text):
         return m.group(1).strip()
     else:
         return text
-openai_api_key = "EMPTY"
-openai_api_base = os.environ.get("openai_api_base")
-IP = os.environ.get("IP")
-PORT = os.environ.get("PORT")
-Authorization = os.environ.get("Authorization")
-client = AsyncOpenAI(
-    api_key=openai_api_key,
-    base_url=openai_api_base + "/v1",
-    http_client=httpx.AsyncClient(verify=False)
-)
-async def request(messages):
     chat_completion_from_base64 = await client.chat.completions.create(
         messages=messages,
         extra_headers={
             "Authorization": f"Bearer {Authorization}"
         },
-        model="Qwen2_5VL",
         max_completion_tokens=4096,
         stream=True,
         temperature=0.0,
@@ -204,7 +208,10 @@ def download_markdown_file(md_text):
     return str(filepath)
-async def doc_parser(doc_path, prompt):
     doc_path = Path(doc_path)
     if not doc_path.is_file():
@@ -231,7 +238,7 @@ async def doc_parser(doc_path, prompt):
     all_pages_raw = []
     for query in queries:
         pages = ""
-        async for chunk in request(query):
             pages += chunk
             yield extract_makrdown(pages), pages
         all_pages.append(extract_makrdown(pages))
@@ -382,6 +389,12 @@ async def process_file(file_path):
     return str(tmp_file_path)
 if __name__ == '__main__':
     with gr.Blocks() as demo:
         with gr.Row():
@@ -417,6 +430,15 @@ if __name__ == '__main__':
                 ]
             with gr.Column(variant='panel', scale=5):
                 with gr.Accordion("Examples", open=True):
                     example_root = "examples"
                     file_path = [
@@ -516,9 +538,13 @@ if __name__ == '__main__':
             lambda f: gr.update(visible=False),
             inputs=output_file,
             outputs=output_file
         ).then(
             fn=doc_parser,
-            inputs=[file, prompts],
             outputs=[md, md_text]
         )

     "Reformat this document as Markdown with clear sections and lists.",
 ]
+openai_api_key = "EMPTY"
+openai_api_base = os.environ.get("infinity_parser1_name")
+Authorization =  os.environ.get("infinity_parser1_Authorization")
+AVAILABLE_MODELS = {
+    "Infinity-Parser-7B": {
+            "name": os.environ.get("infinity_parser1_name"),
+            "client": AsyncOpenAI(
+                api_key=openai_api_key,
+                base_url=os.environ.get("infinity_parser1_api") + "/v1",
+            ),
+            "Authorization": os.environ.get("infinity_parser1_Authorization")
+        },
+    "Infinity-Parser2-30B-A3B-Preview": {
+            "name": os.environ.get("infinity_parser2_name"),
+            "client": AsyncOpenAI(
+                api_key=openai_api_key,
+                base_url=os.environ.get("infinity_parser2_api") + "/v1",
+            ),
+            "Authorization": os.environ.get("infinity_parser2_Authorization")
+        }
+}
 def send_pdf_to_parse(file_path, server_ip, port, route="/upload", api_key=None):
     url = f"{openai_api_base}{route}"
         response = requests.post(url, files=files, headers=headers)
     return response
 async def send_pdf_async_aiohttp(file_path, server_ip, route="/upload", Authorization=None):
     """使用aiohttp异步发送PDF"""
     url = f"{server_ip}{route}"
         return m.group(1).strip()
     else:
         return text
+async def request(messages, model_name, client, Authorization):
     chat_completion_from_base64 = await client.chat.completions.create(
         messages=messages,
         extra_headers={
             "Authorization": f"Bearer {Authorization}"
         },
+        model=model_name,
         max_completion_tokens=4096,
         stream=True,
         temperature=0.0,
     return str(filepath)
+async def doc_parser(doc_path, prompt, model_id):
+    model_name = AVAILABLE_MODELS[model_id]["name"]
+    client = AVAILABLE_MODELS[model_id]["client"]
+    Authorization = AVAILABLE_MODELS[model_id]["Authorization"]
     doc_path = Path(doc_path)
     if not doc_path.is_file():
     all_pages_raw = []
     for query in queries:
         pages = ""
+        async for chunk in request(query, model_name, client, Authorization):
             pages += chunk
             yield extract_makrdown(pages), pages
         all_pages.append(extract_makrdown(pages))
     return str(tmp_file_path)
+def check_file(f):
+    """Validate that a file was supplied before parsing starts.
+
+    Returns ``f`` unchanged when it is not ``None``; otherwise raises
+    ``gr.Error`` with a message asking the user to upload a PDF or image.
+    """
+    if f is None:
+        raise gr.Error("Please upload a PDF or image before parsing.")
+    return f
 if __name__ == '__main__':
     with gr.Blocks() as demo:
         with gr.Row():
                 ]
             with gr.Column(variant='panel', scale=5):
+                model_selector = gr.Dropdown(
+                    choices=[(k, k) for k, v in AVAILABLE_MODELS.items()],
+                    value=list(AVAILABLE_MODELS.keys())[0],  # 默认选择第一个模型
+                    label="Model Selection",
+                    info="Select the model to use for parsing",
+                    interactive=True,
+                )
                 with gr.Accordion("Examples", open=True):
                     example_root = "examples"
                     file_path = [
             lambda f: gr.update(visible=False),
             inputs=output_file,
             outputs=output_file
+        ).then(
+            fn=check_file,
+            inputs=file,
+            outputs=file
         ).then(
             fn=doc_parser,
+            inputs=[file, prompts, model_selector],
             outputs=[md, md_text]
         )