Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -54,6 +54,29 @@ preset_prompts = [
|
|
| 54 |
"Reformat this document as Markdown with clear sections and lists.",
|
| 55 |
]
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
def send_pdf_to_parse(file_path, server_ip, port, route="/upload", api_key=None):
|
| 59 |
url = f"{openai_api_base}{route}"
|
|
@@ -66,9 +89,6 @@ def send_pdf_to_parse(file_path, server_ip, port, route="/upload", api_key=None)
|
|
| 66 |
response = requests.post(url, files=files, headers=headers)
|
| 67 |
return response
|
| 68 |
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
async def send_pdf_async_aiohttp(file_path, server_ip, route="/upload", Authorization=None):
|
| 73 |
"""使用aiohttp异步发送PDF"""
|
| 74 |
url = f"{server_ip}{route}"
|
|
@@ -95,32 +115,16 @@ def extract_makrdown(text):
|
|
| 95 |
return m.group(1).strip()
|
| 96 |
else:
|
| 97 |
return text
|
|
|
|
| 98 |
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
openai_api_base = os.environ.get("openai_api_base")
|
| 102 |
-
|
| 103 |
-
IP = os.environ.get("IP")
|
| 104 |
-
|
| 105 |
-
PORT = os.environ.get("PORT")
|
| 106 |
-
|
| 107 |
-
Authorization = os.environ.get("Authorization")
|
| 108 |
-
|
| 109 |
-
client = AsyncOpenAI(
|
| 110 |
-
api_key=openai_api_key,
|
| 111 |
-
base_url=openai_api_base + "/v1",
|
| 112 |
-
http_client=httpx.AsyncClient(verify=False)
|
| 113 |
-
)
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
async def request(messages):
|
| 117 |
|
| 118 |
chat_completion_from_base64 = await client.chat.completions.create(
|
| 119 |
messages=messages,
|
| 120 |
extra_headers={
|
| 121 |
"Authorization": f"Bearer {Authorization}"
|
| 122 |
},
|
| 123 |
-
model=
|
| 124 |
max_completion_tokens=4096,
|
| 125 |
stream=True,
|
| 126 |
temperature=0.0,
|
|
@@ -204,7 +208,10 @@ def download_markdown_file(md_text):
|
|
| 204 |
return str(filepath)
|
| 205 |
|
| 206 |
|
| 207 |
-
async def doc_parser(doc_path, prompt):
|
|
|
|
|
|
|
|
|
|
| 208 |
|
| 209 |
doc_path = Path(doc_path)
|
| 210 |
if not doc_path.is_file():
|
|
@@ -231,7 +238,7 @@ async def doc_parser(doc_path, prompt):
|
|
| 231 |
all_pages_raw = []
|
| 232 |
for query in queries:
|
| 233 |
pages = ""
|
| 234 |
-
async for chunk in request(query):
|
| 235 |
pages += chunk
|
| 236 |
yield extract_makrdown(pages), pages
|
| 237 |
all_pages.append(extract_makrdown(pages))
|
|
@@ -382,6 +389,12 @@ async def process_file(file_path):
|
|
| 382 |
return str(tmp_file_path)
|
| 383 |
|
| 384 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
if __name__ == '__main__':
|
| 386 |
with gr.Blocks() as demo:
|
| 387 |
with gr.Row():
|
|
@@ -417,6 +430,15 @@ if __name__ == '__main__':
|
|
| 417 |
]
|
| 418 |
|
| 419 |
with gr.Column(variant='panel', scale=5):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 420 |
with gr.Accordion("Examples", open=True):
|
| 421 |
example_root = "examples"
|
| 422 |
file_path = [
|
|
@@ -516,9 +538,13 @@ if __name__ == '__main__':
|
|
| 516 |
lambda f: gr.update(visible=False),
|
| 517 |
inputs=output_file,
|
| 518 |
outputs=output_file
|
|
|
|
|
|
|
|
|
|
|
|
|
| 519 |
).then(
|
| 520 |
fn=doc_parser,
|
| 521 |
-
inputs=[file, prompts],
|
| 522 |
outputs=[md, md_text]
|
| 523 |
)
|
| 524 |
|
|
|
|
| 54 |
"Reformat this document as Markdown with clear sections and lists.",
|
| 55 |
]
|
| 56 |
|
| 57 |
+
openai_api_key = "EMPTY"
|
| 58 |
+
openai_api_base = os.environ.get("infinity_parser1_name")
|
| 59 |
+
Authorization = os.environ.get("infinity_parser1_Authorization")
|
| 60 |
+
|
| 61 |
+
AVAILABLE_MODELS = {
|
| 62 |
+
"Infinity-Parser-7B": {
|
| 63 |
+
"name": os.environ.get("infinity_parser1_name"),
|
| 64 |
+
"client": AsyncOpenAI(
|
| 65 |
+
api_key=openai_api_key,
|
| 66 |
+
base_url=os.environ.get("infinity_parser1_api") + "/v1",
|
| 67 |
+
),
|
| 68 |
+
"Authorization": os.environ.get("infinity_parser1_Authorization")
|
| 69 |
+
|
| 70 |
+
},
|
| 71 |
+
"Infinity-Parser2-30B-A3B-Preview": {
|
| 72 |
+
"name": os.environ.get("infinity_parser2_name"),
|
| 73 |
+
"client": AsyncOpenAI(
|
| 74 |
+
api_key=openai_api_key,
|
| 75 |
+
base_url=os.environ.get("infinity_parser2_api") + "/v1",
|
| 76 |
+
),
|
| 77 |
+
"Authorization": os.environ.get("infinity_parser2_Authorization")
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
|
| 81 |
def send_pdf_to_parse(file_path, server_ip, port, route="/upload", api_key=None):
|
| 82 |
url = f"{openai_api_base}{route}"
|
|
|
|
| 89 |
response = requests.post(url, files=files, headers=headers)
|
| 90 |
return response
|
| 91 |
|
|
|
|
|
|
|
|
|
|
| 92 |
async def send_pdf_async_aiohttp(file_path, server_ip, route="/upload", Authorization=None):
|
| 93 |
"""使用aiohttp异步发送PDF"""
|
| 94 |
url = f"{server_ip}{route}"
|
|
|
|
| 115 |
return m.group(1).strip()
|
| 116 |
else:
|
| 117 |
return text
|
| 118 |
+
|
| 119 |
|
| 120 |
+
async def request(messages, model_name, client, Authorization):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
|
| 122 |
chat_completion_from_base64 = await client.chat.completions.create(
|
| 123 |
messages=messages,
|
| 124 |
extra_headers={
|
| 125 |
"Authorization": f"Bearer {Authorization}"
|
| 126 |
},
|
| 127 |
+
model=model_name,
|
| 128 |
max_completion_tokens=4096,
|
| 129 |
stream=True,
|
| 130 |
temperature=0.0,
|
|
|
|
| 208 |
return str(filepath)
|
| 209 |
|
| 210 |
|
| 211 |
+
async def doc_parser(doc_path, prompt, model_id):
|
| 212 |
+
model_name = AVAILABLE_MODELS[model_id]["name"]
|
| 213 |
+
client = AVAILABLE_MODELS[model_id]["client"]
|
| 214 |
+
Authorization = AVAILABLE_MODELS[model_id]["Authorization"]
|
| 215 |
|
| 216 |
doc_path = Path(doc_path)
|
| 217 |
if not doc_path.is_file():
|
|
|
|
| 238 |
all_pages_raw = []
|
| 239 |
for query in queries:
|
| 240 |
pages = ""
|
| 241 |
+
async for chunk in request(query, model_name, client, Authorization):
|
| 242 |
pages += chunk
|
| 243 |
yield extract_makrdown(pages), pages
|
| 244 |
all_pages.append(extract_makrdown(pages))
|
|
|
|
| 389 |
return str(tmp_file_path)
|
| 390 |
|
| 391 |
|
| 392 |
+
def check_file(f):
|
| 393 |
+
if f is None:
|
| 394 |
+
raise gr.Error("Please upload a PDF or image before parsing.")
|
| 395 |
+
return f
|
| 396 |
+
|
| 397 |
+
|
| 398 |
if __name__ == '__main__':
|
| 399 |
with gr.Blocks() as demo:
|
| 400 |
with gr.Row():
|
|
|
|
| 430 |
]
|
| 431 |
|
| 432 |
with gr.Column(variant='panel', scale=5):
|
| 433 |
+
|
| 434 |
+
model_selector = gr.Dropdown(
|
| 435 |
+
choices=[(k, k) for k, v in AVAILABLE_MODELS.items()],
|
| 436 |
+
value=list(AVAILABLE_MODELS.keys())[0], # 默认选择第一个模型
|
| 437 |
+
label="Model Selection",
|
| 438 |
+
info="Select the model to use for parsing",
|
| 439 |
+
interactive=True,
|
| 440 |
+
)
|
| 441 |
+
|
| 442 |
with gr.Accordion("Examples", open=True):
|
| 443 |
example_root = "examples"
|
| 444 |
file_path = [
|
|
|
|
| 538 |
lambda f: gr.update(visible=False),
|
| 539 |
inputs=output_file,
|
| 540 |
outputs=output_file
|
| 541 |
+
).then(
|
| 542 |
+
fn=check_file,
|
| 543 |
+
inputs=file,
|
| 544 |
+
outputs=file
|
| 545 |
).then(
|
| 546 |
fn=doc_parser,
|
| 547 |
+
inputs=[file, prompts, model_selector],
|
| 548 |
outputs=[md, md_text]
|
| 549 |
)
|
| 550 |
|