GiantPandas committed on
Commit
cc96b3f
·
verified ·
1 Parent(s): 39b2b5c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -25
app.py CHANGED
@@ -54,6 +54,29 @@ preset_prompts = [
54
  "Reformat this document as Markdown with clear sections and lists.",
55
  ]
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
  def send_pdf_to_parse(file_path, server_ip, port, route="/upload", api_key=None):
59
  url = f"{openai_api_base}{route}"
@@ -66,9 +89,6 @@ def send_pdf_to_parse(file_path, server_ip, port, route="/upload", api_key=None)
66
  response = requests.post(url, files=files, headers=headers)
67
  return response
68
 
69
-
70
-
71
-
72
  async def send_pdf_async_aiohttp(file_path, server_ip, route="/upload", Authorization=None):
73
  """使用aiohttp异步发送PDF"""
74
  url = f"{server_ip}{route}"
@@ -95,32 +115,16 @@ def extract_makrdown(text):
95
  return m.group(1).strip()
96
  else:
97
  return text
 
98
 
99
- openai_api_key = "EMPTY"
100
-
101
- openai_api_base = os.environ.get("openai_api_base")
102
-
103
- IP = os.environ.get("IP")
104
-
105
- PORT = os.environ.get("PORT")
106
-
107
- Authorization = os.environ.get("Authorization")
108
-
109
- client = AsyncOpenAI(
110
- api_key=openai_api_key,
111
- base_url=openai_api_base + "/v1",
112
- http_client=httpx.AsyncClient(verify=False)
113
- )
114
-
115
-
116
- async def request(messages):
117
 
118
  chat_completion_from_base64 = await client.chat.completions.create(
119
  messages=messages,
120
  extra_headers={
121
  "Authorization": f"Bearer {Authorization}"
122
  },
123
- model="Qwen2_5VL",
124
  max_completion_tokens=4096,
125
  stream=True,
126
  temperature=0.0,
@@ -204,7 +208,10 @@ def download_markdown_file(md_text):
204
  return str(filepath)
205
 
206
 
207
- async def doc_parser(doc_path, prompt):
 
 
 
208
 
209
  doc_path = Path(doc_path)
210
  if not doc_path.is_file():
@@ -231,7 +238,7 @@ async def doc_parser(doc_path, prompt):
231
  all_pages_raw = []
232
  for query in queries:
233
  pages = ""
234
- async for chunk in request(query):
235
  pages += chunk
236
  yield extract_makrdown(pages), pages
237
  all_pages.append(extract_makrdown(pages))
@@ -382,6 +389,12 @@ async def process_file(file_path):
382
  return str(tmp_file_path)
383
 
384
 
 
 
 
 
 
 
385
  if __name__ == '__main__':
386
  with gr.Blocks() as demo:
387
  with gr.Row():
@@ -417,6 +430,15 @@ if __name__ == '__main__':
417
  ]
418
 
419
  with gr.Column(variant='panel', scale=5):
 
 
 
 
 
 
 
 
 
420
  with gr.Accordion("Examples", open=True):
421
  example_root = "examples"
422
  file_path = [
@@ -516,9 +538,13 @@ if __name__ == '__main__':
516
  lambda f: gr.update(visible=False),
517
  inputs=output_file,
518
  outputs=output_file
 
 
 
 
519
  ).then(
520
  fn=doc_parser,
521
- inputs=[file, prompts],
522
  outputs=[md, md_text]
523
  )
524
 
 
54
  "Reformat this document as Markdown with clear sections and lists.",
55
  ]
56
 
57
+ openai_api_key = "EMPTY"
58
+ openai_api_base = os.environ.get("infinity_parser1_name")
59
+ Authorization = os.environ.get("infinity_parser1_Authorization")
60
+
61
+ AVAILABLE_MODELS = {
62
+ "Infinity-Parser-7B": {
63
+ "name": os.environ.get("infinity_parser1_name"),
64
+ "client": AsyncOpenAI(
65
+ api_key=openai_api_key,
66
+ base_url=os.environ.get("infinity_parser1_api") + "/v1",
67
+ ),
68
+ "Authorization": os.environ.get("infinity_parser1_Authorization")
69
+
70
+ },
71
+ "Infinity-Parser2-30B-A3B-Preview": {
72
+ "name": os.environ.get("infinity_parser2_name"),
73
+ "client": AsyncOpenAI(
74
+ api_key=openai_api_key,
75
+ base_url=os.environ.get("infinity_parser2_api") + "/v1",
76
+ ),
77
+ "Authorization": os.environ.get("infinity_parser2_Authorization")
78
+ }
79
+ }
80
 
81
  def send_pdf_to_parse(file_path, server_ip, port, route="/upload", api_key=None):
82
  url = f"{openai_api_base}{route}"
 
89
  response = requests.post(url, files=files, headers=headers)
90
  return response
91
 
 
 
 
92
  async def send_pdf_async_aiohttp(file_path, server_ip, route="/upload", Authorization=None):
93
  """使用aiohttp异步发送PDF"""
94
  url = f"{server_ip}{route}"
 
115
  return m.group(1).strip()
116
  else:
117
  return text
118
+
119
 
120
+ async def request(messages, model_name, client, Authorization):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
  chat_completion_from_base64 = await client.chat.completions.create(
123
  messages=messages,
124
  extra_headers={
125
  "Authorization": f"Bearer {Authorization}"
126
  },
127
+ model=model_name,
128
  max_completion_tokens=4096,
129
  stream=True,
130
  temperature=0.0,
 
208
  return str(filepath)
209
 
210
 
211
+ async def doc_parser(doc_path, prompt, model_id):
212
+ model_name = AVAILABLE_MODELS[model_id]["name"]
213
+ client = AVAILABLE_MODELS[model_id]["client"]
214
+ Authorization = AVAILABLE_MODELS[model_id]["Authorization"]
215
 
216
  doc_path = Path(doc_path)
217
  if not doc_path.is_file():
 
238
  all_pages_raw = []
239
  for query in queries:
240
  pages = ""
241
+ async for chunk in request(query, model_name, client, Authorization):
242
  pages += chunk
243
  yield extract_makrdown(pages), pages
244
  all_pages.append(extract_makrdown(pages))
 
389
  return str(tmp_file_path)
390
 
391
 
392
+ def check_file(f):
393
+ if f is None:
394
+ raise gr.Error("Please upload a PDF or image before parsing.")
395
+ return f
396
+
397
+
398
  if __name__ == '__main__':
399
  with gr.Blocks() as demo:
400
  with gr.Row():
 
430
  ]
431
 
432
  with gr.Column(variant='panel', scale=5):
433
+
434
+ model_selector = gr.Dropdown(
435
+ choices=[(k, k) for k, v in AVAILABLE_MODELS.items()],
436
+ value=list(AVAILABLE_MODELS.keys())[0], # 默认选择第一个模型
437
+ label="Model Selection",
438
+ info="Select the model to use for parsing",
439
+ interactive=True,
440
+ )
441
+
442
  with gr.Accordion("Examples", open=True):
443
  example_root = "examples"
444
  file_path = [
 
538
  lambda f: gr.update(visible=False),
539
  inputs=output_file,
540
  outputs=output_file
541
+ ).then(
542
+ fn=check_file,
543
+ inputs=file,
544
+ outputs=file
545
  ).then(
546
  fn=doc_parser,
547
+ inputs=[file, prompts, model_selector],
548
  outputs=[md, md_text]
549
  )
550