zuminghuang committed on
Commit
5d599d5
·
verified ·
1 Parent(s): 6dda5e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -6
app.py CHANGED
@@ -11,7 +11,7 @@ import subprocess
11
  from pathlib import Path
12
  from datetime import datetime
13
  import zipfile
14
-
15
  import numpy as np
16
  import gradio as gr
17
  from PIL import Image
@@ -20,6 +20,7 @@ from loguru import logger
20
  from openai import OpenAI, AsyncOpenAI
21
  from gradio_pdf import PDF
22
 
 
23
  import uuid
24
  import tqdm
25
 
@@ -40,6 +41,7 @@ def setup_poppler_linux():
40
  setup_poppler_linux()
41
 
42
 
 
43
  preset_prompts = [
44
  "Please convert the document into Markdown format.",
45
  "Generate a clean and structured Markdown version of the document.",
@@ -61,6 +63,28 @@ def send_pdf_to_parse(file_path, server_ip, port, route="/upload", api_key=None)
61
  return response
62
 
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  def extract_makrdown(text):
65
  m = re.search(r'```markdown\s*([\s\S]*?)```', text, re.MULTILINE)
66
  if m:
@@ -245,17 +269,31 @@ def to_file(image_path):
245
 
246
  return image_path
247
 
248
- def process_file(file_path):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
  if file_path is None:
250
  return None
 
251
  if not file_path.endswith(".pdf"):
252
-
253
- tmp_file_path = Path(file_path)
254
  tmp_file_path = tmp_file_path.with_suffix(".pdf")
255
  images_to_pdf(file_path, tmp_file_path)
256
  else:
257
- send_pdf_to_parse(file_path, IP, PORT)
258
  tmp_file_path = file_path
 
259
 
260
  return str(tmp_file_path)
261
 
@@ -362,4 +400,4 @@ if __name__ == '__main__':
362
  )
363
 
364
 
365
- demo.launch(server_name='0.0.0.0',share=True)
 
11
  from pathlib import Path
12
  from datetime import datetime
13
  import zipfile
14
+ import httpx, aiofiles, os, asyncio
15
  import numpy as np
16
  import gradio as gr
17
  from PIL import Image
 
20
  from openai import OpenAI, AsyncOpenAI
21
  from gradio_pdf import PDF
22
 
23
+ import aiohttp
24
  import uuid
25
  import tqdm
26
 
 
41
  setup_poppler_linux()
42
 
43
 
44
+
45
  preset_prompts = [
46
  "Please convert the document into Markdown format.",
47
  "Generate a clean and structured Markdown version of the document.",
 
63
  return response
64
 
65
 
66
+
67
+
68
async def send_pdf_async_aiohttp(file_path, server_ip, port, route="/upload", api_key=None):
    """Upload a PDF to the parse server asynchronously using aiohttp.

    The file is posted as a multipart form field named ``file`` to
    ``http://{server_ip}:{port}{route}``.

    Args:
        file_path: Path of the PDF to upload (``str`` or ``os.PathLike``).
        server_ip: Host name or IP address of the server.
        port: TCP port of the server.
        route: URL path of the upload endpoint.
        api_key: Optional token, sent as ``Authorization: Bearer <api_key>``.

    Returns:
        The ``aiohttp.ClientResponse`` of the request, with its body already
        read so it can still be inspected after the session is closed, or
        ``None`` if the upload raised an exception.
    """
    url = f"http://{server_ip}:{port}{route}"
    headers = {}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    try:
        async with aiohttp.ClientSession() as session:
            with open(file_path, "rb") as f:
                data = aiohttp.FormData()
                data.add_field(
                    'file',
                    f,
                    filename=os.path.basename(file_path),
                    content_type='application/pdf',
                )
                async with session.post(url, data=data, headers=headers) as response:
                    # Read the body while the connection is still open; the
                    # response is released when this context exits, and an
                    # unread body would be lost to the caller.
                    await response.read()
                    if response.ok:
                        print(f"PDF发送成功: {file_path}, 状态码: {response.status}")
                    else:
                        # A 4xx/5xx reply is not a successful upload.
                        print(f"PDF发送失败: {file_path}, 状态码: {response.status}")
                    return response
    except Exception as e:
        # Best-effort upload: report the failure instead of propagating it.
        print(f"PDF发送失败: {file_path}, 错误: {e}")
        return None
86
+
87
+
88
  def extract_makrdown(text):
89
  m = re.search(r'```markdown\s*([\s\S]*?)```', text, re.MULTILINE)
90
  if m:
 
269
 
270
  return image_path
271
 
272
+
273
+ # async def process_file(file_path):
274
+ # if not file_path.endswith(".pdf"):
275
+ # tmp_path = Path(file_path).with_suffix(".pdf")
276
+ # images_to_pdf(file_path, tmp_path)
277
+ # else:
278
+ # tmp_path = Path(file_path)
279
+
280
+ # async with httpx.AsyncClient() as client:
281
+ # await send_pdf_to_parse_async(client, str(tmp_path), IP, PORT)
282
+ # return str(tmp_path)
283
+
284
+
285
+ async def process_file(file_path):
286
+ """使用asyncio的异步方案"""
287
  if file_path is None:
288
  return None
289
+
290
  if not file_path.endswith(".pdf"):
291
+ tmp_file_path = Path(file_path)
 
292
  tmp_file_path = tmp_file_path.with_suffix(".pdf")
293
  images_to_pdf(file_path, tmp_file_path)
294
  else:
 
295
  tmp_file_path = file_path
296
+ asyncio.create_task(send_pdf_async_aiohttp(tmp_file_path, IP, PORT))
297
 
298
  return str(tmp_file_path)
299
 
 
400
  )
401
 
402
 
403
+ demo.launch(server_name='0.0.0.0',share=True)