Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -11,7 +11,7 @@ import subprocess
|
|
11 |
from pathlib import Path
|
12 |
from datetime import datetime
|
13 |
import zipfile
|
14 |
-
|
15 |
import numpy as np
|
16 |
import gradio as gr
|
17 |
from PIL import Image
|
@@ -20,6 +20,7 @@ from loguru import logger
|
|
20 |
from openai import OpenAI, AsyncOpenAI
|
21 |
from gradio_pdf import PDF
|
22 |
|
|
|
23 |
import uuid
|
24 |
import tqdm
|
25 |
|
@@ -40,6 +41,7 @@ def setup_poppler_linux():
|
|
40 |
setup_poppler_linux()
|
41 |
|
42 |
|
|
|
43 |
preset_prompts = [
|
44 |
"Please convert the document into Markdown format.",
|
45 |
"Generate a clean and structured Markdown version of the document.",
|
@@ -61,6 +63,28 @@ def send_pdf_to_parse(file_path, server_ip, port, route="/upload", api_key=None)
|
|
61 |
return response
|
62 |
|
63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
def extract_makrdown(text):
|
65 |
m = re.search(r'```markdown\s*([\s\S]*?)```', text, re.MULTILINE)
|
66 |
if m:
|
@@ -245,17 +269,31 @@ def to_file(image_path):
|
|
245 |
|
246 |
return image_path
|
247 |
|
248 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
249 |
if file_path is None:
|
250 |
return None
|
|
|
251 |
if not file_path.endswith(".pdf"):
|
252 |
-
|
253 |
-
tmp_file_path = Path(file_path)
|
254 |
tmp_file_path = tmp_file_path.with_suffix(".pdf")
|
255 |
images_to_pdf(file_path, tmp_file_path)
|
256 |
else:
|
257 |
-
send_pdf_to_parse(file_path, IP, PORT)
|
258 |
tmp_file_path = file_path
|
|
|
259 |
|
260 |
return str(tmp_file_path)
|
261 |
|
@@ -362,4 +400,4 @@ if __name__ == '__main__':
|
|
362 |
)
|
363 |
|
364 |
|
365 |
-
demo.launch(server_name='0.0.0.0',share=True)
|
|
|
11 |
from pathlib import Path
|
12 |
from datetime import datetime
|
13 |
import zipfile
|
14 |
+
import httpx, aiofiles, os, asyncio
|
15 |
import numpy as np
|
16 |
import gradio as gr
|
17 |
from PIL import Image
|
|
|
20 |
from openai import OpenAI, AsyncOpenAI
|
21 |
from gradio_pdf import PDF
|
22 |
|
23 |
+
import aiohttp
|
24 |
import uuid
|
25 |
import tqdm
|
26 |
|
|
|
41 |
setup_poppler_linux()
|
42 |
|
43 |
|
44 |
+
|
45 |
preset_prompts = [
|
46 |
"Please convert the document into Markdown format.",
|
47 |
"Generate a clean and structured Markdown version of the document.",
|
|
|
63 |
return response
|
64 |
|
65 |
|
66 |
+
|
67 |
+
|
68 |
+
async def send_pdf_async_aiohttp(file_path, server_ip, port, route="/upload", api_key=None):
    """Upload a PDF to the parsing server asynchronously with aiohttp.

    Args:
        file_path: Path of the PDF file to upload.
        server_ip: Host name or IP address of the server.
        port: TCP port of the server.
        route: URL path of the upload endpoint.
        api_key: Optional token; when truthy it is sent as
            ``Authorization: Bearer <api_key>``.

    Returns:
        The ``aiohttp.ClientResponse`` with its body already buffered, or
        ``None`` if anything went wrong while opening or sending the file.
    """
    url = f"http://{server_ip}:{port}{route}"
    headers = {}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    try:
        async with aiohttp.ClientSession() as session:
            # The file must stay open for the whole request because aiohttp
            # streams it while sending the multipart body.
            with open(file_path, "rb") as f:
                data = aiohttp.FormData()
                data.add_field('file', f, filename=os.path.basename(file_path), content_type='application/pdf')
                async with session.post(url, data=data, headers=headers) as response:
                    # Buffer the body while the connection is still open.
                    # The response is returned after the context managers
                    # have released it, and an unread body could no longer
                    # be fetched by the caller at that point.
                    await response.read()
                    print(f"PDF发送成功: {file_path}, 状态码: {response.status}")
                    return response
    except Exception as e:
        # Best-effort upload: report the failure instead of propagating it.
        print(f"PDF发送失败: {file_path}, 错误: {e}")
        return None
|
86 |
+
|
87 |
+
|
88 |
def extract_makrdown(text):
|
89 |
m = re.search(r'```markdown\s*([\s\S]*?)```', text, re.MULTILINE)
|
90 |
if m:
|
|
|
269 |
|
270 |
return image_path
|
271 |
|
272 |
+
|
273 |
+
# async def process_file(file_path):
|
274 |
+
# if not file_path.endswith(".pdf"):
|
275 |
+
# tmp_path = Path(file_path).with_suffix(".pdf")
|
276 |
+
# images_to_pdf(file_path, tmp_path)
|
277 |
+
# else:
|
278 |
+
# tmp_path = Path(file_path)
|
279 |
+
|
280 |
+
# async with httpx.AsyncClient() as client:
|
281 |
+
# await send_pdf_to_parse_async(client, str(tmp_path), IP, PORT)
|
282 |
+
# return str(tmp_path)
|
283 |
+
|
284 |
+
|
285 |
+
# Strong references to in-flight upload tasks. The event loop only keeps weak
# references to tasks, so a task nobody else references may be
# garbage-collected before it has finished running.
_BACKGROUND_UPLOADS = set()


async def process_file(file_path):
    """Normalize an uploaded file to a PDF path and start a background upload.

    Non-PDF inputs are converted with ``images_to_pdf`` into a sibling file
    carrying a ``.pdf`` suffix; PDF inputs are used as they are. The resulting
    PDF is then sent to the parsing server in a background task, without
    waiting for the upload to complete.

    Args:
        file_path: Path of the uploaded file, or ``None`` when nothing was
            uploaded.

    Returns:
        The PDF path as a string, or ``None`` when ``file_path`` is ``None``.
    """
    if file_path is None:
        return None

    # Compare case-insensitively so that e.g. "scan.PDF" is recognized as a
    # PDF instead of being pushed through the image conversion.
    if not str(file_path).lower().endswith(".pdf"):
        tmp_file_path = Path(file_path).with_suffix(".pdf")
        images_to_pdf(file_path, tmp_file_path)
    else:
        tmp_file_path = file_path

    # Fire-and-forget upload: keep a reference until the task is done, then
    # drop it so the set does not grow without bound.
    upload = asyncio.create_task(send_pdf_async_aiohttp(str(tmp_file_path), IP, PORT))
    _BACKGROUND_UPLOADS.add(upload)
    upload.add_done_callback(_BACKGROUND_UPLOADS.discard)

    return str(tmp_file_path)
|
299 |
|
|
|
400 |
)
|
401 |
|
402 |
|
403 |
+
demo.launch(server_name='0.0.0.0',share=True)
|