zuminghuang committed on
Commit
95e350e
·
verified ·
1 Parent(s): 46820dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +166 -49
app.py CHANGED
@@ -19,13 +19,16 @@ from pdf2image import convert_from_path
19
  from loguru import logger
20
  from openai import OpenAI, AsyncOpenAI
21
  from gradio_pdf import PDF
22
-
 
23
  import aiohttp
24
  import uuid
25
  import tqdm
 
 
 
26
 
27
  import requests
28
- import httpx
29
 
30
 
31
  def setup_poppler_linux():
@@ -64,9 +67,10 @@ def send_pdf_to_parse(file_path, server_ip, port, route="/upload", api_key=None)
64
  return response
65
 
66
 
 
 
67
  async def send_pdf_async_aiohttp(file_path, server_ip, route="/upload", Authorization=None):
68
  """使用aiohttp异步发送PDF"""
69
- # url = f"http://{server_ip}:{port}{route}"
70
  url = f"{server_ip}{route}"
71
  headers = {}
72
  if Authorization:
@@ -105,12 +109,12 @@ Authorization = os.environ.get("Authorization")
105
  client = AsyncOpenAI(
106
  api_key=openai_api_key,
107
  base_url=openai_api_base + "/v1",
108
- http_client=httpx.AsyncClient(verify=False),
109
  )
110
 
111
 
112
  async def request(messages):
113
-
114
  chat_completion_from_base64 = await client.chat.completions.create(
115
  messages=messages,
116
  extra_headers={
@@ -122,20 +126,20 @@ async def request(messages):
122
  temperature=0.0,
123
  top_p=0.95
124
  )
125
-
126
  page = ""
127
  async for chunk in chat_completion_from_base64:
128
  if chunk.choices[0].delta.content:
129
  content = chunk.choices[0].delta.content
130
-
131
  choice = chunk.choices[0]
132
  if choice.finish_reason is not None:
133
  print(f"end reason = {choice.finish_reason}")
134
  break
135
  page += content
136
-
137
  yield content
138
-
139
 
140
  def images_to_pdf(img_paths, pdf_path):
141
 
@@ -168,9 +172,8 @@ def encode_image(image_path):
168
  with open(image_path, "rb") as image_file:
169
  return base64.b64encode(image_file.read()).decode("utf-8")
170
 
171
-
172
  def build_message(image_path, prompt):
173
-
174
  content = [
175
  {
176
  "type": "image_url",
@@ -180,17 +183,18 @@ def build_message(image_path, prompt):
180
  },
181
  {"type": "text", 'text': prompt}
182
  ]
183
-
184
-
185
  messages = [
186
  {"role": "system", "content": "You are a helpful assistant."},
187
  {'role': 'user', 'content': content}
188
-
189
  ]
190
-
191
  return messages
192
 
193
 
 
194
  def download_markdown_file(md_text):
195
  filename = f"markdown_{uuid.uuid4().hex[:8]}.md"
196
  filepath = Path("downloads") / filename
@@ -215,14 +219,14 @@ async def doc_parser(doc_path, prompt):
215
  for idx, page in enumerate(pages, start=1):
216
  img_path = tmpdir / f"page_{idx}.png"
217
  page.save(img_path, "PNG")
218
-
219
  messages = build_message(img_path, prompt)
220
  queries.append(messages)
221
-
222
  else:
223
  messages = build_message(doc_path, prompt)
224
  queries.append(messages)
225
-
226
  all_pages = []
227
  all_pages_raw = []
228
  for query in queries:
@@ -235,26 +239,25 @@ async def doc_parser(doc_path, prompt):
235
  print(all_pages)
236
  yield "\n---\n".join(all_pages), "\n\n".join(all_pages_raw)
237
 
238
-
239
def compress_directory_to_zip(directory_path, output_zip_path):
    """Zip every file found under ``directory_path`` into ``output_zip_path``.

    Archive member names are the file paths relative to ``directory_path``.

    Returns:
        0 on success, -1 if any exception was raised (the exception is
        logged rather than propagated).
    """
    try:
        with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as archive:
            for folder, _subdirs, filenames in os.walk(directory_path):
                for filename in filenames:
                    source = os.path.join(folder, filename)
                    archive.write(source, os.path.relpath(source, directory_path))
    except Exception as err:
        logger.exception(err)
        return -1
    return 0
256
 
257
-
258
  latex_delimiters = [
259
  {'left': '$$', 'right': '$$', 'display': True},
260
  {'left': '$', 'right': '$', 'display': False},
@@ -262,31 +265,94 @@ latex_delimiters = [
262
  {'left': '\\[', 'right': '\\]', 'display': True},
263
  ]
264
 
265
-
266
def check_prompt(prompt):
    """Return ``prompt`` unchanged if it contains non-whitespace text.

    Raises:
        gr.Error: if ``prompt`` is falsy or consists only of whitespace.
    """
    has_text = bool(prompt) and prompt.strip() != ""
    if has_text:
        return prompt
    raise gr.Error("Please select or enter a prompt before parsing.")
270
 
271
-
272
def to_file(image_path):
    """Map an example thumbnail path to the file that should be loaded.

    A path ending in ``Academic_Papers.png`` is redirected to the
    ``Academic_Papers.pdf`` next to it; every other path is returned as is.

    Args:
        image_path: Path string of the example thumbnail.

    Returns:
        The path string to load.
    """
    thumbnail = "Academic_Papers.png"
    if image_path.endswith(thumbnail):
        # Swap only the trailing file name: str.replace would also rewrite
        # an identical name appearing earlier in the path (e.g. a directory).
        return image_path[:-len(thumbnail)] + "Academic_Papers.pdf"
    return image_path
278
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
 
280
- # async def process_file(file_path):
281
- # if not file_path.endswith(".pdf"):
282
- # tmp_path = Path(file_path).with_suffix(".pdf")
283
- # images_to_pdf(file_path, tmp_path)
284
- # else:
285
- # tmp_path = Path(file_path)
 
 
 
 
 
 
 
 
 
 
 
286
 
287
- # async with httpx.AsyncClient() as client:
288
- # await send_pdf_to_parse_async(client, str(tmp_path), IP, PORT)
289
- # return str(tmp_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
 
291
 
292
  async def process_file(file_path):
@@ -309,7 +375,7 @@ if __name__ == '__main__':
309
  with gr.Blocks() as demo:
310
  with gr.Row():
311
  with gr.Column(variant='panel', scale=5):
312
-
313
  file = gr.File(label='Please upload a PDF or image', file_types=['.pdf', '.png', '.jpeg', '.jpg'], type="filepath")
314
  prompts = gr.Dropdown(
315
  choices=preset_prompts,
@@ -324,9 +390,13 @@ if __name__ == '__main__':
324
  with gr.Row():
325
  change_bu = gr.Button('Parse')
326
  clear_bu = gr.ClearButton(value='Clear')
327
- pdf_show = PDF(label='Preview', interactive=False, visible=True, height=800)
328
-
329
 
 
 
 
 
 
 
330
 
331
  example_root = os.path.join(os.path.dirname(__file__), 'examples')
332
  images = [
@@ -341,9 +411,9 @@ if __name__ == '__main__':
341
  file_path = [
342
  os.path.join(example_root, f)
343
  for f in ["Financial_Reports.png", "Books.png", "Magazines.png", "Academic_Papers.png"]
344
-
345
  ]
346
-
347
  with gr.Row():
348
  for i, label in enumerate(["Financial Reports(IMG)", "Books(IMG)", "Magazines(IMG)", "Academic Papers(PDF)"]):
349
  with gr.Column(scale=1, min_width=120):
@@ -355,11 +425,11 @@ if __name__ == '__main__':
355
  show_download_button=False
356
  )
357
  gr.Button(label).click(fn=to_file, inputs=gr.State(file_path[i]), outputs=file)
358
-
359
-
360
  download_btn = gr.Button("⬇️ Generate download link", size="sm")
361
  output_file = gr.File(label='Parse result', interactive=False, elem_id="down-file-box",visible=False)
362
-
363
  gr.HTML("""
364
  <style>
365
  #down-file-box {
@@ -375,9 +445,56 @@ if __name__ == '__main__':
375
  with gr.Tab('Markdown text'):
376
  md_text = gr.TextArea(lines=45, show_copy_button=True)
377
 
378
-
379
-
380
- file.change(fn=process_file, inputs=file, outputs=pdf_show)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
381
 
382
 
383
  change_bu.click(
@@ -393,9 +510,9 @@ if __name__ == '__main__':
393
  inputs=[file, prompts],
394
  outputs=[md, md_text]
395
  )
396
-
397
- clear_bu.add([file, md, pdf_show, md_text])
398
-
399
  download_btn.click(
400
  fn=download_markdown_file,
401
  inputs=md_text,
 
19
  from loguru import logger
20
  from openai import OpenAI, AsyncOpenAI
21
  from gradio_pdf import PDF
22
+ import certifi
23
+ import httpx
24
  import aiohttp
25
  import uuid
26
  import tqdm
27
+ import base64, pathlib
28
+ from io import BytesIO
29
+ from pdf2image import convert_from_bytes, convert_from_path # pip install pdf2image
30
 
31
  import requests
 
32
 
33
 
34
  def setup_poppler_linux():
 
67
  return response
68
 
69
 
70
+
71
+
72
  async def send_pdf_async_aiohttp(file_path, server_ip, route="/upload", Authorization=None):
73
  """使用aiohttp异步发送PDF"""
 
74
  url = f"{server_ip}{route}"
75
  headers = {}
76
  if Authorization:
 
109
  client = AsyncOpenAI(
110
  api_key=openai_api_key,
111
  base_url=openai_api_base + "/v1",
112
+ http_client=httpx.AsyncClient(verify=False)
113
  )
114
 
115
 
116
  async def request(messages):
117
+
118
  chat_completion_from_base64 = await client.chat.completions.create(
119
  messages=messages,
120
  extra_headers={
 
126
  temperature=0.0,
127
  top_p=0.95
128
  )
129
+
130
  page = ""
131
  async for chunk in chat_completion_from_base64:
132
  if chunk.choices[0].delta.content:
133
  content = chunk.choices[0].delta.content
134
+
135
  choice = chunk.choices[0]
136
  if choice.finish_reason is not None:
137
  print(f"end reason = {choice.finish_reason}")
138
  break
139
  page += content
140
+
141
  yield content
142
+
143
 
144
  def images_to_pdf(img_paths, pdf_path):
145
 
 
172
  with open(image_path, "rb") as image_file:
173
  return base64.b64encode(image_file.read()).decode("utf-8")
174
 
 
175
  def build_message(image_path, prompt):
176
+
177
  content = [
178
  {
179
  "type": "image_url",
 
183
  },
184
  {"type": "text", 'text': prompt}
185
  ]
186
+
187
+
188
  messages = [
189
  {"role": "system", "content": "You are a helpful assistant."},
190
  {'role': 'user', 'content': content}
191
+
192
  ]
193
+
194
  return messages
195
 
196
 
197
+
198
  def download_markdown_file(md_text):
199
  filename = f"markdown_{uuid.uuid4().hex[:8]}.md"
200
  filepath = Path("downloads") / filename
 
219
  for idx, page in enumerate(pages, start=1):
220
  img_path = tmpdir / f"page_{idx}.png"
221
  page.save(img_path, "PNG")
222
+
223
  messages = build_message(img_path, prompt)
224
  queries.append(messages)
225
+
226
  else:
227
  messages = build_message(doc_path, prompt)
228
  queries.append(messages)
229
+
230
  all_pages = []
231
  all_pages_raw = []
232
  for query in queries:
 
239
  print(all_pages)
240
  yield "\n---\n".join(all_pages), "\n\n".join(all_pages_raw)
241
 
242
+
243
def compress_directory_to_zip(directory_path, output_zip_path):
    """Zip every file found under ``directory_path`` into ``output_zip_path``.

    Archive member names are the file paths relative to ``directory_path``.

    Returns:
        0 on success, -1 if any exception was raised (the exception is
        logged rather than propagated).
    """
    try:
        with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as archive:
            for folder, _subdirs, filenames in os.walk(directory_path):
                for filename in filenames:
                    source = os.path.join(folder, filename)
                    archive.write(source, os.path.relpath(source, directory_path))
    except Exception as err:
        logger.exception(err)
        return -1
    return 0
260
 
 
261
  latex_delimiters = [
262
  {'left': '$$', 'right': '$$', 'display': True},
263
  {'left': '$', 'right': '$', 'display': False},
 
265
  {'left': '\\[', 'right': '\\]', 'display': True},
266
  ]
267
 
 
268
def check_prompt(prompt):
    """Return ``prompt`` unchanged if it contains non-whitespace text.

    Raises:
        gr.Error: if ``prompt`` is falsy or consists only of whitespace.
    """
    has_text = bool(prompt) and prompt.strip() != ""
    if has_text:
        return prompt
    raise gr.Error("Please select or enter a prompt before parsing.")
272
 
 
273
def to_file(image_path):
    """Map an example thumbnail path to the file that should be loaded.

    A path ending in ``Academic_Papers.png`` is redirected to the
    ``Academic_Papers.pdf`` next to it; every other path is returned as is.

    Args:
        image_path: Path string of the example thumbnail.

    Returns:
        The path string to load.
    """
    thumbnail = "Academic_Papers.png"
    if image_path.endswith(thumbnail):
        # Swap only the trailing file name: str.replace would also rewrite
        # an identical name appearing earlier in the path (e.g. a directory).
        return image_path[:-len(thumbnail)] + "Academic_Papers.pdf"
    return image_path
279
 
280
def render_img(b64_list, idx, scale):
    """Render the image at ``idx`` as an HTML snippet zoomed by ``scale``.

    Args:
        b64_list: Sequence of image sources usable as an ``<img src>``
            value (the callers in this file pass base64 data URIs).
        idx: Image index. Any integer is accepted; it is wrapped modulo
            ``len(b64_list)``, so stepping past either end cycles around.
        scale: Zoom factor; ``1`` renders the image at 100% of the
            container width.

    Returns:
        An HTML string: a grey placeholder paragraph when ``b64_list`` is
        empty, otherwise a fixed-height scrollable ``<div>`` wrapping the
        selected image.
    """
    if not b64_list:
        return "<p style='color:gray'>请先上传图片</p>"

    src = b64_list[idx % len(b64_list)]

    # The width is a percentage and the height is ``auto`` so the aspect
    # ratio is kept. ``:g`` strips float noise from the product
    # (1.1 * 100 == 110.00000000000001) before it reaches the CSS.
    width = f"{scale * 100:g}"

    # At or below 100% the image is centred with flexbox. Above 100% the
    # centring rules are omitted -- presumably so that the overflowing
    # image stays fully reachable by scrolling; confirm in the browser.
    centering = (
        "display:flex;justify-content:center;align-items:center;"
        if scale <= 1
        else ""
    )

    return (
        f'<div style="overflow:auto;border:1px solid #ccc;'
        f'{centering}'
        f'width:100%;height:800px;">'
        f' <img src="{src}" '
        f' style="width:{width}%;max-width:none;'
        f' height:auto;display:block;" />'
        f'</div>'
    )
320
 
321
def files_to_b64(file, pdf_dpi: int = 200):
    """Convert an uploaded PDF or image into a list of base64 data URIs.

    Args:
        file: Either an object exposing ``.data`` (raw bytes) and ``.name``,
            or a filesystem path (``str`` / path-like).
        pdf_dpi: Resolution passed to pdf2image when rasterising PDF pages.

    Returns:
        A list of ``data:image/...;base64,...`` strings: one PNG entry per
        page for a PDF, or a single entry holding the original bytes for an
        image file.
    """
    # Extensions whose registered MIME subtype differs from the extension
    # itself; anything else falls back to the bare extension.
    subtype_by_ext = {"jpg": "jpeg", "jpe": "jpeg"}

    in_memory = hasattr(file, "data")
    if in_memory:
        raw_bytes = file.data
        suffix = pathlib.Path(file.name).suffix.lower()
    else:
        path = pathlib.Path(file)
        suffix = path.suffix.lower()

    if suffix == ".pdf":
        if in_memory:
            pages = convert_from_bytes(raw_bytes, dpi=pdf_dpi)
        else:
            pages = convert_from_path(str(path), dpi=pdf_dpi)

        out = []
        for page in pages:
            # Each rasterised page is re-encoded as PNG in memory.
            buf = BytesIO()
            page.save(buf, format="PNG")
            b64 = base64.b64encode(buf.getvalue()).decode()
            out.append(f"data:image/png;base64,{b64}")
        return out

    if not in_memory:
        raw_bytes = path.read_bytes()

    ext = suffix[1:]
    subtype = subtype_by_ext.get(ext, ext)
    b64 = base64.b64encode(raw_bytes).decode()
    return [f"data:image/{subtype};base64,{b64}"]
356
 
357
 
358
  async def process_file(file_path):
 
375
  with gr.Blocks() as demo:
376
  with gr.Row():
377
  with gr.Column(variant='panel', scale=5):
378
+
379
  file = gr.File(label='Please upload a PDF or image', file_types=['.pdf', '.png', '.jpeg', '.jpg'], type="filepath")
380
  prompts = gr.Dropdown(
381
  choices=preset_prompts,
 
390
  with gr.Row():
391
  change_bu = gr.Button('Parse')
392
  clear_bu = gr.ClearButton(value='Clear')
 
 
393
 
394
+ zoom = gr.Slider(0.5, 3, value=1, step=0.1, label="Image Scale")
395
+ with gr.Row():
396
+ prev_btn = gr.Button("⬅️ Pre")
397
+ next_btn = gr.Button("Next ➡️")
398
+
399
+ viewer = gr.HTML()
400
 
401
  example_root = os.path.join(os.path.dirname(__file__), 'examples')
402
  images = [
 
411
  file_path = [
412
  os.path.join(example_root, f)
413
  for f in ["Financial_Reports.png", "Books.png", "Magazines.png", "Academic_Papers.png"]
414
+
415
  ]
416
+
417
  with gr.Row():
418
  for i, label in enumerate(["Financial Reports(IMG)", "Books(IMG)", "Magazines(IMG)", "Academic Papers(PDF)"]):
419
  with gr.Column(scale=1, min_width=120):
 
425
  show_download_button=False
426
  )
427
  gr.Button(label).click(fn=to_file, inputs=gr.State(file_path[i]), outputs=file)
428
+
429
+
430
  download_btn = gr.Button("⬇️ Generate download link", size="sm")
431
  output_file = gr.File(label='Parse result', interactive=False, elem_id="down-file-box",visible=False)
432
+
433
  gr.HTML("""
434
  <style>
435
  #down-file-box {
 
445
  with gr.Tab('Markdown text'):
446
  md_text = gr.TextArea(lines=45, show_copy_button=True)
447
 
448
+ img_list_state = gr.State([])
449
+ idx_state = gr.State(0)
450
+
451
# Strong references to in-flight background uploads. The event loop only
# keeps weak references to tasks, so an unreferenced task may be
# garbage-collected before it finishes.
_pending_uploads = set()


def _on_upload_done(task):
    """Forget a finished upload task and log its failure, if any."""
    _pending_uploads.discard(task)
    if task.cancelled():
        return
    error = task.exception()
    if error is not None:
        logger.error("Background PDF upload failed: {!r}", error)


async def upload_handler(files):
    """Handle a change of the uploaded file and build the preview state.

    For a PDF, the file is also sent to the server in a background task
    that is not awaited here.

    Args:
        files: Path of the uploaded file, or ``None`` when it was cleared.

    Returns:
        A ``(image_list, index, html)`` tuple: the data URIs of all pages,
        the index reset to 0, and the HTML preview of the first page at
        scale 1. When ``files`` is ``None`` this is ``([], 0, "")``.
    """
    if files is None:
        return [], 0, ""

    if files.lower().endswith(".pdf"):
        task = asyncio.create_task(
            send_pdf_async_aiohttp(files, server_ip=openai_api_base, Authorization=Authorization)
        )
        # Keep the task alive until it completes and surface its errors.
        _pending_uploads.add(task)
        task.add_done_callback(_on_upload_done)

    b64s = files_to_b64(files)
    return b64s, 0, render_img(b64s, 0, 1)
461
+
462
+ file.change(
463
+ upload_handler,
464
+ inputs=file,
465
+ outputs=[img_list_state, idx_state, viewer],
466
+ ).then(
467
+ lambda: gr.update(value=1), # 无输入,直接把 zoom 设为 1
468
+ None, # inputs=None
469
+ zoom # outputs=[zoom]
470
+ )
471
+
472
def show_prev(b64s, idx, scale):
    """Step one image back and re-render the viewer.

    Returns:
        A ``(new_index, html)`` tuple. The index is decremented without
        clamping; ``render_img`` receives it as is.
    """
    previous = idx - 1
    html = render_img(b64s, previous, scale)
    return previous, html
475
+
476
+ prev_btn.click(
477
+ show_prev,
478
+ inputs=[img_list_state, idx_state, zoom],
479
+ outputs=[idx_state, viewer],
480
+ )
481
+
482
def show_next(b64s, idx, scale):
    """Step one image forward and re-render the viewer.

    Returns:
        A ``(new_index, html)`` tuple. The index is incremented without
        clamping; ``render_img`` receives it as is.
    """
    following = idx + 1
    html = render_img(b64s, following, scale)
    return following, html
485
+
486
+ next_btn.click(
487
+ show_next,
488
+ inputs=[img_list_state, idx_state, zoom],
489
+ outputs=[idx_state, viewer],
490
+ )
491
+
492
+ zoom.change(
493
+ lambda b64s, idx, scale: render_img(b64s, idx, scale),
494
+ inputs=[img_list_state, idx_state, zoom],
495
+ outputs=viewer,
496
+ )
497
+
498
 
499
 
500
  change_bu.click(
 
510
  inputs=[file, prompts],
511
  outputs=[md, md_text]
512
  )
513
+
514
+ clear_bu.add([file, md, md_text])
515
+
516
  download_btn.click(
517
  fn=download_markdown_file,
518
  inputs=md_text,