guoshengjian commited on
Commit
c489b93
·
1 Parent(s): a3a3acc
.gitattributes CHANGED
@@ -35,3 +35,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  pp-structurev3_altered.png filter=lfs diff=lfs merge=lfs -text
37
  pp-structurev3.png filter=lfs diff=lfs merge=lfs -text
 
 
 
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  pp-structurev3_altered.png filter=lfs diff=lfs merge=lfs -text
37
  pp-structurev3.png filter=lfs diff=lfs merge=lfs -text
38
+ *.jpg filter=lfs diff=lfs merge=lfs -text
39
+ *.png filter=lfs diff=lfs merge=lfs -text
.pre-commit-config.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v4.6.0
4
+ hooks:
5
+ - id: check-added-large-files
6
+ args: ['--maxkb=11000']
7
+ - id: check-case-conflict
8
+ - id: check-merge-conflict
9
+ - id: check-symlinks
10
+ - id: detect-private-key
11
+ - id: end-of-file-fixer
12
+ - id: trailing-whitespace
13
+ files: \.(md|c|cc|cxx|cpp|cu|h|hpp|hxx|py)$
14
+ - repo: https://github.com/Lucas-C/pre-commit-hooks
15
+ rev: v1.5.1
16
+ hooks:
17
+ - id: remove-crlf
18
+ - id: remove-tabs
19
+ files: \.(md|c|cc|cxx|cpp|cu|h|hpp|hxx|py)$
20
+ - repo: local
21
+ hooks:
22
+ - id: clang-format
23
+ name: clang-format
24
+ description: Format files with ClangFormat
25
+ entry: bash .precommit/clang_format.hook -i
26
+ language: system
27
+ files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$
28
+
29
+ # For Python files
30
+ - repo: https://github.com/psf/black.git
31
+ rev: 24.4.2
32
+ hooks:
33
+ - id: black
34
+ files: (.*\.(py|pyi|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
35
+
36
+ # Flake8
37
+ - repo: https://github.com/pycqa/flake8
38
+ rev: 7.0.0
39
+ hooks:
40
+ - id: flake8
41
+ args:
42
+ - --count
43
+ - --select=E9,F63,F7,F82,E721,F401
44
+ - --per-file-ignores=__init__.py:F401
45
+ - --show-source
46
+ - --statistics
47
+
48
+ # isort
49
+ - repo: https://github.com/pycqa/isort
50
+ rev: 5.12.0
51
+ hooks:
52
+ - id: isort
53
+ args:
54
+ - --profile=black
README.md CHANGED
@@ -8,7 +8,7 @@ sdk_version: 5.27.1
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
- short_description: Next-Gen High-Precision Doc Parsing Solution
12
  tags:
13
  - ocr
14
  - document-analysis
 
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
+ short_description: Next-Gen High-Precision Doc Parsing Solution
12
  tags:
13
  - ocr
14
  - document-analysis
app.py CHANGED
@@ -5,6 +5,8 @@ import json
5
  import os
6
  import re
7
  import tempfile
 
 
8
  import uuid
9
  import zipfile
10
  from pathlib import Path
@@ -14,8 +16,9 @@ import requests
14
  from PIL import Image
15
 
16
  # API Configuration
17
- API_URL = "https://cf38vaydqdl2l4p2.aistudio-hub.baidu.com/layout-parsing"
18
- TOKEN = os.getenv("API_TOKEN", "")
 
19
 
20
  LOGO_PATH = Path(__file__).parent / "pp-structurev3.png"
21
  with open(LOGO_PATH, "rb") as image_file:
@@ -26,6 +29,11 @@ with open(LOGO_PATH, "rb") as image_file:
26
  TEMP_DIR = tempfile.TemporaryDirectory()
27
  atexit.register(TEMP_DIR.cleanup)
28
 
 
 
 
 
 
29
 
30
  CSS = """
31
  :root {
@@ -46,21 +54,6 @@ body {
46
  font-family: Arial, sans-serif;
47
  }
48
 
49
- .gradio-container {
50
- max-width: var(--content-width) !important;
51
- width: 100% !important;
52
- margin: 20px auto;
53
- padding: 20px;
54
- background-color: var(--white);
55
- }
56
-
57
- #component-0,
58
- #tabs,
59
- #settings {
60
- background-color: var(--white) !important;
61
- padding: 15px;
62
- }
63
-
64
  .upload-section {
65
  width: 100%;
66
  margin: 0 auto 30px;
@@ -131,9 +124,6 @@ button {
131
  border-radius: 4px;
132
  padding: 8px 16px;
133
  }
134
- button:hover {
135
- opacity: 0.8 !important;
136
- }
137
 
138
  .file-download {
139
  margin-top: 15px !important;
@@ -157,9 +147,179 @@ button:hover {
157
  text-align: center;
158
  margin: 20px 0;
159
  }
160
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  MAX_NUM_PAGES = 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
 
165
  def url_to_bytes(url, *, timeout=10):
@@ -225,22 +385,49 @@ def concatenate_markdown_pages(markdown_list):
225
 
226
  def process_file(
227
  file_path,
 
228
  use_formula_recognition,
229
  use_chart_recognition,
230
  use_doc_orientation_classify,
231
  use_doc_unwarping,
232
  use_textline_orientation,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  ):
234
  """Process uploaded file with API"""
235
  try:
236
- if not file_path:
237
  raise ValueError("Please upload a file first")
238
-
239
- if Path(file_path).suffix == ".pdf":
240
- file_type = "pdf"
 
 
241
  else:
 
242
  file_type = "image"
243
-
244
  # Read file content
245
  with open(file_path, "rb") as f:
246
  file_bytes = f.read()
@@ -262,6 +449,30 @@ def process_file(
262
  "useDocOrientationClassify": use_doc_orientation_classify,
263
  "useDocUnwarping": use_doc_unwarping,
264
  "useTextlineOrientation": use_textline_orientation,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  },
266
  headers=headers,
267
  timeout=1000,
@@ -271,16 +482,25 @@ def process_file(
271
  # Parse API response
272
  result = response.json()
273
  layout_results = result.get("result", {}).get("layoutParsingResults", [])
274
-
275
  layout_ordering_images = []
 
 
 
276
  markdown_texts = []
277
  markdown_images = []
278
  markdown_content_list = []
279
  input_images = []
 
280
  for res in layout_results:
281
  layout_ordering_images.append(
282
  url_to_bytes(res["outputImages"]["layout_order_res"])
283
  )
 
 
 
 
 
 
284
  markdown = res["markdown"]
285
  markdown_text = markdown["text"]
286
  markdown_texts.append(markdown_text)
@@ -290,6 +510,7 @@ def process_file(
290
  img_path_to_bytes[path] = url_to_bytes(url)
291
  markdown_images.append(img_path_to_bytes)
292
  input_images.append(url_to_bytes(res["inputImage"]))
 
293
  markdown_content = embed_images_into_markdown_text(
294
  markdown_text, img_path_to_url
295
  )
@@ -306,11 +527,15 @@ def process_file(
306
  "original_file": file_path,
307
  "file_type": file_type,
308
  "layout_ordering_images": layout_ordering_images,
 
 
 
309
  "markdown_texts": markdown_texts,
310
  "markdown_images": markdown_images,
311
  "markdown_content_list": markdown_content_list,
312
  "concatenated_markdown_content": concatenated_markdown_content,
313
  "input_images": input_images,
 
314
  "api_response": result,
315
  }
316
 
@@ -323,6 +548,7 @@ def process_file(
323
  def export_full_results(results):
324
  """Create ZIP file with all analysis results"""
325
  try:
 
326
  if not results:
327
  raise ValueError("No results to export")
328
 
@@ -333,6 +559,17 @@ def export_full_results(results):
333
  for i, img_bytes in enumerate(results["layout_ordering_images"]):
334
  zipf.writestr(f"layout_ordering_images/page_{i+1}.jpg", img_bytes)
335
 
 
 
 
 
 
 
 
 
 
 
 
336
  for i, (md_text, md_imgs) in enumerate(
337
  zip(
338
  results["markdown_texts"],
@@ -352,169 +589,976 @@ def export_full_results(results):
352
 
353
  for i, img_bytes in enumerate(results["input_images"]):
354
  zipf.writestr(f"input_images/page_{i+1}.jpg", img_bytes)
355
-
 
356
  return str(zip_path)
357
 
358
  except Exception as e:
359
  raise gr.Error(f"Error creating ZIP file: {str(e)}")
360
 
361
 
362
- with gr.Blocks(css=CSS, title="Document Analysis System") as demo:
363
- results_state = gr.State()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
  # Header with logo
366
  with gr.Column(elem_classes=["logo-container"]):
367
  gr.HTML(f'<img src="{LOGO_BASE64}" class="logo-img">')
368
-
369
- # Navigation bar
370
- with gr.Row(elem_classes=["nav-bar"]):
371
- gr.HTML(
372
  """
373
- <div class="nav-links">
374
- <a href="https://github.com/PaddlePaddle/PaddleOCR" class="nav-link" target="_blank">GitHub</a>
375
- </div>
376
- """
 
 
377
  )
378
-
379
  # Upload section
380
- with gr.Column(elem_classes=["upload-section"]):
381
- file_input = gr.File(
382
- label="Upload Document",
383
- file_types=[".pdf", ".jpg", ".jpeg", ".png"],
384
- type="filepath",
385
- )
386
- with gr.Row():
387
- use_formula_recognition_cb = gr.Checkbox(
388
- value=True, label="Use formula recognition"
389
  )
390
- use_chart_recognition_cb = gr.Checkbox(
391
- value=False, label="Use chart recognition"
 
 
 
 
 
 
392
  )
393
- with gr.Row():
394
- use_doc_orientation_classify_cb = gr.Checkbox(
395
- value=False, label="Use document image orientation classification"
 
 
396
  )
397
- use_doc_unwarping_cb = gr.Checkbox(
398
- value=False, label="Use text image unwarping"
 
 
 
 
 
399
  )
400
- with gr.Row():
401
- use_textline_orientation_cb = gr.Checkbox(
402
- value=False, label="Use text line orientation classification"
403
- )
404
- concatenate_pages_cb = gr.Checkbox(value=True, label="Concatenate pages")
405
- process_btn = gr.Button("Analyze Document", variant="primary")
406
- gr.Markdown(
407
- f"""
408
- 1. Only the first {MAX_NUM_PAGES} pages will be processed.
409
- 2. Some formulas might not display correctly because of renderer limitations.
410
- """
411
- )
412
 
413
- loading_spinner = gr.Column(visible=False, elem_classes=["loader-container"])
414
- with loading_spinner:
415
- gr.HTML(
416
- """
417
- <div class="loader"></div>
418
- <p>Processing, please wait...</p>
419
- """
 
 
 
 
 
420
  )
421
-
422
- # Results display section
423
- with gr.Column():
424
- gr.Markdown("### Results")
425
- with gr.Row():
426
  with gr.Column():
427
- layout_ordering_images = []
428
- for i in range(MAX_NUM_PAGES):
429
- layout_ordering_images.append(
430
- gr.Image(
431
- label=f"Layout Ordering Image {i}",
432
- show_label=True,
433
- visible=False,
434
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
435
  )
436
- with gr.Column():
437
- markdown_display_list = []
438
- for i in range(MAX_NUM_PAGES):
439
- markdown_display_list.append(
440
- gr.Markdown(
441
- visible=False,
442
- container=True,
443
- show_copy_button=True,
444
- latex_delimiters=[
445
- {"left": "$$", "right": "$$", "display": True},
446
- {"left": "$", "right": "$", "display": False},
447
- ],
448
- )
449
- )
450
-
451
- # Download section
452
- with gr.Column(elem_classes=["download-section"]):
453
- gr.Markdown("### Result Export")
454
- download_all_btn = gr.Button("Download Full Results (ZIP)", variant="primary")
455
- download_file = gr.File(visible=False, label="Download File")
456
-
457
- # Interaction logic
458
- def toggle_spinner():
459
- return gr.Column(visible=True)
460
-
461
- def hide_spinner():
462
- return gr.Column(visible=False)
463
-
464
- def update_display(results, concatenate_pages):
465
- if not results:
466
- return gr.skip()
467
 
468
- assert len(results["layout_ordering_images"]) <= MAX_NUM_PAGES, len(
469
- results["layout_ordering_images"]
470
- )
471
- ret_img = []
472
- for img in results["layout_ordering_images"]:
473
- ret_img.append(gr.Image(value=bytes_to_image(img), visible=True))
474
- for _ in range(len(results["layout_ordering_images"]), MAX_NUM_PAGES):
475
- ret_img.append(gr.Image(visible=False))
476
-
477
- if concatenate_pages:
478
- markdown_content = results["concatenated_markdown_content"]
479
- ret_cont = [gr.Markdown(value=markdown_content, visible=True)]
480
- for _ in range(1, MAX_NUM_PAGES):
481
- ret_cont.append(gr.Markdown(visible=False))
482
- else:
483
- assert len(results["markdown_content_list"]) <= MAX_NUM_PAGES, len(
484
- results["markdown_content_list"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
485
  )
486
- ret_cont = []
487
- for cont in results["markdown_content_list"]:
488
- ret_cont.append(gr.Markdown(value=cont, visible=True))
489
- for _ in range(len(results["markdown_content_list"]), MAX_NUM_PAGES):
490
- ret_cont.append(gr.Markdown(visible=False))
491
- return ret_img + ret_cont
492
-
493
- process_btn.click(toggle_spinner, outputs=[loading_spinner]).then(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
494
  process_file,
495
  inputs=[
496
  file_input,
497
- use_formula_recognition_cb,
498
- use_chart_recognition_cb,
499
- use_doc_orientation_classify_cb,
500
- use_doc_unwarping_cb,
501
- use_textline_orientation_cb,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
502
  ],
503
  outputs=[results_state],
504
- ).then(hide_spinner, outputs=[loading_spinner]).then(
 
 
505
  update_display,
506
- inputs=[results_state, concatenate_pages_cb],
507
- outputs=layout_ordering_images + markdown_display_list,
 
 
 
 
 
 
 
 
 
 
 
508
  )
 
 
 
509
 
510
  download_all_btn.click(
511
  export_full_results, inputs=[results_state], outputs=[download_file]
512
  ).success(lambda: gr.File(visible=True), outputs=[download_file])
513
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
514
 
515
  if __name__ == "__main__":
 
 
516
  demo.launch(
517
  server_name="0.0.0.0",
518
  server_port=7860,
519
- favicon_path=LOGO_PATH,
520
  )
 
5
  import os
6
  import re
7
  import tempfile
8
+ import threading
9
+ import time
10
  import uuid
11
  import zipfile
12
  from pathlib import Path
 
16
  from PIL import Image
17
 
18
  # API Configuration
19
+ API_URL = os.environ["SERVER_URL"]
20
+
21
+ TOKEN = os.environ["API_TOKEN"]
22
 
23
  LOGO_PATH = Path(__file__).parent / "pp-structurev3.png"
24
  with open(LOGO_PATH, "rb") as image_file:
 
29
  TEMP_DIR = tempfile.TemporaryDirectory()
30
  atexit.register(TEMP_DIR.cleanup)
31
 
32
+ paddle_theme = gr.themes.Soft(
33
+ font=["Roboto", "Open Sans", "Arial", "sans-serif"],
34
+ font_mono=["Fira Code", "monospace"],
35
+ )
36
+
37
 
38
  CSS = """
39
  :root {
 
54
  font-family: Arial, sans-serif;
55
  }
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  .upload-section {
58
  width: 100%;
59
  margin: 0 auto 30px;
 
124
  border-radius: 4px;
125
  padding: 8px 16px;
126
  }
 
 
 
127
 
128
  .file-download {
129
  margin-top: 15px !important;
 
147
  text-align: center;
148
  margin: 20px 0;
149
  }
150
+ .loader-container-prepare {
151
+ text-align: left;
152
+ margin: 20px 0;
153
+ }
154
+ .bold-label .gr-radio {
155
+ margin-top: 8px;
156
+ background-color: var(--white);
157
+ padding: 10px;
158
+ border-radius: 4px;
159
+ }
160
+
161
+ .bold-label .gr-radio label {
162
+ font-size: 14px;
163
+ color: var(--black);
164
+ }
165
+
166
+ #analyze-btn {
167
+ background-color: #FF5722 !important;
168
+ color: white !important;
169
+ transition: all 0.3s ease !important;
170
+ box-shadow: 0 2px 5px rgba(0,0,0,0.2) !important;
171
+ position: fixed !important;
172
+ bottom: 1% !important;
173
+ left: 3% !important;
174
+ z-index: 1000 !important;
175
+ }
176
+
177
+
178
+ #unzip-btn {
179
+ background-color: #4CAF50 !important;
180
+ color: white !important;
181
+ transition: all 0.3s ease !important;
182
+ box-shadow: 0 2px 5px rgba(0,0,0,0.2) !important;
183
+ position: fixed !important;
184
+ bottom: 1% !important;
185
+ left: 18% !important;
186
+ z-index: 1000 !important;
187
+ }
188
+
189
+ #download_file {
190
+ position: fixed !important;
191
+ bottom: 1% !important;
192
+ left: 22% !important;
193
+ z-index: 1000 !important;
194
+ }
195
+
196
+ #analyze-btn:hover,#unzip-btn:hover{
197
+ transform: translateY(-3px) !important;
198
+ box-shadow: 0 4px 8px rgba(0,0,0,0.3) !important;
199
+ }
200
 
201
+ .square-pdf-btn {
202
+ width: 90% !important;
203
+ height: 3% !important;
204
+ padding: 0 !important;
205
+ display: flex !important;
206
+ flex-direction: column !important;
207
+ align-items: center !important;
208
+ justify-content: center !important;
209
+ gap: 8px !important;
210
+ }
211
+
212
+
213
+ .square-pdf-btn img {
214
+ width: 20% !important;
215
+ height: 1% !important;
216
+ margin: 0 !important;
217
+ }
218
+
219
+
220
+ .square-pdf-btn span {
221
+ font-size: 14px !important;
222
+ text-align: center !important;
223
+ }
224
+
225
+
226
+ .gradio-gallery-item:hover {
227
+ background-color: transparent !important;
228
+ filter: none !important;
229
+ transform: none !important;
230
+ }
231
+
232
+ .custom-markdown h3 {
233
+ font-size: 25px !important;
234
+ }
235
+
236
+ .tight-spacing {
237
+ margin-bottom: -20px !important;
238
+ }
239
+
240
+ .tight-spacing-as {
241
+ margin-top: 0px !important;
242
+ margin-bottom: 0px !important;
243
+ }
244
+
245
+ .left-margin-column {
246
+ margin-left: 5%;
247
+ }
248
+
249
+ .image-container img {
250
+ display: inline-block !important;
251
+ }
252
+
253
+ }
254
+ """
255
  MAX_NUM_PAGES = 10
256
+ TMP_DELETE_TIME = 900
257
+ THREAD_WAKEUP_TIME = 600
258
+
259
+ EXAMPLES_TEST = [
260
+ "examples/chinese-formula.jpg",
261
+ "examples/chemical-equation.jpg",
262
+ "examples/formula-chart.jpg",
263
+ "examples/table.jpg",
264
+ "examples/complex-formula.jpg",
265
+ "examples/complex-typeset.jpg",
266
+ "examples/muti-column.jpg",
267
+ "examples/Handwritten.jpg",
268
+ "examples/janpan-paper.jpg",
269
+ "examples/vertical-text.jpg",
270
+ "examples/tradition-chinese.jpg",
271
+ ]
272
+
273
# Human-readable help text for every user-tunable option shown in the UI.
# Keys ending in "_nb" are handled specially by gen_tooltip_radio(), which
# strips that suffix before building the per-widget tooltip ids.
DESC_DICT = {
    "concatenate_pages": "Whether to merge pages",
    "use_formula_recognition": "Whether to use formula recognition subpipeline. If used, the formula can be converted into Latex code. If not used, the formula part is the text recognition result.",
    "use_chart_recognition": "Use the PP-Chart2Table model to parse and convert the charts in the document into tables.",
    # Previously a copy of the unwarping text; orientation classification
    # deals with rotated pages, not wrinkles or tilts.
    "use_doc_orientation_classify": "Whether to use the document image orientation classification module. After use, rotated document images (for example by 90, 180 or 270 degrees) can be corrected.",
    "use_doc_unwarping": "Whether to use the document unwarping module. After use, you can correct distorted images, such as wrinkles, tilts, etc.",
    "use_textline_orientation": "Whether to use the text line orientation classification module to support the distinction and correction of text lines of 0 degrees and 180 degrees.",
    "use_region_detection": "Whether to use the layout region detection. After using it, it can handle complex layouts such as newspapers and magazines.",
    "use_seal_recognition": "Whether to use the seal text recognition subpipeline. After use, the seal text content in the document can be extracted.",
    "use_table_recognition": "Whether to use the table recognition subpipeline. If used, the table can be identified as a structured format (such as HTML, Markdown, etc.). If not used, the table part will be in the form of an image.",
    "layout_threshold_nb": "The threshold used to filter out low confidence prediction results for the layout region, ranging from 0 to 1. If there are missed regions, this value can be appropriately lowered.",
    "layout_nms": "Whether the layout region detection model uses NMS post-processing. After using it, nested boxes or those with large intersections can be removed.",
    # Invisible zero-width spaces removed from this string.
    "layout_unclip_ratio_nb": "Use this method to expand each region of the layout. The larger the value, the larger the expanded region.",
    "text_det_limit_type": "[Short side] means to ensure that the shortest side of the image is not less than [Image side length limit for text detection], and [Long side] means to ensure that the longest side of the image is not greater than [Image side length limit for text detection].",
    "text_det_limit_side_len_nb": "For the side length limit of the text detection input image, for large images with dense text, if you want more accurate recognition, you should choose a larger size. This parameter is used in conjunction with the [Image side length limit type for text detection]. Generally, the maximum [Long side] is suitable for scenes with large images and text, and the minimum [Short side] is suitable for document scenes with small and dense images.",
    "text_det_thresh_nb": "In the output probability map, only pixels with scores greater than the threshold are considered text pixels, and the value range is 0~1.",
    "text_det_box_thresh_nb": "When the average score of all pixels in the detection result border is greater than the threshold, the result will be considered as a text area, and the value range is 0 to 1. If missed detection occurs, this value can be appropriately lowered.",
    "text_det_unclip_ratio_nb": "Use this method to expand the text area. The larger the value, the larger the expanded area.",
    "text_rec_score_thresh_nb": "After text detection, the text box performs text recognition, and the text results with scores greater than the threshold will be retained. The value range is 0~1.",
    "seal_det_limit_type": "[Short side] means ensuring that the shortest side of the image is not less than [Image side length limit for seal text recognition], and [Long side] means ensuring that the longest side of the image is not greater than [Image side length limit for seal text recognition].",
    "seal_det_limit_side_len_nb": "For the side length limit of the input image for seal text detection, for large images with dense text, if you want more accurate recognition, you should choose a larger size. This parameter is used in conjunction with [Image side length limit type for seal text detection]. Generally, the maximum [Long side] is suitable for scenes with large images and text, and the minimum [Short side] is suitable for document scenes with small and dense images and text.",
    "seal_det_thresh_nb": "In the output probability map, only pixels with scores greater than the threshold are considered text pixels, and the value range is 0~1.",
    "seal_det_box_thresh_nb": "When the average score of all pixels within the detection result border is greater than the threshold, the result will be considered as a text area, and the value range is 0~1.",
    "seal_det_unclip_ratio_nb": "Use this method to expand the seal text area. The larger the value, the larger the expanded area.",
    "seal_rec_score_thresh_nb": "After the seal text is detected, the text box is subjected to text recognition. The text results with scores greater than the threshold will be retained. The value range is 0~1.",
    "use_ocr_results_with_table_cells": "Whether to enable the cell OCR mode. If not enabled, the global OCR result is used to fill the HTML table. If enabled, OCR is performed cell by cell and filled into the HTML table (which will increase the time consumption).",
    "use_e2e_wired_table_rec_model": "Whether to enable the wired table end-to-end prediction mode. If not enabled, the table cell detection model prediction results are used to fill the HTML table. If enabled, the end-to-end table structure recognition model cell prediction results are used to fill the HTML table.",
    "use_e2e_wireless_table_rec_model": "Whether to enable the wireless table end-to-end prediction mode. If not enabled, the table cell detection model prediction results are used to fill the HTML table. If enabled, the end-to-end table structure recognition model cell prediction results are used to fill the HTML table.",
    "use_wired_table_cells_trans_to_html": "The wired table cell detection results are directly converted to HTML. The wired table structure recognition model is no longer used to predict the HTML structure. Instead, HTML is directly constructed based on the geometric relationship of the wired table cell detection results.",
    "use_wireless_table_cells_trans_to_html": "The wireless table cell detection results are directly converted to HTML. The wireless table structure recognition model is no longer used to predict the HTML structure. Instead, HTML is directly constructed based on the geometric relationship of the wireless table cell detection results.",
    "use_table_orientation_classify": "Using table orientation classification, when the table in the image is rotated 90/180/270 degrees, the orientation can be corrected and the table recognition can be completed correctly.",
}
305
+ tmp_time = {}
306
+ lock = threading.Lock()
307
+
308
+
309
def gen_tooltip_radio(desc_dict):
    """Expand option descriptions into a per-widget tooltip lookup.

    Every entry of ``desc_dict`` yields two entries in the result, both
    mapped to the same description:

    * a key ending in ``"_nb"`` has that suffix stripped and produces
      ``<base>_nb`` and ``<base>_md``;
    * any other key produces ``<key>_rd`` and ``<key>_md``.

    The suffixes presumably identify number, radio and markdown widgets
    (NOTE(review): confirm against the element ids used in the UI).

    Args:
        desc_dict: Mapping of option name to description text.

    Returns:
        A new dict mapping suffixed names to descriptions.
    """
    number_suffix = "_nb"
    tooltip = {}
    for key, desc in desc_dict.items():
        # Keep the dict key untouched and derive a separate base name, so
        # the iteration variable is never rebound mid-loop.
        if key.endswith(number_suffix):
            base = key[: -len(number_suffix)]
            suffixes = (number_suffix, "_md")
        else:
            base = key
            suffixes = ("_rd", "_md")
        for suffix in suffixes:
            tooltip[f"{base}{suffix}"] = desc
    return tooltip
320
+
321
+
322
+ TOOLTIP_RADIO = gen_tooltip_radio(DESC_DICT)
323
 
324
 
325
  def url_to_bytes(url, *, timeout=10):
 
385
 
386
  def process_file(
387
  file_path,
388
+ image_input,
389
  use_formula_recognition,
390
  use_chart_recognition,
391
  use_doc_orientation_classify,
392
  use_doc_unwarping,
393
  use_textline_orientation,
394
+ use_region_detection,
395
+ use_seal_recognition,
396
+ use_table_recognition,
397
+ layout_threshold,
398
+ layout_nms,
399
+ layout_unclip_ratio,
400
+ text_det_limit_type,
401
+ text_det_limit_side_len,
402
+ text_det_thresh,
403
+ text_det_box_thresh,
404
+ text_det_unclip_ratio,
405
+ text_rec_score_thresh,
406
+ seal_det_limit_type,
407
+ seal_det_limit_side_len,
408
+ seal_det_thresh,
409
+ seal_det_box_thresh,
410
+ seal_det_unclip_ratio,
411
+ seal_rec_score_thresh,
412
+ use_ocr_results_with_table_cells,
413
+ use_e2e_wired_table_rec_model,
414
+ use_e2e_wireless_table_rec_model,
415
+ use_wired_table_cells_trans_to_html,
416
+ use_wireless_table_cells_trans_to_html,
417
+ use_table_orientation_classify,
418
  ):
419
  """Process uploaded file with API"""
420
  try:
421
+ if not file_path and not image_input:
422
  raise ValueError("Please upload a file first")
423
+ if file_path:
424
+ if Path(file_path).suffix == ".pdf":
425
+ file_type = "pdf"
426
+ else:
427
+ file_type = "image"
428
  else:
429
+ file_path = image_input
430
  file_type = "image"
 
431
  # Read file content
432
  with open(file_path, "rb") as f:
433
  file_bytes = f.read()
 
449
  "useDocOrientationClassify": use_doc_orientation_classify,
450
  "useDocUnwarping": use_doc_unwarping,
451
  "useTextlineOrientation": use_textline_orientation,
452
+ "useSealRecognition": use_seal_recognition,
453
+ "useRegionDetection": use_region_detection,
454
+ "useTableRecognition": use_table_recognition,
455
+ "layoutThreshold": layout_threshold,
456
+ "layoutNms": layout_nms,
457
+ "layoutUnclipRatio": layout_unclip_ratio,
458
+ "textDetLimitType": text_det_limit_type,
459
+ "textDetLimitSideLen": text_det_limit_side_len,
460
+ "textDetThresh": text_det_thresh,
461
+ "textDetBoxThresh": text_det_box_thresh,
462
+ "textDetUnclipRatio": text_det_unclip_ratio,
463
+ "textRecScoreThresh": text_rec_score_thresh,
464
+ "sealDetLimitType": seal_det_limit_type,
465
+ "sealDetLimitSideLen": seal_det_limit_side_len,
466
+ "sealDetThresh": seal_det_thresh,
467
+ "sealDetBoxThresh": seal_det_box_thresh,
468
+ "sealDetUnclipRatio": seal_det_unclip_ratio,
469
+ "sealRecScoreThresh": seal_rec_score_thresh,
470
+ "useOcrResultsWithTableCells": use_ocr_results_with_table_cells,
471
+ "useE2eWiredTableRecModel": use_e2e_wired_table_rec_model,
472
+ "useE2eWirelessTableRecModel": use_e2e_wireless_table_rec_model,
473
+ "useWiredTableCellsTransToHtml": use_wired_table_cells_trans_to_html,
474
+ "useWirelessTableCellsTransToHtml": use_wireless_table_cells_trans_to_html,
475
+ "useTableOrientationClassify": use_table_orientation_classify,
476
  },
477
  headers=headers,
478
  timeout=1000,
 
482
  # Parse API response
483
  result = response.json()
484
  layout_results = result.get("result", {}).get("layoutParsingResults", [])
 
485
  layout_ordering_images = []
486
+ layout_det_res_images = []
487
+ overall_ocr_res_images = []
488
+ output_json = result.get("result", {})
489
  markdown_texts = []
490
  markdown_images = []
491
  markdown_content_list = []
492
  input_images = []
493
+ input_images_gallery = []
494
  for res in layout_results:
495
  layout_ordering_images.append(
496
  url_to_bytes(res["outputImages"]["layout_order_res"])
497
  )
498
+ layout_det_res_images.append(
499
+ url_to_bytes(res["outputImages"]["layout_det_res"])
500
+ )
501
+ overall_ocr_res_images.append(
502
+ url_to_bytes(res["outputImages"]["overall_ocr_res"])
503
+ )
504
  markdown = res["markdown"]
505
  markdown_text = markdown["text"]
506
  markdown_texts.append(markdown_text)
 
510
  img_path_to_bytes[path] = url_to_bytes(url)
511
  markdown_images.append(img_path_to_bytes)
512
  input_images.append(url_to_bytes(res["inputImage"]))
513
+ input_images_gallery.append(res["inputImage"])
514
  markdown_content = embed_images_into_markdown_text(
515
  markdown_text, img_path_to_url
516
  )
 
527
  "original_file": file_path,
528
  "file_type": file_type,
529
  "layout_ordering_images": layout_ordering_images,
530
+ "layout_det_res_images": layout_det_res_images,
531
+ "overall_ocr_res_images": overall_ocr_res_images,
532
+ "output_json": output_json,
533
  "markdown_texts": markdown_texts,
534
  "markdown_images": markdown_images,
535
  "markdown_content_list": markdown_content_list,
536
  "concatenated_markdown_content": concatenated_markdown_content,
537
  "input_images": input_images,
538
+ "input_images_gallery": input_images_gallery,
539
  "api_response": result,
540
  }
541
 
 
548
  def export_full_results(results):
549
  """Create ZIP file with all analysis results"""
550
  try:
551
+ global tmp_time
552
  if not results:
553
  raise ValueError("No results to export")
554
 
 
559
  for i, img_bytes in enumerate(results["layout_ordering_images"]):
560
  zipf.writestr(f"layout_ordering_images/page_{i+1}.jpg", img_bytes)
561
 
562
+ for i, img_bytes in enumerate(results["layout_det_res_images"]):
563
+ zipf.writestr(f"layout_det_res_images/page_{i+1}.jpg", img_bytes)
564
+
565
+ for i, img_bytes in enumerate(results["overall_ocr_res_images"]):
566
+ zipf.writestr(f"overall_ocr_res_images/page_{i+1}.jpg", img_bytes)
567
+
568
+ zipf.writestr(
569
+ "output.json",
570
+ json.dumps(results["output_json"], indent=2, ensure_ascii=False),
571
+ )
572
+
573
  for i, (md_text, md_imgs) in enumerate(
574
  zip(
575
  results["markdown_texts"],
 
589
 
590
  for i, img_bytes in enumerate(results["input_images"]):
591
  zipf.writestr(f"input_images/page_{i+1}.jpg", img_bytes)
592
+ with lock:
593
+ tmp_time[zip_path] = time.time()
594
  return str(zip_path)
595
 
596
  except Exception as e:
597
  raise gr.Error(f"Error creating ZIP file: {str(e)}")
598
 
599
 
600
def on_file_change(file):
    """Build the textbox update shown after the file selection changes.

    Args:
        file: The selected file object (its ``name`` attribute is read),
            or a falsy value when nothing is selected.

    Returns:
        A visible ``gr.Textbox`` naming the chosen file, or a default
        ``gr.Textbox()`` when ``file`` is falsy.
    """
    if not file:
        return gr.Textbox()
    chosen_name = os.path.basename(file.name)
    return gr.Textbox(value=f"✅ Chosen file: {chosen_name}", visible=True)
607
+
608
+
609
def clear_file_selection():
    """Reset the upload widget and the chosen-file textbox.

    Returns:
        A 2-tuple ``(gr.File, gr.Textbox)``, both built with ``value=None``.
    """
    cleared_file = gr.File(value=None)
    cleared_label = gr.Textbox(value=None)
    return cleared_file, cleared_label
611
+
612
+
613
def clear_file_selection_examples(image_input):
    """Clear the upload widget and label the textbox with the example's name.

    Args:
        image_input: Path of the selected example image.

    Returns:
        A 2-tuple ``(gr.File, gr.Textbox)``: the file update carries
        ``value=None`` and the textbox update is visible and shows
        "✅ Chosen file: <basename of image_input>".
    """
    example_name = os.path.basename(image_input)
    label_update = gr.Textbox(value=f"✅ Chosen file: {example_name}", visible=True)
    return gr.File(value=None), label_update
616
+
617
+
618
def toggle_sections(choice):
    """Make only the options column matching the dropdown choice visible.

    Args:
        choice: The label currently selected in the options dropdown.

    Returns:
        A dict mapping each options column component to a ``gr.Column``
        update whose ``visible`` flag is True only for the column whose
        label equals ``choice``.
    """
    # (dropdown label, column component) pairs, in display order.
    section_columns = (
        ("Module Options", Module_Options),
        ("Subpipeline Options", Subpipeline_Options),
        ("Layout region detection Options", Layout_region_detection_Options),
        ("Text detection Options", Text_detection_Options),
        ("Seal text recognition Options", Seal_text_recognition_Options),
        ("Table recognition Options", Table_recognition_Options),
    )
    return {
        column: gr.Column(visible=(choice == label))
        for label, column in section_columns
    }
633
+
634
+
635
+ # Interaction logic
636
def toggle_spinner():
    """Switch the results pane into its "processing" state.

    Returns:
        Four updates, in order: a visible ``gr.Column``, a hidden
        ``gr.Column``, a hidden ``gr.File`` and a generic
        ``gr.update(visible=False)``.
    """
    loading_update = gr.Column(visible=True)
    prepare_update = gr.Column(visible=False)
    download_update = gr.File(visible=False)
    tabs_update = gr.update(visible=False)
    return loading_update, prepare_update, download_update, tabs_update
643
+
644
+
645
def hide_spinner():
    """Leave the "processing" state once parsing has finished.

    Returns:
        A 2-tuple: a hidden ``gr.Column`` update followed by a generic
        ``gr.update(visible=True)``.
    """
    spinner_update = gr.Column(visible=False)
    tabs_update = gr.update(visible=True)
    return spinner_update, tabs_update
647
+
648
+
649
def _padded_image_updates(images):
    """Build MAX_NUM_PAGES ``gr.Image`` updates: one visible per image, the rest hidden."""
    updates = [gr.Image(value=bytes_to_image(img), visible=True) for img in images]
    updates.extend(
        gr.Image(visible=False) for _ in range(len(images), MAX_NUM_PAGES)
    )
    return updates


def update_display(results, concatenate_pages):
    """Turn a parsed-document result dict into updates for every result widget.

    Args:
        results: Result dict with the keys read below
            (``layout_ordering_images``, ``layout_det_res_images``,
            ``overall_ocr_res_images``, ``input_images_gallery``,
            ``output_json``, ``concatenated_markdown_content``,
            ``markdown_content_list``). Falsy when nothing has been parsed.
        concatenate_pages: When truthy, show the concatenated markdown in the
            first markdown slot instead of one slot per page.

    Returns:
        ``gr.skip()`` when ``results`` is falsy. Otherwise a flat list, in
        this order: MAX_NUM_PAGES layout-ordering image updates,
        MAX_NUM_PAGES layout-detection image updates, MAX_NUM_PAGES OCR image
        updates, one JSON update, MAX_NUM_PAGES markdown updates, and one
        gallery update per entry of ``gallery_list``.

    Raises:
        AssertionError: If any per-page list is longer than MAX_NUM_PAGES.
    """
    if not results:
        return gr.skip()

    # Every per-page list must fit in the pre-allocated widget slots.
    for key in (
        "layout_ordering_images",
        "layout_det_res_images",
        "overall_ocr_res_images",
        "input_images_gallery",
    ):
        assert len(results[key]) <= MAX_NUM_PAGES, len(results[key])

    # All thumbnail galleries show the same input pages.
    thumbnails = results["input_images_gallery"]
    gallery_list_imgs = [
        gr.Gallery(value=thumbnails, rows=len(thumbnails)) for _ in gallery_list
    ]

    layout_order_imgs = _padded_image_updates(results["layout_ordering_images"])
    layout_det_imgs = _padded_image_updates(results["layout_det_res_images"])
    ocr_imgs = _padded_image_updates(results["overall_ocr_res_images"])

    # The JSON output slot is not a Markdown widget, so send a
    # component-agnostic update rather than constructing gr.Markdown around
    # the JSON payload.
    output_json = [gr.update(value=results["output_json"], visible=True)]

    if concatenate_pages:
        page_contents = [results["concatenated_markdown_content"]]
    else:
        page_contents = results["markdown_content_list"]
        assert len(page_contents) <= MAX_NUM_PAGES, len(page_contents)

    ret_cont = [gr.Markdown(value=cont, visible=True) for cont in page_contents]
    ret_cont.extend(
        gr.Markdown(visible=False)
        for _ in range(len(page_contents), MAX_NUM_PAGES)
    )

    return (
        layout_order_imgs
        + layout_det_imgs
        + ocr_imgs
        + output_json
        + ret_cont
        + gallery_list_imgs
    )
716
+
717
+
718
def update_image(evt: gr.SelectData):
    """Show only the page image that matches the clicked thumbnail.

    Args:
        evt: Gradio selection event; ``evt.index`` is compared against each
            page slot index.

    Returns:
        A list of MAX_NUM_PAGES ``gr.Image`` updates, visible only where the
        slot index equals ``evt.index``.
    """
    return [
        gr.Image(visible=(slot == evt.index)) for slot in range(MAX_NUM_PAGES)
    ]
725
+
726
+
727
def update_markdown(concatenate_pages, evt: gr.SelectData):
    """Show the markdown slot that corresponds to the clicked thumbnail.

    Args:
        concatenate_pages: Truthy when page merging is enabled.
        evt: Gradio selection event; ``evt.index`` is read only when page
            merging is off.

    Returns:
        A list of MAX_NUM_PAGES ``gr.Markdown`` updates. With merging off,
        only the slot equal to ``evt.index`` is visible. With merging on, a
        warning is raised in the UI and only slot 0 is visible.
    """
    if concatenate_pages:
        gr.Warning(
            "When page merging is on, the thumbnail-to-page linking is disabled. If you want to navigate to the corresponding page when clicking on the thumbnail, please turn off page merging."
        )
        shown_slot = 0
    else:
        shown_slot = evt.index

    return [
        gr.Markdown(visible=(slot == shown_slot)) for slot in range(MAX_NUM_PAGES)
    ]
745
+
746
+
747
def _remove_expired_tmp_files(now):
    """Delete tracked files at least TMP_DELETE_TIME seconds old and untrack them."""
    # Snapshot under the lock: other code registers entries in `tmp_time`
    # while holding `lock`, so copying without it could race with an insert.
    with lock:
        tracked = list(tmp_time.items())

    for filename, start_time in tracked:
        if (now - start_time) < TMP_DELETE_TIME:
            continue
        try:
            os.remove(filename)
        except FileNotFoundError:
            # Already gone; only the bookkeeping entry needs to be dropped.
            pass
        except OSError:
            # Could not delete right now (e.g. permissions, file in use).
            # Keep the entry so the next wake-up retries, and do not let the
            # error terminate the cleanup loop.
            continue
        with lock:
            tmp_time.pop(filename, None)


def delete_file_periodically():
    """Run forever, purging expired temporary files at a fixed interval.

    Each iteration removes every file recorded in ``tmp_time`` whose
    registration time is at least ``TMP_DELETE_TIME`` seconds in the past,
    then sleeps for ``THREAD_WAKEUP_TIME`` seconds. The function never
    returns, so it is meant to be run in its own thread.
    """
    while True:
        _remove_expired_tmp_files(time.time())
        time.sleep(THREAD_WAKEUP_TIME)
761
+
762
+
763
+ with gr.Blocks(css=CSS, title="Document Analysis System", theme=paddle_theme) as demo:
764
+ results_state = gr.State()
765
  # Header with logo
766
  with gr.Column(elem_classes=["logo-container"]):
767
  gr.HTML(f'<img src="{LOGO_BASE64}" class="logo-img">')
768
+ gr.Markdown(
 
 
 
769
  """
770
+ Since our inference server is deployed in mainland China, cross-border
771
+ network transmission may be slow, which could result in a suboptimal experience on Hugging Face.
772
+ We recommend visiting the [PaddlePaddle AI Studio Community](https://aistudio.baidu.com/community/app/518494/webUI?source=appCenter) to try the demo for a smoother experience.
773
+ """,
774
+ elem_classes=["tight-spacing-as"],
775
+ visible=True,
776
  )
 
777
  # Upload section
778
+ with gr.Row():
779
+ with gr.Column(scale=4):
780
+ file_input = gr.File(
781
+ label="Upload document",
782
+ file_types=[".pdf", ".jpg", ".jpeg", ".png"],
783
+ type="filepath",
784
+ visible=False,
 
 
785
  )
786
+ file_select = gr.Textbox(label="Select File Path", visible=False)
787
+ image_input = gr.Image(
788
+ label="Image",
789
+ sources="upload",
790
+ type="filepath",
791
+ visible=False,
792
+ interactive=True,
793
+ placeholder="Click to upload image...",
794
  )
795
+ pdf_btn = gr.Button(
796
+ "Click to upload file...",
797
+ variant="primary",
798
+ icon="icon/upload.png",
799
+ elem_classes=["square-pdf-btn"],
800
  )
801
+ examples_image = gr.Examples(
802
+ fn=clear_file_selection_examples,
803
+ inputs=image_input,
804
+ outputs=[file_input, file_select],
805
+ examples_per_page=11,
806
+ examples=EXAMPLES_TEST,
807
+ run_on_click=True,
808
  )
 
 
 
 
 
 
 
 
 
 
 
 
809
 
810
+ file_input.change(
811
+ fn=on_file_change, inputs=file_input, outputs=[file_select]
812
+ )
813
+ concatenate_pages_md = gr.Markdown(
814
+ "### Merge pages", elem_id="concatenate_pages_md"
815
+ )
816
+ concatenate_pages_rd = gr.Radio(
817
+ choices=[("yes", True), ("no", False)],
818
+ value=False,
819
+ interactive=True,
820
+ show_label=False,
821
+ elem_id="concatenate_pages_rd",
822
  )
 
 
 
 
 
823
  with gr.Column():
824
+ section_choice = gr.Dropdown(
825
+ choices=[
826
+ "Module Options",
827
+ "Subpipeline Options",
828
+ "Layout region detection Options",
829
+ "Text detection Options",
830
+ "Seal text recognition Options",
831
+ "Table recognition Options",
832
+ ],
833
+ value="Module Options",
834
+ label="Advance Options",
835
+ show_label=True,
836
+ container=True,
837
+ scale=0,
838
+ elem_classes=["tight-spacing"],
839
+ )
840
+ with gr.Column(
841
+ visible=True, elem_classes="left-margin-column"
842
+ ) as Module_Options:
843
+ use_chart_recognition_md = gr.Markdown(
844
+ "### Using the chart parsing module",
845
+ elem_id="use_chart_recognition_md",
846
+ )
847
+ use_chart_recognition_rd = gr.Radio(
848
+ choices=[("yes", True), ("no", False)],
849
+ value=False,
850
+ interactive=True,
851
+ show_label=False,
852
+ elem_id="use_chart_recognition_rd",
853
+ )
854
+ use_region_detection_md = gr.Markdown(
855
+ "### Using the layout region detection module",
856
+ elem_id="use_region_detection_md",
857
+ )
858
+ use_region_detection_rd = gr.Radio(
859
+ choices=[("yes", True), ("no", False)],
860
+ value=True,
861
+ interactive=True,
862
+ show_label=False,
863
+ elem_id="use_region_detection_rd",
864
+ )
865
+ use_doc_orientation_classify_md = gr.Markdown(
866
+ "### Using the document image orientation classification module",
867
+ elem_id="use_doc_orientation_classify_md",
868
+ )
869
+ use_doc_orientation_classify_rd = gr.Radio(
870
+ choices=[("yes", True), ("no", False)],
871
+ value=False,
872
+ interactive=True,
873
+ show_label=False,
874
+ elem_id="use_doc_orientation_classify_rd",
875
+ )
876
+ use_doc_unwarping_md = gr.Markdown(
877
+ "### Using the document unwarping module",
878
+ elem_id="use_doc_unwarping_md",
879
+ )
880
+ use_doc_unwarping_rd = gr.Radio(
881
+ choices=[("yes", True), ("no", False)],
882
+ value=False,
883
+ interactive=True,
884
+ show_label=False,
885
+ elem_id="use_doc_unwarping_rd",
886
+ )
887
+ use_textline_orientation_md = gr.Markdown(
888
+ "### Using the text line orientation classification module",
889
+ elem_id="use_textline_orientation_md",
890
+ )
891
+ use_textline_orientation_rd = gr.Radio(
892
+ choices=[("yes", True), ("no", False)],
893
+ value=False,
894
+ interactive=True,
895
+ show_label=False,
896
+ elem_id="use_textline_orientation_rd",
897
+ )
898
+ with gr.Column(
899
+ visible=False, elem_classes="left-margin-column"
900
+ ) as Subpipeline_Options:
901
+ use_seal_recognition_md = gr.Markdown(
902
+ "### Using the seal text recognition subpipeline",
903
+ elem_id="use_seal_recognition_md",
904
+ )
905
+ use_seal_recognition_rd = gr.Radio(
906
+ choices=[("yes", True), ("no", False)],
907
+ value=True,
908
+ interactive=True,
909
+ show_label=False,
910
+ elem_id="use_seal_recognition_rd",
911
+ )
912
+ use_formula_recognition_md = gr.Markdown(
913
+ "### Using the formula recognition subpipeline",
914
+ elem_id="use_formula_recognition_md",
915
+ )
916
+ use_formula_recognition_rd = gr.Radio(
917
+ choices=[("yes", True), ("no", False)],
918
+ value=True,
919
+ interactive=True,
920
+ show_label=False,
921
+ elem_id="use_formula_recognition_rd",
922
+ )
923
+ use_table_recognition_md = gr.Markdown(
924
+ "### Using the table recognition subpipeline",
925
+ elem_id="use_table_recognition_md",
926
+ )
927
+ use_table_recognition_rd = gr.Radio(
928
+ choices=[("yes", True), ("no", False)],
929
+ value=True,
930
+ interactive=True,
931
+ show_label=False,
932
+ elem_id="use_table_recognition_rd",
933
+ )
934
+ with gr.Column(
935
+ visible=False, elem_classes="left-margin-column"
936
+ ) as Layout_region_detection_Options:
937
+ layout_threshold_md = gr.Markdown(
938
+ "### Score threshold of layout region detection model",
939
+ elem_id="layout_threshold_md",
940
+ )
941
+ layout_threshold_nb = gr.Number(
942
+ value=0.5,
943
+ step=0.1,
944
+ minimum=0,
945
+ maximum=1,
946
+ interactive=True,
947
+ show_label=False,
948
+ elem_id="layout_threshold_nb",
949
+ )
950
+ layout_nms_md = gr.Markdown(
951
+ "### NMS post-processing of layout region detection",
952
+ elem_id="layout_nms_md",
953
+ )
954
+ layout_nms_rd = gr.Radio(
955
+ choices=[("yes", True), ("no", False)],
956
+ value=True,
957
+ interactive=True,
958
+ show_label=False,
959
+ elem_id="layout_nms_rd",
960
+ )
961
+ layout_unclip_ratio_md = gr.Markdown(
962
+ "### Layout region detection expansion coefficient",
963
+ elem_id="layout_unclip_ratio_md",
964
+ )
965
+ layout_unclip_ratio_nb = gr.Number(
966
+ value=1.0,
967
+ step=0.1,
968
+ minimum=0,
969
+ maximum=10.0,
970
+ interactive=True,
971
+ show_label=False,
972
+ elem_id="layout_unclip_ratio_nb",
973
+ )
974
+ with gr.Column(
975
+ visible=False, elem_classes="left-margin-column"
976
+ ) as Text_detection_Options:
977
+ text_det_limit_type_md = gr.Markdown(
978
+ "### Image side length restriction type for text detection",
979
+ elem_id="text_det_limit_type_md",
980
+ )
981
+ text_det_limit_type_rd = gr.Radio(
982
+ choices=[("Short side", "min"), ("Long side", "max")],
983
+ value="min",
984
+ interactive=True,
985
+ show_label=False,
986
+ elem_id="text_det_limit_type_rd",
987
+ )
988
+ text_det_limit_side_len_md = gr.Markdown(
989
+ "### Image side length limitation for text detection",
990
+ elem_id="text_det_limit_side_len_md",
991
+ )
992
+ text_det_limit_side_len_nb = gr.Number(
993
+ value=736,
994
+ step=1,
995
+ minimum=0,
996
+ maximum=10000,
997
+ interactive=True,
998
+ show_label=False,
999
+ elem_id="text_det_limit_side_len_nb",
1000
+ )
1001
+ text_det_thresh_md = gr.Markdown(
1002
+ "### Text detection pixel threshold",
1003
+ elem_id="text_det_thresh_md",
1004
+ )
1005
+ text_det_thresh_nb = gr.Number(
1006
+ value=0.30,
1007
+ step=0.01,
1008
+ minimum=0.00,
1009
+ maximum=1.00,
1010
+ interactive=True,
1011
+ show_label=False,
1012
+ elem_id="text_det_thresh_nb",
1013
+ )
1014
+ text_det_box_thresh_md = gr.Markdown(
1015
+ "### Text detection box threshold",
1016
+ elem_id="text_det_box_thresh_md",
1017
+ )
1018
+ text_det_box_thresh_nb = gr.Number(
1019
+ value=0.60,
1020
+ step=0.01,
1021
+ minimum=0.00,
1022
+ maximum=1.00,
1023
+ interactive=True,
1024
+ show_label=False,
1025
+ elem_id="text_det_box_thresh_nb",
1026
+ )
1027
+ text_det_unclip_ratio_md = gr.Markdown(
1028
+ "### Text detection unclip ratio",
1029
+ elem_id="text_det_unclip_ratio_md",
1030
+ )
1031
+ text_det_unclip_ratio_nb = gr.Number(
1032
+ value=1.5,
1033
+ step=0.1,
1034
+ minimum=0,
1035
+ maximum=10.0,
1036
+ interactive=True,
1037
+ show_label=False,
1038
+ elem_id="text_det_unclip_ratio_nb",
1039
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1040
 
1041
+ text_rec_score_thresh_md = gr.Markdown(
1042
+ "### Text recognition score threshold",
1043
+ elem_id="text_rec_score_thresh_md",
1044
+ )
1045
+ text_rec_score_thresh_nb = gr.Number(
1046
+ value=0.00,
1047
+ step=0.01,
1048
+ minimum=0,
1049
+ maximum=1.00,
1050
+ interactive=True,
1051
+ show_label=False,
1052
+ elem_id="text_rec_score_thresh_nb",
1053
+ )
1054
+
1055
+ with gr.Column(
1056
+ visible=False, elem_classes="left-margin-column"
1057
+ ) as Seal_text_recognition_Options:
1058
+ seal_det_limit_type_md = gr.Markdown(
1059
+ "### Image side length restriction type for seal text detection",
1060
+ elem_id="seal_det_limit_type_md",
1061
+ )
1062
+ seal_det_limit_type_rd = gr.Radio(
1063
+ choices=[("Short side", "min"), ("Long side", "max")],
1064
+ value="min",
1065
+ interactive=True,
1066
+ show_label=False,
1067
+ elem_id="seal_det_limit_type_rd",
1068
+ )
1069
+ seal_det_limit_side_len_md = gr.Markdown(
1070
+ "### Image side length limitation for seal text detection",
1071
+ elem_id="seal_det_limit_side_len_md",
1072
+ )
1073
+ seal_det_limit_side_len_nb = gr.Number(
1074
+ value=736,
1075
+ step=1,
1076
+ minimum=0,
1077
+ maximum=10000,
1078
+ interactive=True,
1079
+ show_label=False,
1080
+ elem_id="seal_det_limit_side_len_nb",
1081
+ )
1082
+ seal_det_thresh_md = gr.Markdown(
1083
+ "### Pixel threshold for seal text detection",
1084
+ elem_id="seal_det_thresh_md",
1085
+ )
1086
+ seal_det_thresh_nb = gr.Number(
1087
+ value=0.20,
1088
+ step=0.01,
1089
+ minimum=0.00,
1090
+ maximum=1.00,
1091
+ interactive=True,
1092
+ show_label=False,
1093
+ elem_id="seal_det_thresh_nb",
1094
+ )
1095
+ seal_det_box_thresh_md = gr.Markdown(
1096
+ "### Seal text detection box threshold",
1097
+ elem_id="seal_det_box_thresh_md",
1098
+ )
1099
+ seal_det_box_thresh_nb = gr.Number(
1100
+ value=0.60,
1101
+ step=0.01,
1102
+ minimum=0.00,
1103
+ maximum=1.00,
1104
+ interactive=True,
1105
+ show_label=False,
1106
+ elem_id="seal_det_box_thresh_nb",
1107
+ )
1108
+ seal_det_unclip_ratio_md = gr.Markdown(
1109
+ "### Seal text detection unclip ratio",
1110
+ elem_id="seal_det_unclip_ratio_md",
1111
+ )
1112
+ seal_det_unclip_ratio_nb = gr.Number(
1113
+ value=0.5,
1114
+ step=0.1,
1115
+ minimum=0,
1116
+ maximum=10.0,
1117
+ interactive=True,
1118
+ show_label=False,
1119
+ elem_id="seal_det_unclip_ratio_nb",
1120
+ )
1121
+ seal_rec_score_thresh_md = gr.Markdown(
1122
+ "### Seal text detection threshold",
1123
+ elem_id="seal_rec_score_thresh_md",
1124
+ )
1125
+ seal_rec_score_thresh_nb = gr.Number(
1126
+ value=0.00,
1127
+ step=0.01,
1128
+ minimum=0,
1129
+ maximum=1.00,
1130
+ interactive=True,
1131
+ show_label=False,
1132
+ elem_id="seal_rec_score_thresh_nb",
1133
+ )
1134
+ with gr.Column(
1135
+ visible=False, elem_classes="left-margin-column"
1136
+ ) as Table_recognition_Options:
1137
+ use_ocr_results_with_table_cells_md = gr.Markdown(
1138
+ "### Cell OCR mode",
1139
+ elem_id="use_ocr_results_with_table_cells_md",
1140
+ )
1141
+ use_ocr_results_with_table_cells_rd = gr.Radio(
1142
+ choices=[("yes", True), ("no", False)],
1143
+ value=True,
1144
+ interactive=True,
1145
+ show_label=False,
1146
+ elem_id="use_ocr_results_with_table_cells_rd",
1147
+ )
1148
+ use_e2e_wired_table_rec_model_md = gr.Markdown(
1149
+ "### Wired Table End-to-End Prediction model",
1150
+ elem_id="use_e2e_wired_table_rec_model_md",
1151
+ )
1152
+ use_e2e_wired_table_rec_model_rd = gr.Radio(
1153
+ choices=[("yes", True), ("no", False)],
1154
+ value=False,
1155
+ interactive=True,
1156
+ show_label=False,
1157
+ elem_id="use_e2e_wired_table_rec_model_rd",
1158
+ )
1159
+ use_e2e_wireless_table_rec_model_md = gr.Markdown(
1160
+ "### Wireless Table End-to-End Prediction model",
1161
+ elem_id="use_e2e_wireless_table_rec_model_md",
1162
+ )
1163
+ use_e2e_wireless_table_rec_model_rd = gr.Radio(
1164
+ choices=[("yes", True), ("no", False)],
1165
+ value=False,
1166
+ interactive=True,
1167
+ show_label=False,
1168
+ elem_id="use_e2e_wireless_table_rec_model_rd",
1169
+ )
1170
+ use_wired_table_cells_trans_to_html_md = gr.Markdown(
1171
+ "### Wired table to HTML mode",
1172
+ elem_id="use_wired_table_cells_trans_to_html_md",
1173
+ )
1174
+ use_wired_table_cells_trans_to_html_rd = gr.Radio(
1175
+ choices=[("yes", True), ("no", False)],
1176
+ value=False,
1177
+ interactive=True,
1178
+ show_label=False,
1179
+ elem_id="use_wired_table_cells_trans_to_html_rd",
1180
+ )
1181
+ use_wireless_table_cells_trans_to_html_md = gr.Markdown(
1182
+ "### Wireless table to HTML mode",
1183
+ elem_id="use_wireless_table_cells_trans_to_html_md",
1184
+ )
1185
+ use_wireless_table_cells_trans_to_html_rd = gr.Radio(
1186
+ choices=[("yes", True), ("no", False)],
1187
+ value=False,
1188
+ interactive=True,
1189
+ show_label=False,
1190
+ elem_id="use_wireless_table_cells_trans_to_html_rd",
1191
+ )
1192
+ use_table_orientation_classify_md = gr.Markdown(
1193
+ "### Using table orientation classify module",
1194
+ elem_id="use_table_orientation_classify_md",
1195
+ )
1196
+ use_table_orientation_classify_rd = gr.Radio(
1197
+ choices=[("yes", True), ("no", False)],
1198
+ value=True,
1199
+ interactive=True,
1200
+ show_label=False,
1201
+ elem_id="use_table_orientation_classify_rd",
1202
+ )
1203
+ with gr.Row():
1204
+ process_btn = gr.Button(
1205
+ "🚀 Parse Document", elem_id="analyze-btn", variant="primary"
1206
+ )
1207
+ download_all_btn = gr.Button(
1208
+ "📦 Download Full Results (ZIP)",
1209
+ elem_id="unzip-btn",
1210
+ variant="primary",
1211
+ )
1212
+ gr.Markdown(
1213
+ f"""
1214
+ 1. Only the first {MAX_NUM_PAGES} pages will be processed.
1215
+ 2. Some formulas might not display correctly because of renderer limitations or syntax errors.
1216
+ """
1217
+ )
1218
+
1219
+ # Results display section
1220
+ with gr.Column(scale=7):
1221
+ gr.Markdown("### Results", elem_classes="custom-markdown")
1222
+ loading_spinner = gr.Column(
1223
+ visible=False, elem_classes=["loader-container"]
1224
+ )
1225
+ with loading_spinner:
1226
+ gr.HTML(
1227
+ """
1228
+ <div class="loader"></div>
1229
+ <p>Processing, please wait...</p>
1230
+ """
1231
+ )
1232
+ prepare_spinner = gr.Column(
1233
+ visible=True, elem_classes=["loader-container-prepare"]
1234
  )
1235
+ with prepare_spinner:
1236
+ gr.HTML(
1237
+ """
1238
+ <div style="
1239
+ max-width: 100%;
1240
+ max-height: 100%;
1241
+ margin: 24px 0 0 12px;
1242
+ padding: 24px 32px;
1243
+ border: 2px solid #A8C1E7;
1244
+ border-radius: 12px;
1245
+ background: #f8faff;
1246
+ box-shadow: 0 2px 8px rgba(100,150,200,0.08);
1247
+ font-size: 18px;
1248
+ ">
1249
+ <b>🚀 User Guide</b><br>
1250
+ <b>Step 1:</b> Upload Your File<br>
1251
+ Supported formats: JPG, PNG, PDF, JPEG<br>
1252
+ <b>Step 2:</b> Click Analyze Document Button<br>
1253
+ System will process automatically<br>
1254
+ <b>Step 3:</b> Wait for Results<br>
1255
+ Results will be displayed after processing<br>
1256
+ <b>Step 4:</b> Download results zip<br>
1257
+ Results zip will be displayed after processing<br>
1258
+ </div>
1259
+ """
1260
+ )
1261
+ download_file = gr.File(visible=False, label="Download File")
1262
+ markdown_display_list = []
1263
+ layout_ordering_images = []
1264
+ layout_det_res_images = []
1265
+ overall_ocr_res_images = []
1266
+ output_json_list = []
1267
+ gallery_list = []
1268
+ with gr.Tabs(visible=False) as tabs:
1269
+ with gr.Tab("Layout Parsing"):
1270
+ with gr.Row():
1271
+ with gr.Column(scale=2, min_width=1):
1272
+ gallery_markdown = gr.Gallery(
1273
+ show_label=False,
1274
+ allow_preview=False,
1275
+ preview=False,
1276
+ columns=1,
1277
+ min_width=10,
1278
+ object_fit="contain",
1279
+ visible=True,
1280
+ )
1281
+ gallery_list.append(gallery_markdown)
1282
+ with gr.Column(scale=10):
1283
+ for i in range(MAX_NUM_PAGES):
1284
+ markdown_display_list.append(
1285
+ gr.Markdown(
1286
+ visible=False,
1287
+ container=True,
1288
+ show_copy_button=True,
1289
+ latex_delimiters=[
1290
+ {
1291
+ "left": "$$",
1292
+ "right": "$$",
1293
+ "display": True,
1294
+ },
1295
+ {
1296
+ "left": "$",
1297
+ "right": "$",
1298
+ "display": False,
1299
+ },
1300
+ ],
1301
+ elem_classes=["image-container"],
1302
+ )
1303
+ )
1304
+ with gr.Tab("Reading Order"):
1305
+ with gr.Row():
1306
+ with gr.Column(scale=2, min_width=1):
1307
+ gallery_layout_order = gr.Gallery(
1308
+ show_label=False,
1309
+ allow_preview=False,
1310
+ preview=False,
1311
+ columns=1,
1312
+ min_width=10,
1313
+ object_fit="contain",
1314
+ )
1315
+ gallery_list.append(gallery_layout_order)
1316
+ with gr.Column(scale=10):
1317
+ for i in range(MAX_NUM_PAGES):
1318
+ layout_ordering_images.append(
1319
+ gr.Image(
1320
+ label=f"Layout Ordering Image {i}",
1321
+ show_label=True,
1322
+ visible=False,
1323
+ container=True,
1324
+ )
1325
+ )
1326
+ with gr.Tab("Layout Region Detection"):
1327
+ with gr.Row():
1328
+ with gr.Column(scale=2, min_width=1):
1329
+ gallery_layout_det = gr.Gallery(
1330
+ show_label=False,
1331
+ allow_preview=False,
1332
+ preview=False,
1333
+ columns=1,
1334
+ min_width=10,
1335
+ object_fit="contain",
1336
+ )
1337
+ gallery_list.append(gallery_layout_det)
1338
+ with gr.Column(scale=10):
1339
+ for i in range(MAX_NUM_PAGES):
1340
+ layout_det_res_images.append(
1341
+ gr.Image(
1342
+ label=f"Layout Detection Image {i}",
1343
+ show_label=True,
1344
+ visible=False,
1345
+ )
1346
+ )
1347
+ with gr.Tab("OCR"):
1348
+ with gr.Row():
1349
+ with gr.Column(scale=2, min_width=1):
1350
+ gallery_ocr_det = gr.Gallery(
1351
+ show_label=False,
1352
+ allow_preview=False,
1353
+ preview=False,
1354
+ columns=1,
1355
+ min_width=10,
1356
+ object_fit="contain",
1357
+ )
1358
+ gallery_list.append(gallery_ocr_det)
1359
+ with gr.Column(scale=10):
1360
+ for i in range(MAX_NUM_PAGES):
1361
+ overall_ocr_res_images.append(
1362
+ gr.Image(
1363
+ label=f"OCR Image {i}",
1364
+ show_label=True,
1365
+ visible=False,
1366
+ )
1367
+ )
1368
+ with gr.Tab("JSON"):
1369
+ with gr.Row():
1370
+ with gr.Column(scale=2, min_width=1):
1371
+ gallery_json = gr.Gallery(
1372
+ show_label=False,
1373
+ allow_preview=False,
1374
+ preview=False,
1375
+ columns=1,
1376
+ min_width=10,
1377
+ object_fit="contain",
1378
+ )
1379
+ gallery_list.append(gallery_json)
1380
+ with gr.Column(scale=10):
1381
+ gr.HTML(
1382
+ """
1383
+ <style>
1384
+ .line.svelte-19ir0ev svg {
1385
+ width: 30px !important;
1386
+ height: 30px !important;
1387
+ min-width: 30px !important;
1388
+ min-height: 30px !important;
1389
+ padding: 0 !important;
1390
+ font-size: 18px !important;
1391
+ }
1392
+ .line.svelte-19ir0ev span:contains('Object(') {
1393
+ font-size: 12px;
1394
+ }
1395
+ </style>
1396
+ """
1397
+ )
1398
+ output_json_list.append(
1399
+ gr.JSON(
1400
+ visible=False,
1401
+ )
1402
+ )
1403
+ # # Navigation bar
1404
+ with gr.Column(elem_classes=["nav-bar"]):
1405
+ gr.HTML(
1406
+ """
1407
+ <div class="nav-links">
1408
+ <a href="https://github.com/PaddlePaddle/PaddleOCR" class="nav-link" target="_blank">GitHub</a>
1409
+ </div>
1410
+ """
1411
+ )
1412
+
1413
+ section_choice.change(
1414
+ fn=toggle_sections,
1415
+ inputs=section_choice,
1416
+ outputs=[
1417
+ Module_Options,
1418
+ Subpipeline_Options,
1419
+ Layout_region_detection_Options,
1420
+ Text_detection_Options,
1421
+ Seal_text_recognition_Options,
1422
+ Table_recognition_Options,
1423
+ ],
1424
+ )
1425
+ pdf_btn.click(
1426
+ fn=clear_file_selection, inputs=[], outputs=[file_input, file_select]
1427
+ ).then(
1428
+ None,
1429
+ [],
1430
+ [],
1431
+ js="""
1432
+ () => {
1433
+ const fileInput = document.querySelector('input[type="file"]');
1434
+ fileInput.value = '';
1435
+ fileInput.click();
1436
+ }
1437
+ """,
1438
+ )
1439
+ process_btn.click(
1440
+ toggle_spinner, outputs=[loading_spinner, prepare_spinner, download_file, tabs]
1441
+ ).then(
1442
  process_file,
1443
  inputs=[
1444
  file_input,
1445
+ image_input,
1446
+ use_formula_recognition_rd,
1447
+ use_chart_recognition_rd,
1448
+ use_doc_orientation_classify_rd,
1449
+ use_doc_unwarping_rd,
1450
+ use_textline_orientation_rd,
1451
+ use_region_detection_rd,
1452
+ use_seal_recognition_rd,
1453
+ use_table_recognition_rd,
1454
+ layout_threshold_nb,
1455
+ layout_nms_rd,
1456
+ layout_unclip_ratio_nb,
1457
+ text_det_limit_type_rd,
1458
+ text_det_limit_side_len_nb,
1459
+ text_det_thresh_nb,
1460
+ text_det_box_thresh_nb,
1461
+ text_det_unclip_ratio_nb,
1462
+ text_rec_score_thresh_nb,
1463
+ seal_det_limit_type_rd,
1464
+ seal_det_limit_side_len_nb,
1465
+ seal_det_thresh_nb,
1466
+ seal_det_box_thresh_nb,
1467
+ seal_det_unclip_ratio_nb,
1468
+ seal_rec_score_thresh_nb,
1469
+ use_ocr_results_with_table_cells_rd,
1470
+ use_e2e_wired_table_rec_model_rd,
1471
+ use_e2e_wireless_table_rec_model_rd,
1472
+ use_wired_table_cells_trans_to_html_rd,
1473
+ use_wireless_table_cells_trans_to_html_rd,
1474
+ use_table_orientation_classify_rd,
1475
  ],
1476
  outputs=[results_state],
1477
+ ).then(
1478
+ hide_spinner, outputs=[loading_spinner, tabs]
1479
+ ).then(
1480
  update_display,
1481
+ inputs=[results_state, concatenate_pages_rd],
1482
+ outputs=layout_ordering_images
1483
+ + layout_det_res_images
1484
+ + overall_ocr_res_images
1485
+ + output_json_list
1486
+ + markdown_display_list
1487
+ + gallery_list,
1488
+ )
1489
+
1490
+ gallery_markdown.select(
1491
+ update_markdown,
1492
+ inputs=concatenate_pages_rd,
1493
+ outputs=markdown_display_list,
1494
  )
1495
+ gallery_layout_order.select(update_image, outputs=layout_ordering_images)
1496
+ gallery_layout_det.select(update_image, outputs=layout_det_res_images)
1497
+ gallery_ocr_det.select(update_image, outputs=overall_ocr_res_images)
1498
 
1499
  download_all_btn.click(
1500
  export_full_results, inputs=[results_state], outputs=[download_file]
1501
  ).success(lambda: gr.File(visible=True), outputs=[download_file])
1502
 
1503
+ demo.load(
1504
+ fn=lambda: None,
1505
+ inputs=[],
1506
+ outputs=[],
1507
+ js=f"""
1508
+ () => {{
1509
+ const tooltipTexts = {TOOLTIP_RADIO};
1510
+ let tooltip = document.getElementById("custom-tooltip");
1511
+ if (!tooltip) {{
1512
+ tooltip = document.createElement("div");
1513
+ tooltip.id = "custom-tooltip";
1514
+ tooltip.style.position = "fixed";
1515
+ tooltip.style.background = "rgba(0, 0, 0, 0.75)";
1516
+ tooltip.style.color = "white";
1517
+ tooltip.style.padding = "6px 10px";
1518
+ tooltip.style.borderRadius = "4px";
1519
+ tooltip.style.fontSize = "13px";
1520
+ tooltip.style.maxWidth = "300px";
1521
+ tooltip.style.zIndex = "10000";
1522
+ tooltip.style.pointerEvents = "none";
1523
+ tooltip.style.transition = "opacity 0.2s";
1524
+ tooltip.style.opacity = "0";
1525
+ tooltip.style.whiteSpace = "normal";
1526
+ document.body.appendChild(tooltip);
1527
+ }}
1528
+ Object.keys(tooltipTexts).forEach(id => {{
1529
+ const elem = document.getElementById(id);
1530
+ if (!elem) return;
1531
+ function showTooltip(e) {{
1532
+ tooltip.style.opacity = "1";
1533
+ tooltip.innerText = tooltipTexts[id];
1534
+ let x = e.clientX + 10;
1535
+ let y = e.clientY + 10;
1536
+ if (x + tooltip.offsetWidth > window.innerWidth) {{
1537
+ x = e.clientX - tooltip.offsetWidth - 10;
1538
+ }}
1539
+ if (y + tooltip.offsetHeight > window.innerHeight) {{
1540
+ y = e.clientY - tooltip.offsetHeight - 10;
1541
+ }}
1542
+ tooltip.style.left = x + "px";
1543
+ tooltip.style.top = y + "px";
1544
+ }}
1545
+
1546
+ function hideTooltip() {{
1547
+ tooltip.style.opacity = "0";
1548
+ }}
1549
+
1550
+ elem.addEventListener("mousemove", showTooltip);
1551
+ elem.addEventListener("mouseleave", hideTooltip);
1552
+ }});
1553
+ }}
1554
+ """,
1555
+ )
1556
+
1557
 
1558
if __name__ == "__main__":
    # The cleanup worker loops forever; run it as a daemon so it cannot keep
    # the interpreter alive once the Gradio server stops or is interrupted.
    t = threading.Thread(target=delete_file_periodically, daemon=True)
    t.start()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )
examples/Handwritten.jpg ADDED

Git LFS Details

  • SHA256: 4fbd1482b39b103a31b049d0d3a5830b8deb694edf12176116f47d3e7ffd966a
  • Pointer size: 131 Bytes
  • Size of remote file: 465 kB
examples/chemical-equation.jpg ADDED

Git LFS Details

  • SHA256: d0d50911acb3010fb6d7b35c98efd83cf5bd89acebcfe4d5a1d1ff0203be7492
  • Pointer size: 131 Bytes
  • Size of remote file: 225 kB
examples/chinese-formula.jpg ADDED

Git LFS Details

  • SHA256: 28c17e5bf8b8c603667e531016ed087fb7bebc67b0b304db0bd74ceee164aeb5
  • Pointer size: 131 Bytes
  • Size of remote file: 224 kB
examples/complex-formula.jpg ADDED

Git LFS Details

  • SHA256: 0ea2daa1fc3e5e0e3244a3229c93b88c9a5e89130c550cb06e9f80d572613a26
  • Pointer size: 131 Bytes
  • Size of remote file: 221 kB
examples/complex-typeset.jpg ADDED

Git LFS Details

  • SHA256: ee4b25fe50bd0f0c270efff76c50445c9695bb12e96f05b2b971df986f4466b0
  • Pointer size: 131 Bytes
  • Size of remote file: 966 kB
examples/formula-chart.jpg ADDED

Git LFS Details

  • SHA256: 80f629e74f1a9272004b5578bf21b8e99f26859cd2381b1151537b61d1abdb40
  • Pointer size: 131 Bytes
  • Size of remote file: 171 kB
examples/janpan-paper.jpg ADDED

Git LFS Details

  • SHA256: 5bcbeeb9691cf2abf9f3832bf04aef6a3f63709d5f987c364b73c25c8360ed1f
  • Pointer size: 131 Bytes
  • Size of remote file: 655 kB
examples/muti-column.jpg ADDED

Git LFS Details

  • SHA256: 6cafba2ef3d7281c4095f8b429eb7358d89690596a764b2f9b02bf5fca630dbd
  • Pointer size: 131 Bytes
  • Size of remote file: 808 kB
examples/table.jpg ADDED

Git LFS Details

  • SHA256: d424db7d01fa6a842afea3ff1a7a41bf03e9e7089fb931e19ce578ccce42de06
  • Pointer size: 131 Bytes
  • Size of remote file: 900 kB
examples/tradition-chinese.jpg ADDED

Git LFS Details

  • SHA256: 101e1863680032368af0b64bb514ee7a7d401288d2c2b23632932437c933a053
  • Pointer size: 131 Bytes
  • Size of remote file: 393 kB
examples/vertical-text.jpg ADDED

Git LFS Details

  • SHA256: 442f70532f79419288fa1ca99b03400a716691a8a65576290e57f620f1e6e801
  • Pointer size: 131 Bytes
  • Size of remote file: 561 kB
icon/upload.png ADDED

Git LFS Details

  • SHA256: bf7780dd26d21a5f09ef9c0f9c86ba992b2bf53fcd5f5618617cce33afec44b6
  • Pointer size: 130 Bytes
  • Size of remote file: 49.2 kB