xiaoyao9184 committed on
Commit
08dc990
·
verified ·
1 Parent(s): 9a6c924

Synced repo using 'sync_with_huggingface' Github Action

Browse files
Files changed (1) hide show
  1. gradio_app.py +72 -9
gradio_app.py CHANGED
@@ -17,6 +17,7 @@ import base64
17
  import io
18
  import re
19
  from typing import Any, Dict
 
20
 
21
  import pypdfium2
22
  from PIL import Image
@@ -25,7 +26,20 @@ from marker.converters.pdf import PdfConverter
25
  from marker.models import create_model_dict
26
  from marker.config.parser import ConfigParser
27
  from marker.output import text_from_rendered
28
-
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  def load_models():
31
  return create_model_dict()
@@ -83,6 +97,8 @@ def markdown_insert_images(markdown, images):
83
  if 'model_dict' not in globals():
84
  model_dict = load_models()
85
 
 
 
86
  with gr.Blocks(title="Marker") as demo:
87
  gr.Markdown("""
88
  # Marker Demo
@@ -96,12 +112,15 @@ with gr.Blocks(title="Marker") as demo:
96
  with gr.Column():
97
  in_file = gr.File(label="PDF file:", file_types=[".pdf"])
98
  in_num = gr.Slider(label="PDF file page number", minimum=1, maximum=1, value=1, step=1, visible=False)
99
- in_img = gr.Image(label="PDF file (preview)", type="pil", sources=None, visible=False)
 
 
100
 
101
  page_range_txt = gr.Textbox(label="Page range to parse, comma separated like 0,5-10,20", value=f"")
102
  output_format_dd = gr.Dropdown(label="Output format", choices=["markdown", "json", "html"], value="markdown")
103
 
104
  force_ocr_ckb = gr.Checkbox(label="Force OCR", value=True, info="Force OCR on all pages")
 
105
  debug_ckb = gr.Checkbox(label="Debug", value=False, info="Show debug information")
106
  use_llm_ckb = gr.Checkbox(label="Use LLM", value=False, info="Use LLM for higher quality processing")
107
  strip_existing_ocr_ckb = gr.Checkbox(label="Strip existing OCR", value=False, info="Strip existing OCR text from the PDF and re-OCR.")
@@ -113,6 +132,7 @@ with gr.Blocks(title="Marker") as demo:
113
  debug_img_pdf = gr.Image(label="PDF debug image", visible=False)
114
  debug_img_layout = gr.Image(label="Layout debug image", visible=False)
115
 
 
116
  def show_image(file, num=1):
117
  if file is None:
118
  return [
@@ -123,7 +143,7 @@ with gr.Blocks(title="Marker") as demo:
123
  img = get_page_image(file, num)
124
  return [
125
  gr.update(visible=True, maximum=count),
126
- gr.update(visible=True, value=img),
127
  f"0-{num-1}"]
128
 
129
  in_file.clear(
@@ -163,8 +183,15 @@ with gr.Blocks(title="Marker") as demo:
163
  api_name=False
164
  )
165
 
 
 
 
 
 
 
 
166
  # Run Marker
167
- def run_marker_img(filename, page_range, force_ocr, output_format, debug, use_llm, strip_existing_ocr):
168
  """
169
  Run marker on the given PDF file and return processed results in multiple formats.
170
 
@@ -174,6 +201,8 @@ with gr.Blocks(title="Marker") as demo:
174
  force_ocr (bool, optional): If True (default), force OCR even on text-based PDFs.
175
  output_format (str, optional): Output format. One of: "markdown", "html", "json".
176
  Defaults to "markdown".
 
 
177
  debug (bool, optional): If True, return additional debug images (rendered page and layout).
178
  Defaults to False.
179
  use_llm (bool, optional): If True, use LLM-assisted parsing for better semantic output.
@@ -188,6 +217,7 @@ with gr.Blocks(title="Marker") as demo:
188
  - html_result (str): HTML output string.
189
  - page_image (dict or None): Rendered image of PDF page (if debug is True, else None).
190
  - layout_image (dict or None): Visualized layout image (if debug is True, else None).
 
191
  """
192
  cli_options = {
193
  "output_format": output_format,
@@ -218,6 +248,8 @@ with gr.Blocks(title="Marker") as demo:
218
  img = Image.open(layout_image_path)
219
  gr_debug_lay = gr.update(visible=True, value=img)
220
 
 
 
221
  text, ext, images = text_from_rendered(rendered)
222
  if output_format == "markdown":
223
  text = markdown_insert_images(text, images)
@@ -226,15 +258,45 @@ with gr.Blocks(title="Marker") as demo:
226
  gr.update(visible=False),
227
  gr.update(visible=False),
228
  gr_debug_pdf,
229
- gr_debug_lay
 
230
  ]
231
  elif output_format == "json":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  return [
233
  gr.update(visible=False),
234
  gr.update(visible=True, value=text),
235
  gr.update(visible=False),
236
  gr_debug_pdf,
237
- gr_debug_lay
 
238
  ]
239
  elif output_format == "html":
240
  return [
@@ -242,13 +304,14 @@ with gr.Blocks(title="Marker") as demo:
242
  gr.update(visible=False),
243
  gr.update(visible=True, value=text),
244
  gr_debug_pdf,
245
- gr_debug_lay
 
246
  ]
247
 
248
  run_marker_btn.click(
249
  fn=run_marker_img,
250
- inputs=[in_file, page_range_txt, force_ocr_ckb, output_format_dd, debug_ckb, use_llm_ckb, strip_existing_ocr_ckb],
251
- outputs=[result_md, result_json, result_html, debug_img_pdf, debug_img_layout]
252
  )
253
 
254
  if __name__ == "__main__":
 
17
  import io
18
  import re
19
  from typing import Any, Dict
20
+ import json
21
 
22
  import pypdfium2
23
  from PIL import Image
 
26
  from marker.models import create_model_dict
27
  from marker.config.parser import ConfigParser
28
  from marker.output import text_from_rendered
29
+ from marker.schema import BlockTypes
30
+
31
+ COLORS = [
32
+ "#4e79a7",
33
+ "#f28e2c",
34
+ "#e15759",
35
+ "#76b7b2",
36
+ "#59a14f",
37
+ "#edc949",
38
+ "#af7aa1",
39
+ "#ff9da7",
40
+ "#9c755f",
41
+ "#bab0ab"
42
+ ]
43
 
44
  def load_models():
45
  return create_model_dict()
 
97
  if 'model_dict' not in globals():
98
  model_dict = load_models()
99
 
100
+ img_state = gr.State([])
101
+
102
  with gr.Blocks(title="Marker") as demo:
103
  gr.Markdown("""
104
  # Marker Demo
 
112
  with gr.Column():
113
  in_file = gr.File(label="PDF file:", file_types=[".pdf"])
114
  in_num = gr.Slider(label="PDF file page number", minimum=1, maximum=1, value=1, step=1, visible=False)
115
+ in_img = gr.AnnotatedImage(
116
+ label="PDF file (preview)", visible=False
117
+ )
118
 
119
  page_range_txt = gr.Textbox(label="Page range to parse, comma separated like 0,5-10,20", value=f"")
120
  output_format_dd = gr.Dropdown(label="Output format", choices=["markdown", "json", "html"], value="markdown")
121
 
122
  force_ocr_ckb = gr.Checkbox(label="Force OCR", value=True, info="Force OCR on all pages")
123
+ show_blocks_ckb = gr.Checkbox(label="Show Blocks", info="Display detected blocks, only when output is JSON", value=False, interactive=False)
124
  debug_ckb = gr.Checkbox(label="Debug", value=False, info="Show debug information")
125
  use_llm_ckb = gr.Checkbox(label="Use LLM", value=False, info="Use LLM for higher quality processing")
126
  strip_existing_ocr_ckb = gr.Checkbox(label="Strip existing OCR", value=False, info="Strip existing OCR text from the PDF and re-OCR.")
 
132
  debug_img_pdf = gr.Image(label="PDF debug image", visible=False)
133
  debug_img_layout = gr.Image(label="Layout debug image", visible=False)
134
 
135
+
136
  def show_image(file, num=1):
137
  if file is None:
138
  return [
 
143
  img = get_page_image(file, num)
144
  return [
145
  gr.update(visible=True, maximum=count),
146
+ gr.update(visible=True, value=(img, [])),
147
  f"0-{num-1}"]
148
 
149
  in_file.clear(
 
183
  api_name=False
184
  )
185
 
186
+ output_format_dd.change(
187
+ fn=lambda x: gr.update(interactive=x == "json", value=x == "json"),
188
+ inputs=[output_format_dd],
189
+ outputs=[show_blocks_ckb],
190
+ api_name=False
191
+ )
192
+
193
  # Run Marker
194
+ def run_marker_img(filename, page_range, force_ocr, output_format, show_blocks, debug, use_llm, strip_existing_ocr):
195
  """
196
  Run marker on the given PDF file and return processed results in multiple formats.
197
 
 
201
  force_ocr (bool, optional): If True (default), force OCR even on text-based PDFs.
202
  output_format (str, optional): Output format. One of: "markdown", "html", "json".
203
  Defaults to "markdown".
204
+ show_blocks (bool, optional): If True, show blocks in preview image with JSON output.
205
+ Defaults to False.
206
  debug (bool, optional): If True, return additional debug images (rendered page and layout).
207
  Defaults to False.
208
  use_llm (bool, optional): If True, use LLM-assisted parsing for better semantic output.
 
217
  - html_result (str): HTML output string.
218
  - page_image (dict or None): Rendered image of PDF page (if debug is True, else None).
219
  - layout_image (dict or None): Visualized layout image (if debug is True, else None).
220
+ - preview_image (dict or None): Preview image.
221
  """
222
  cli_options = {
223
  "output_format": output_format,
 
248
  img = Image.open(layout_image_path)
249
  gr_debug_lay = gr.update(visible=True, value=img)
250
 
251
+ gr_img = gr.update()
252
+
253
  text, ext, images = text_from_rendered(rendered)
254
  if output_format == "markdown":
255
  text = markdown_insert_images(text, images)
 
258
  gr.update(visible=False),
259
  gr.update(visible=False),
260
  gr_debug_pdf,
261
+ gr_debug_lay,
262
+ gr_img
263
  ]
264
  elif output_format == "json":
265
+ if show_blocks:
266
+ doc_json = json.loads(text)
267
+ color_map = {}
268
+ sections = []
269
+ def traverse(block):
270
+ if "block_type" in block:
271
+ try:
272
+ index = list(BlockTypes.__members__).index(block["block_type"])
273
+ color = COLORS[index % len(COLORS)]
274
+ except (ValueError, IndexError):
275
+ color = "#cccccc" # fallback color
276
+
277
+ label = block["id"].replace("/page/0/", "")
278
+ color_map[label] = color
279
+
280
+ bbox = tuple(int(x) for x in block["bbox"])
281
+ sections.append((bbox, label))
282
+ if "children" in block and isinstance(block["children"], list):
283
+ for child in block["children"]:
284
+ traverse(child)
285
+ traverse(doc_json["children"][0])
286
+
287
+ page_range = config_parser.generate_config_dict()["page_range"]
288
+ first_page = page_range[0] if page_range else 0
289
+ img = get_page_image(filename, first_page + 1, dpi=72)
290
+
291
+ gr_img = gr.update(value=(img, sections), color_map=color_map)
292
+
293
  return [
294
  gr.update(visible=False),
295
  gr.update(visible=True, value=text),
296
  gr.update(visible=False),
297
  gr_debug_pdf,
298
+ gr_debug_lay,
299
+ gr_img
300
  ]
301
  elif output_format == "html":
302
  return [
 
304
  gr.update(visible=False),
305
  gr.update(visible=True, value=text),
306
  gr_debug_pdf,
307
+ gr_debug_lay,
308
+ gr_img
309
  ]
310
 
311
  run_marker_btn.click(
312
  fn=run_marker_img,
313
+ inputs=[in_file, page_range_txt, force_ocr_ckb, output_format_dd, show_blocks_ckb, debug_ckb, use_llm_ckb, strip_existing_ocr_ckb],
314
+ outputs=[result_md, result_json, result_html, debug_img_pdf, debug_img_layout, in_img]
315
  )
316
 
317
  if __name__ == "__main__":