Felix92 committed on
Commit
51df59a
·
verified ·
1 Parent(s): 6744844

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +42 -21
  2. requirements.txt +1 -1
app.py CHANGED
@@ -9,7 +9,7 @@ from matplotlib.figure import Figure
9
  from PIL import Image
10
 
11
  from onnxtr.io import DocumentFile
12
- from onnxtr.models import ocr_predictor
13
  from onnxtr.models.predictor import OCRPredictor
14
  from onnxtr.utils.visualization import visualize_page
15
 
@@ -35,12 +35,17 @@ RECO_ARCHS: List[str] = [
35
  "parseq",
36
  ]
37
 
 
 
 
 
38
 
39
  def load_predictor(
40
  det_arch: str,
41
  reco_arch: str,
42
  assume_straight_pages: bool,
43
  straighten_pages: bool,
 
44
  detect_language: bool,
45
  load_in_8_bit: bool,
46
  bin_thresh: float,
@@ -58,6 +63,7 @@ def load_predictor(
58
  disable_crop_orientation: whether to disable crop orientation or not
59
  disable_page_orientation: whether to disable page orientation or not
60
  straighten_pages: whether to straighten rotated pages or not
 
61
  detect_language: whether to detect the language of the text
62
  load_in_8_bit: whether to load the image in 8 bit mode
63
  bin_thresh: binarization threshold for the segmentation map
@@ -68,13 +74,13 @@ def load_predictor(
68
  instance of OCRPredictor
69
  """
70
  predictor = ocr_predictor(
71
- det_arch,
72
- reco_arch,
73
  assume_straight_pages=assume_straight_pages,
74
  straighten_pages=straighten_pages,
75
  detect_language=detect_language,
76
  load_in_8_bit=load_in_8_bit,
77
- export_as_straight_boxes=straighten_pages,
78
  detect_orientation=not assume_straight_pages,
79
  disable_crop_orientation=disable_crop_orientation,
80
  disable_page_orientation=disable_page_orientation,
@@ -132,6 +138,7 @@ def analyze_page(
132
  disable_crop_orientation: bool,
133
  disable_page_orientation: bool,
134
  straighten_pages: bool,
 
135
  detect_language: bool,
136
  load_in_8_bit: bool,
137
  bin_thresh: float,
@@ -149,6 +156,7 @@ def analyze_page(
149
  disable_crop_orientation: whether to disable crop orientation or not
150
  disable_page_orientation: whether to disable page orientation or not
151
  straighten_pages: whether to straighten rotated pages or not
 
152
  detect_language: whether to detect the language of the text
153
  load_in_8_bit: whether to load the image in 8 bit mode
154
  bin_thresh: binarization threshold for the segmentation map
@@ -156,7 +164,7 @@ def analyze_page(
156
 
157
  Returns:
158
  -------
159
- input image, segmentation heatmap, output image, OCR output
160
  """
161
  if uploaded_file is None:
162
  return None, "Please upload a document", None, None, None
@@ -165,19 +173,23 @@ def analyze_page(
165
  doc = DocumentFile.from_pdf(uploaded_file)
166
  else:
167
  doc = DocumentFile.from_images(uploaded_file)
 
 
 
 
168
 
169
- page = doc[page_idx - 1]
170
  img = page
171
 
172
  predictor = load_predictor(
173
- det_arch,
174
- reco_arch,
175
- assume_straight_pages,
176
- straighten_pages,
177
- detect_language,
178
- load_in_8_bit,
179
- bin_thresh,
180
- box_thresh,
 
181
  disable_crop_orientation=disable_crop_orientation,
182
  disable_page_orientation=disable_page_orientation,
183
  )
@@ -194,7 +206,12 @@ def analyze_page(
194
 
195
  out_img = matplotlib_to_pil(fig)
196
 
197
- return img, seg_heatmap, out_img, page_export
 
 
 
 
 
198
 
199
 
200
  with gr.Blocks(fill_height=True) as demo:
@@ -226,11 +243,14 @@ with gr.Blocks(fill_height=True) as demo:
226
  upload = gr.File(label="Upload File [JPG | PNG | PDF]", file_types=["pdf", "jpg", "png"])
227
  page_selection = gr.Slider(minimum=1, maximum=10, step=1, value=1, label="Page selection")
228
  det_model = gr.Dropdown(choices=DET_ARCHS, value=DET_ARCHS[0], label="Text detection model")
229
- reco_model = gr.Dropdown(choices=RECO_ARCHS, value=RECO_ARCHS[0], label="Text recognition model")
 
 
230
  assume_straight = gr.Checkbox(value=True, label="Assume straight pages")
231
  disable_crop_orientation = gr.Checkbox(value=False, label="Disable crop orientation")
232
  disable_page_orientation = gr.Checkbox(value=False, label="Disable page orientation")
233
  straighten = gr.Checkbox(value=False, label="Straighten pages")
 
234
  det_language = gr.Checkbox(value=False, label="Detect language")
235
  load_in_8_bit = gr.Checkbox(value=False, label="Load 8-bit quantized models")
236
  binarization_threshold = gr.Slider(
@@ -243,11 +263,11 @@ with gr.Blocks(fill_height=True) as demo:
243
  input_image = gr.Image(label="Input page", width=600)
244
  segmentation_heatmap = gr.Image(label="Segmentation heatmap", width=600)
245
  output_image = gr.Image(label="Output page", width=600)
246
- with gr.Column(scale=2):
247
- with gr.Row():
248
- gr.Markdown("### OCR output")
249
- with gr.Row():
250
  ocr_output = gr.JSON(label="OCR output", render=True, scale=1)
 
 
251
 
252
  analyze_button.click(
253
  analyze_page,
@@ -260,12 +280,13 @@ with gr.Blocks(fill_height=True) as demo:
260
  disable_crop_orientation,
261
  disable_page_orientation,
262
  straighten,
 
263
  det_language,
264
  load_in_8_bit,
265
  binarization_threshold,
266
  box_threshold,
267
  ],
268
- outputs=[input_image, segmentation_heatmap, output_image, ocr_output],
269
  )
270
 
271
  demo.launch(inbrowser=True, allowed_paths=["./data/logo.jpg"])
 
9
  from PIL import Image
10
 
11
  from onnxtr.io import DocumentFile
12
+ from onnxtr.models import from_hub, ocr_predictor
13
  from onnxtr.models.predictor import OCRPredictor
14
  from onnxtr.utils.visualization import visualize_page
15
 
 
35
  "parseq",
36
  ]
37
 
38
+ CUSTOM_RECO_ARCHS: List[str] = [
39
+ "Felix92/onnxtr-parseq-multilingual-v1",
40
+ ]
41
+
42
 
43
  def load_predictor(
44
  det_arch: str,
45
  reco_arch: str,
46
  assume_straight_pages: bool,
47
  straighten_pages: bool,
48
+ export_as_straight_boxes: bool,
49
  detect_language: bool,
50
  load_in_8_bit: bool,
51
  bin_thresh: float,
 
63
  disable_crop_orientation: whether to disable crop orientation or not
64
  disable_page_orientation: whether to disable page orientation or not
65
  straighten_pages: whether to straighten rotated pages or not
66
+ export_as_straight_boxes: whether to export straight boxes
67
  detect_language: whether to detect the language of the text
68
  load_in_8_bit: whether to load the image in 8 bit mode
69
  bin_thresh: binarization threshold for the segmentation map
 
74
  instance of OCRPredictor
75
  """
76
  predictor = ocr_predictor(
77
+ det_arch=det_arch,
78
+ reco_arch=reco_arch if reco_arch not in CUSTOM_RECO_ARCHS else from_hub(reco_arch),
79
  assume_straight_pages=assume_straight_pages,
80
  straighten_pages=straighten_pages,
81
  detect_language=detect_language,
82
  load_in_8_bit=load_in_8_bit,
83
+ export_as_straight_boxes=export_as_straight_boxes,
84
  detect_orientation=not assume_straight_pages,
85
  disable_crop_orientation=disable_crop_orientation,
86
  disable_page_orientation=disable_page_orientation,
 
138
  disable_crop_orientation: bool,
139
  disable_page_orientation: bool,
140
  straighten_pages: bool,
141
+ export_as_straight_boxes: bool,
142
  detect_language: bool,
143
  load_in_8_bit: bool,
144
  bin_thresh: float,
 
156
  disable_crop_orientation: whether to disable crop orientation or not
157
  disable_page_orientation: whether to disable page orientation or not
158
  straighten_pages: whether to straighten rotated pages or not
159
+ export_as_straight_boxes: whether to export straight boxes
160
  detect_language: whether to detect the language of the text
161
  load_in_8_bit: whether to load the image in 8 bit mode
162
  bin_thresh: binarization threshold for the segmentation map
 
164
 
165
  Returns:
166
  -------
167
+ input image, segmentation heatmap, output image, OCR output, synthesized page
168
  """
169
  if uploaded_file is None:
170
  return None, "Please upload a document", None, None, None
 
173
  doc = DocumentFile.from_pdf(uploaded_file)
174
  else:
175
  doc = DocumentFile.from_images(uploaded_file)
176
+ try:
177
+ page = doc[page_idx - 1]
178
+ except IndexError:
179
+ page = doc[-1]
180
 
 
181
  img = page
182
 
183
  predictor = load_predictor(
184
+ det_arch=det_arch,
185
+ reco_arch=reco_arch,
186
+ assume_straight_pages=assume_straight_pages,
187
+ straighten_pages=straighten_pages,
188
+ export_as_straight_boxes=export_as_straight_boxes,
189
+ detect_language=detect_language,
190
+ load_in_8_bit=load_in_8_bit,
191
+ bin_thresh=bin_thresh,
192
+ box_thresh=box_thresh,
193
  disable_crop_orientation=disable_crop_orientation,
194
  disable_page_orientation=disable_page_orientation,
195
  )
 
206
 
207
  out_img = matplotlib_to_pil(fig)
208
 
209
+ if assume_straight_pages or straighten_pages:
210
+ synthesized_page = out.synthesize()[0]
211
+ else:
212
+ synthesized_page = None
213
+
214
+ return img, seg_heatmap, out_img, page_export, synthesized_page
215
 
216
 
217
  with gr.Blocks(fill_height=True) as demo:
 
243
  upload = gr.File(label="Upload File [JPG | PNG | PDF]", file_types=["pdf", "jpg", "png"])
244
  page_selection = gr.Slider(minimum=1, maximum=10, step=1, value=1, label="Page selection")
245
  det_model = gr.Dropdown(choices=DET_ARCHS, value=DET_ARCHS[0], label="Text detection model")
246
+ reco_model = gr.Dropdown(
247
+ choices=RECO_ARCHS + CUSTOM_RECO_ARCHS, value=RECO_ARCHS[0], label="Text recognition model"
248
+ )
249
  assume_straight = gr.Checkbox(value=True, label="Assume straight pages")
250
  disable_crop_orientation = gr.Checkbox(value=False, label="Disable crop orientation")
251
  disable_page_orientation = gr.Checkbox(value=False, label="Disable page orientation")
252
  straighten = gr.Checkbox(value=False, label="Straighten pages")
253
+ export_as_straight_boxes = gr.Checkbox(value=False, label="Export as straight boxes")
254
  det_language = gr.Checkbox(value=False, label="Detect language")
255
  load_in_8_bit = gr.Checkbox(value=False, label="Load 8-bit quantized models")
256
  binarization_threshold = gr.Slider(
 
263
  input_image = gr.Image(label="Input page", width=600)
264
  segmentation_heatmap = gr.Image(label="Segmentation heatmap", width=600)
265
  output_image = gr.Image(label="Output page", width=600)
266
+ with gr.Row():
267
+ with gr.Column(scale=3):
 
 
268
  ocr_output = gr.JSON(label="OCR output", render=True, scale=1)
269
+ with gr.Column(scale=3):
270
+ synthesized_page = gr.Image(label="Synthesized page", width=600)
271
 
272
  analyze_button.click(
273
  analyze_page,
 
280
  disable_crop_orientation,
281
  disable_page_orientation,
282
  straighten,
283
+ export_as_straight_boxes,
284
  det_language,
285
  load_in_8_bit,
286
  binarization_threshold,
287
  box_threshold,
288
  ],
289
+ outputs=[input_image, segmentation_heatmap, output_image, ocr_output, synthesized_page],
290
  )
291
 
292
  demo.launch(inbrowser=True, allowed_paths=["./data/logo.jpg"])
requirements.txt CHANGED
@@ -1,2 +1,2 @@
1
  -e git+https://github.com/felixdittrich92/OnnxTR.git#egg=onnxtr[cpu-headless,viz]
2
- gradio>=4.37.1,<6.0.0
 
1
  -e git+https://github.com/felixdittrich92/OnnxTR.git#egg=onnxtr[cpu-headless,viz]
2
+ gradio>=4.37.1,<5.0.0