Spaces:
Running
on
Zero
Running
on
Zero
Upload folder using huggingface_hub
Browse files
- app.py +42 -21
- requirements.txt +1 -1
app.py
CHANGED
@@ -9,7 +9,7 @@ from matplotlib.figure import Figure
|
|
9 |
from PIL import Image
|
10 |
|
11 |
from onnxtr.io import DocumentFile
|
12 |
-
from onnxtr.models import ocr_predictor
|
13 |
from onnxtr.models.predictor import OCRPredictor
|
14 |
from onnxtr.utils.visualization import visualize_page
|
15 |
|
@@ -35,12 +35,17 @@ RECO_ARCHS: List[str] = [
|
|
35 |
"parseq",
|
36 |
]
|
37 |
|
|
|
|
|
|
|
|
|
38 |
|
39 |
def load_predictor(
|
40 |
det_arch: str,
|
41 |
reco_arch: str,
|
42 |
assume_straight_pages: bool,
|
43 |
straighten_pages: bool,
|
|
|
44 |
detect_language: bool,
|
45 |
load_in_8_bit: bool,
|
46 |
bin_thresh: float,
|
@@ -58,6 +63,7 @@ def load_predictor(
|
|
58 |
disable_crop_orientation: whether to disable crop orientation or not
|
59 |
disable_page_orientation: whether to disable page orientation or not
|
60 |
straighten_pages: whether to straighten rotated pages or not
|
|
|
61 |
detect_language: whether to detect the language of the text
|
62 |
load_in_8_bit: whether to load the 8-bit quantized versions of the models
|
63 |
bin_thresh: binarization threshold for the segmentation map
|
@@ -68,13 +74,13 @@ def load_predictor(
|
|
68 |
instance of OCRPredictor
|
69 |
"""
|
70 |
predictor = ocr_predictor(
|
71 |
-
det_arch,
|
72 |
-
reco_arch,
|
73 |
assume_straight_pages=assume_straight_pages,
|
74 |
straighten_pages=straighten_pages,
|
75 |
detect_language=detect_language,
|
76 |
load_in_8_bit=load_in_8_bit,
|
77 |
-
export_as_straight_boxes=
|
78 |
detect_orientation=not assume_straight_pages,
|
79 |
disable_crop_orientation=disable_crop_orientation,
|
80 |
disable_page_orientation=disable_page_orientation,
|
@@ -132,6 +138,7 @@ def analyze_page(
|
|
132 |
disable_crop_orientation: bool,
|
133 |
disable_page_orientation: bool,
|
134 |
straighten_pages: bool,
|
|
|
135 |
detect_language: bool,
|
136 |
load_in_8_bit: bool,
|
137 |
bin_thresh: float,
|
@@ -149,6 +156,7 @@ def analyze_page(
|
|
149 |
disable_crop_orientation: whether to disable crop orientation or not
|
150 |
disable_page_orientation: whether to disable page orientation or not
|
151 |
straighten_pages: whether to straighten rotated pages or not
|
|
|
152 |
detect_language: whether to detect the language of the text
|
153 |
load_in_8_bit: whether to load the 8-bit quantized versions of the models
|
154 |
bin_thresh: binarization threshold for the segmentation map
|
@@ -156,7 +164,7 @@ def analyze_page(
|
|
156 |
|
157 |
Returns:
|
158 |
-------
|
159 |
-
input image, segmentation heatmap, output image, OCR output
|
160 |
"""
|
161 |
if uploaded_file is None:
|
162 |
return None, "Please upload a document", None, None, None
|
@@ -165,19 +173,23 @@ def analyze_page(
|
|
165 |
doc = DocumentFile.from_pdf(uploaded_file)
|
166 |
else:
|
167 |
doc = DocumentFile.from_images(uploaded_file)
|
|
|
|
|
|
|
|
|
168 |
|
169 |
-
page = doc[page_idx - 1]
|
170 |
img = page
|
171 |
|
172 |
predictor = load_predictor(
|
173 |
-
det_arch,
|
174 |
-
reco_arch,
|
175 |
-
assume_straight_pages,
|
176 |
-
straighten_pages,
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
|
|
181 |
disable_crop_orientation=disable_crop_orientation,
|
182 |
disable_page_orientation=disable_page_orientation,
|
183 |
)
|
@@ -194,7 +206,12 @@ def analyze_page(
|
|
194 |
|
195 |
out_img = matplotlib_to_pil(fig)
|
196 |
|
197 |
-
|
|
|
|
|
|
|
|
|
|
|
198 |
|
199 |
|
200 |
with gr.Blocks(fill_height=True) as demo:
|
@@ -226,11 +243,14 @@ with gr.Blocks(fill_height=True) as demo:
|
|
226 |
upload = gr.File(label="Upload File [JPG | PNG | PDF]", file_types=["pdf", "jpg", "png"])
|
227 |
page_selection = gr.Slider(minimum=1, maximum=10, step=1, value=1, label="Page selection")
|
228 |
det_model = gr.Dropdown(choices=DET_ARCHS, value=DET_ARCHS[0], label="Text detection model")
|
229 |
-
reco_model = gr.Dropdown(
|
|
|
|
|
230 |
assume_straight = gr.Checkbox(value=True, label="Assume straight pages")
|
231 |
disable_crop_orientation = gr.Checkbox(value=False, label="Disable crop orientation")
|
232 |
disable_page_orientation = gr.Checkbox(value=False, label="Disable page orientation")
|
233 |
straighten = gr.Checkbox(value=False, label="Straighten pages")
|
|
|
234 |
det_language = gr.Checkbox(value=False, label="Detect language")
|
235 |
load_in_8_bit = gr.Checkbox(value=False, label="Load 8-bit quantized models")
|
236 |
binarization_threshold = gr.Slider(
|
@@ -243,11 +263,11 @@ with gr.Blocks(fill_height=True) as demo:
|
|
243 |
input_image = gr.Image(label="Input page", width=600)
|
244 |
segmentation_heatmap = gr.Image(label="Segmentation heatmap", width=600)
|
245 |
output_image = gr.Image(label="Output page", width=600)
|
246 |
-
with gr.
|
247 |
-
with gr.
|
248 |
-
gr.Markdown("### OCR output")
|
249 |
-
with gr.Row():
|
250 |
ocr_output = gr.JSON(label="OCR output", render=True, scale=1)
|
|
|
|
|
251 |
|
252 |
analyze_button.click(
|
253 |
analyze_page,
|
@@ -260,12 +280,13 @@ with gr.Blocks(fill_height=True) as demo:
|
|
260 |
disable_crop_orientation,
|
261 |
disable_page_orientation,
|
262 |
straighten,
|
|
|
263 |
det_language,
|
264 |
load_in_8_bit,
|
265 |
binarization_threshold,
|
266 |
box_threshold,
|
267 |
],
|
268 |
-
outputs=[input_image, segmentation_heatmap, output_image, ocr_output],
|
269 |
)
|
270 |
|
271 |
demo.launch(inbrowser=True, allowed_paths=["./data/logo.jpg"])
|
|
|
9 |
from PIL import Image
|
10 |
|
11 |
from onnxtr.io import DocumentFile
|
12 |
+
from onnxtr.models import from_hub, ocr_predictor
|
13 |
from onnxtr.models.predictor import OCRPredictor
|
14 |
from onnxtr.utils.visualization import visualize_page
|
15 |
|
|
|
35 |
"parseq",
|
36 |
]
|
37 |
|
38 |
+
CUSTOM_RECO_ARCHS: List[str] = [
|
39 |
+
"Felix92/onnxtr-parseq-multilingual-v1",
|
40 |
+
]
|
41 |
+
|
42 |
|
43 |
def load_predictor(
|
44 |
det_arch: str,
|
45 |
reco_arch: str,
|
46 |
assume_straight_pages: bool,
|
47 |
straighten_pages: bool,
|
48 |
+
export_as_straight_boxes: bool,
|
49 |
detect_language: bool,
|
50 |
load_in_8_bit: bool,
|
51 |
bin_thresh: float,
|
|
|
63 |
disable_crop_orientation: whether to disable crop orientation or not
|
64 |
disable_page_orientation: whether to disable page orientation or not
|
65 |
straighten_pages: whether to straighten rotated pages or not
|
66 |
+
export_as_straight_boxes: whether to export straight boxes
|
67 |
detect_language: whether to detect the language of the text
|
68 |
load_in_8_bit: whether to load the 8-bit quantized versions of the models
|
69 |
bin_thresh: binarization threshold for the segmentation map
|
|
|
74 |
instance of OCRPredictor
|
75 |
"""
|
76 |
predictor = ocr_predictor(
|
77 |
+
det_arch=det_arch,
|
78 |
+
reco_arch=reco_arch if reco_arch not in CUSTOM_RECO_ARCHS else from_hub(reco_arch),
|
79 |
assume_straight_pages=assume_straight_pages,
|
80 |
straighten_pages=straighten_pages,
|
81 |
detect_language=detect_language,
|
82 |
load_in_8_bit=load_in_8_bit,
|
83 |
+
export_as_straight_boxes=export_as_straight_boxes,
|
84 |
detect_orientation=not assume_straight_pages,
|
85 |
disable_crop_orientation=disable_crop_orientation,
|
86 |
disable_page_orientation=disable_page_orientation,
|
|
|
138 |
disable_crop_orientation: bool,
|
139 |
disable_page_orientation: bool,
|
140 |
straighten_pages: bool,
|
141 |
+
export_as_straight_boxes: bool,
|
142 |
detect_language: bool,
|
143 |
load_in_8_bit: bool,
|
144 |
bin_thresh: float,
|
|
|
156 |
disable_crop_orientation: whether to disable crop orientation or not
|
157 |
disable_page_orientation: whether to disable page orientation or not
|
158 |
straighten_pages: whether to straighten rotated pages or not
|
159 |
+
export_as_straight_boxes: whether to export straight boxes
|
160 |
detect_language: whether to detect the language of the text
|
161 |
load_in_8_bit: whether to load the 8-bit quantized versions of the models
|
162 |
bin_thresh: binarization threshold for the segmentation map
|
|
|
164 |
|
165 |
Returns:
|
166 |
-------
|
167 |
+
input image, segmentation heatmap, output image, OCR output, synthesized page
|
168 |
"""
|
169 |
if uploaded_file is None:
|
170 |
return None, "Please upload a document", None, None, None
|
|
|
173 |
doc = DocumentFile.from_pdf(uploaded_file)
|
174 |
else:
|
175 |
doc = DocumentFile.from_images(uploaded_file)
|
176 |
+
try:
|
177 |
+
page = doc[page_idx - 1]
|
178 |
+
except IndexError:
|
179 |
+
page = doc[-1]
|
180 |
|
|
|
181 |
img = page
|
182 |
|
183 |
predictor = load_predictor(
|
184 |
+
det_arch=det_arch,
|
185 |
+
reco_arch=reco_arch,
|
186 |
+
assume_straight_pages=assume_straight_pages,
|
187 |
+
straighten_pages=straighten_pages,
|
188 |
+
export_as_straight_boxes=export_as_straight_boxes,
|
189 |
+
detect_language=detect_language,
|
190 |
+
load_in_8_bit=load_in_8_bit,
|
191 |
+
bin_thresh=bin_thresh,
|
192 |
+
box_thresh=box_thresh,
|
193 |
disable_crop_orientation=disable_crop_orientation,
|
194 |
disable_page_orientation=disable_page_orientation,
|
195 |
)
|
|
|
206 |
|
207 |
out_img = matplotlib_to_pil(fig)
|
208 |
|
209 |
+
if assume_straight_pages or straighten_pages:
|
210 |
+
synthesized_page = out.synthesize()[0]
|
211 |
+
else:
|
212 |
+
synthesized_page = None
|
213 |
+
|
214 |
+
return img, seg_heatmap, out_img, page_export, synthesized_page
|
215 |
|
216 |
|
217 |
with gr.Blocks(fill_height=True) as demo:
|
|
|
243 |
upload = gr.File(label="Upload File [JPG | PNG | PDF]", file_types=["pdf", "jpg", "png"])
|
244 |
page_selection = gr.Slider(minimum=1, maximum=10, step=1, value=1, label="Page selection")
|
245 |
det_model = gr.Dropdown(choices=DET_ARCHS, value=DET_ARCHS[0], label="Text detection model")
|
246 |
+
reco_model = gr.Dropdown(
|
247 |
+
choices=RECO_ARCHS + CUSTOM_RECO_ARCHS, value=RECO_ARCHS[0], label="Text recognition model"
|
248 |
+
)
|
249 |
assume_straight = gr.Checkbox(value=True, label="Assume straight pages")
|
250 |
disable_crop_orientation = gr.Checkbox(value=False, label="Disable crop orientation")
|
251 |
disable_page_orientation = gr.Checkbox(value=False, label="Disable page orientation")
|
252 |
straighten = gr.Checkbox(value=False, label="Straighten pages")
|
253 |
+
export_as_straight_boxes = gr.Checkbox(value=False, label="Export as straight boxes")
|
254 |
det_language = gr.Checkbox(value=False, label="Detect language")
|
255 |
load_in_8_bit = gr.Checkbox(value=False, label="Load 8-bit quantized models")
|
256 |
binarization_threshold = gr.Slider(
|
|
|
263 |
input_image = gr.Image(label="Input page", width=600)
|
264 |
segmentation_heatmap = gr.Image(label="Segmentation heatmap", width=600)
|
265 |
output_image = gr.Image(label="Output page", width=600)
|
266 |
+
with gr.Row():
|
267 |
+
with gr.Column(scale=3):
|
|
|
|
|
268 |
ocr_output = gr.JSON(label="OCR output", render=True, scale=1)
|
269 |
+
with gr.Column(scale=3):
|
270 |
+
synthesized_page = gr.Image(label="Synthesized page", width=600)
|
271 |
|
272 |
analyze_button.click(
|
273 |
analyze_page,
|
|
|
280 |
disable_crop_orientation,
|
281 |
disable_page_orientation,
|
282 |
straighten,
|
283 |
+
export_as_straight_boxes,
|
284 |
det_language,
|
285 |
load_in_8_bit,
|
286 |
binarization_threshold,
|
287 |
box_threshold,
|
288 |
],
|
289 |
+
outputs=[input_image, segmentation_heatmap, output_image, ocr_output, synthesized_page],
|
290 |
)
|
291 |
|
292 |
demo.launch(inbrowser=True, allowed_paths=["./data/logo.jpg"])
|
requirements.txt
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
-e git+https://github.com/felixdittrich92/OnnxTR.git#egg=onnxtr[cpu-headless,viz]
|
2 |
-
gradio>=4.37.1,<
|
|
|
1 |
-e git+https://github.com/felixdittrich92/OnnxTR.git#egg=onnxtr[cpu-headless,viz]
|
2 |
+
gradio>=4.37.1,<5.0.0
|