Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -1,60 +1,64 @@
 import gradio as gr
 import spaces
 from transformers import AutoModel, AutoTokenizer
+from PIL import Image
+import numpy as np

 tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
 model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, device_map='cuda', use_safetensors=True)
 model = model.eval().cuda()

 @spaces.GPU
-def …
-…
-    elif …
-        res = model.chat(tokenizer, image, ocr_type='…
+def run_GOT(image_array, got_mode, ocr_box="", ocr_color=""):
+    image = Image.fromarray(np.uint8(image_array))
+    if got_mode == "plain texts OCR":
+        res = model.chat(tokenizer, image, ocr_type='ocr', gradio_input=True)
+    elif got_mode == "format texts OCR":
+        res = model.chat(tokenizer, image, ocr_type='format', render=True, save_render_file='./demo.html', gradio_input=True)
+    elif got_mode == "plain multi-crop OCR":
+        res = model.chat_crop(tokenizer, image, ocr_type='ocr', gradio_input=True)
+    elif got_mode == "format multi-crop OCR":
+        res = model.chat_crop(tokenizer, image, ocr_type='format', render=True, save_render_file='./demo.html', gradio_input=True)
+
+    elif got_mode == "plain fine-grained OCR":
+        res = model.chat(tokenizer, image, ocr_type='ocr', ocr_box=ocr_box, ocr_color=ocr_color, gradio_input=True)
+    elif got_mode == "format fine-grained OCR":
+        res = model.chat(tokenizer, image, ocr_type='format', ocr_box=ocr_box, ocr_color=ocr_color, render=True, save_render_file='./demo.html', gradio_input=True)
+
+    if "format" in got_mode:
         with open('./demo.html', 'r') as f:
-…
-        return res, …
-…
+            demo_html = f.read()
+        return res, demo_html
     return res, None

-def …
-    if …
-        return [gr.update(visible=False)] * 4
-    elif task == "Fine-grained OCR (Box)":
+def task_update(task):
+    if "fine-grained" in task:
         return [
-            gr.update(visible=True, choices=["ocr", "format"]),
             gr.update(visible=True),
             gr.update(visible=False),
-            gr.update(visible=False)
+            gr.update(visible=False),
         ]
-…
+    else:
         return [
-            gr.update(visible=True, choices=["ocr", "format"]),
             gr.update(visible=False),
-            gr.update(visible=…
-            gr.update(visible=False)
+            gr.update(visible=False),
+            gr.update(visible=False),
         ]
-…
+
+def fine_grained_update(task):
+    if task == "box":
+        return [
+            gr.update(visible=False, value = ""),
+            gr.update(visible=True),
+        ]
+    elif task == 'color':
+        return [
+            gr.update(visible=True),
+            gr.update(visible=False, value = ""),
+        ]
+

 with gr.Blocks() as demo:
-    gr.Markdown()
-    gr.Markdown()
     gr.Markdown("""
     # "General OCR Theory: Towards OCR-2.0 via a Unified End-to-end Model"

@@ -62,7 +66,7 @@ with gr.Blocks() as demo:

     ### Repo
     - **Hugging Face**: [ucaslcl/GOT-OCR2_0](https://huggingface.co/ucaslcl/GOT-OCR2_0)
-    - **GitHub**: […
+    - **GitHub**: [Ucas-HaoranWei/GOT-OCR2_0](https://github.com/Ucas-HaoranWei/GOT-OCR2.0/)
     - **Paper**: [AriXiv](https://arxiv.org/abs/2409.01704)
     """)

@@ -71,52 +75,64 @@ with gr.Blocks() as demo:
             image_input = gr.Image(type="filepath", label="upload your image")
             task_dropdown = gr.Dropdown(
                 choices=[
-                    "plain …
-                    "format …
-                    "…
-                    "…
-                    "…
-                    "…
+                    "plain texts OCR",
+                    "format texts OCR",
+                    "plain multi-crop OCR",
+                    "format multi-crop OCR",
+                    "plain fine-grained OCR",
+                    "format fine-grained OCR",
                 ],
-                label="…
-                value="…
+                label="Choose one mode of GOT",
+                value="plain texts OCR"
             )
-…
-                choices=["…
-                label="…
+            fine_grained_dropdown = gr.Dropdown(
+                choices=["box", "color"],
+                label="fine-grained type",
                 visible=False
             )
-…
-                label="OCR Box (x1,y1,x2,y2)",
-                placeholder="e.g., 100,100,200,200",
-                visible=False
-            )
-            ocr_color_dropdown = gr.Dropdown(
+            color_dropdown = gr.Dropdown(
                 choices=["red", "green", "blue"],
-                label="…
+                label="color list",
                 visible=False
             )
-…
-                label="…
+            box_input = gr.Textbox(
+                label="input box: [x1,y1,x2,y2]",
+                placeholder="e.g., [0,0,100,100]",
                 visible=False
             )
-            submit_button = gr.Button("…
+            submit_button = gr.Button("Submit")

         with gr.Column():
-…
+            ocr_result = gr.Textbox(label="GOT output")
+            html_result = gr.HTML(label="rendered html")
+
+            gr.Examples(
+                examples=[
+                    ["assets/coco.jpg", "plain texts OCR", "", ""],
+                    ["assets/en2.png", "plain texts OCR", "", ""],
+                    ["assets/eq.jpg", "format texts OCR", "", ""],
+                    ["assets/table.jpg", "format texts OCR", "", ""],
+                    ["assets/aff2.png", "plain fine-grained OCR", "[409,763,756,891]", ""],
+                ],
+                inputs=[image_input, task_dropdown],
+                label="examples",
+            )

-…
     task_dropdown.change(
-…
+        task_update,
         inputs=[task_dropdown],
-        outputs=[…
+        outputs=[fine_grained_dropdown, color_dropdown, box_input]
+    )
+    fine_grained_dropdown.change(
+        fine_grained_update,
+        inputs=[fine_grained_dropdown],
+        outputs=[color_dropdown, box_input]
     )

     submit_button.click(
-…
-        inputs=[image_input, task_dropdown, …
-        outputs=[…
+        run_GOT,
+        inputs=[image_input, task_dropdown, box_input, color_dropdown],
+        outputs=[ocr_result, html_result]
     )

-    demo.launch()
+    demo.launch(share=True)
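For reference, a minimal sketch of how the updated app could be queried programmatically with gradio_client. The Space id and the api_name="/run_GOT" endpoint are assumptions (they are not stated in this diff); the argument order mirrors submit_button.click(..., inputs=[image_input, task_dropdown, box_input, color_dropdown]) above, and the returned pair mirrors outputs=[ocr_result, html_result].

    # Hedged sketch: querying the running Space from Python with gradio_client.
    # "your-username/GOT-OCR2_0-demo" and api_name="/run_GOT" are placeholders;
    # the real values are listed on the Space's "Use via API" page.
    from gradio_client import Client, handle_file  # handle_file requires a recent gradio_client

    client = Client("your-username/GOT-OCR2_0-demo")  # hypothetical Space id
    result = client.predict(
        handle_file("assets/coco.jpg"),  # image_input (uploaded file)
        "plain texts OCR",               # task_dropdown: one of the six GOT modes
        "",                              # box_input, e.g. "[0,0,100,100]" for fine-grained box OCR
        "",                              # color_dropdown, e.g. "red" for fine-grained color OCR
        api_name="/run_GOT",             # assumed endpoint name derived from run_GOT
    )
    ocr_text, rendered_html = result     # matches outputs=[ocr_result, html_result]
    print(ocr_text)

The exact endpoint name and input signature can be confirmed from the API docs that Gradio auto-generates for the Space.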