# Gradio demo application for the GOT-OCR2_0 model.
import gradio as gr
import spaces
from transformers import AutoModel, AutoTokenizer
from PIL import Image
import numpy as np

# Hugging Face Hub repository that provides both the tokenizer and the model.
_GOT_CHECKPOINT = 'ucaslcl/GOT-OCR2_0'

# trust_remote_code=True is passed to both loaders, as in the original setup.
tokenizer = AutoTokenizer.from_pretrained(_GOT_CHECKPOINT, trust_remote_code=True)
model = AutoModel.from_pretrained(
    _GOT_CHECKPOINT,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    device_map='cuda',
    use_safetensors=True,
)
# Switch to evaluation mode; the explicit .cuda() is kept even though
# device_map='cuda' is already requested above.
model = model.eval().cuda()

# UI mode label -> (model method name, ocr_type, whether box/color hints apply).
_GOT_MODES = {
    "plain texts OCR": ("chat", "ocr", False),
    "format texts OCR": ("chat", "format", False),
    "plain multi-crop OCR": ("chat_crop", "ocr", False),
    "format multi-crop OCR": ("chat_crop", "format", False),
    "plain fine-grained OCR": ("chat", "ocr", True),
    "format fine-grained OCR": ("chat", "format", True),
}


@spaces.GPU
def run_GOT(image_array, got_mode, ocr_box="", ocr_color=""):
    """Run GOT-OCR on one image in the requested mode.

    Args:
        image_array: The input image. May be a filesystem path (which is what
            a ``gr.Image(type="filepath")`` input supplies), an already
            opened ``PIL.Image.Image``, or array-like pixel data.
        got_mode: One of the mode labels listed in ``_GOT_MODES``.
        ocr_box: Box hint forwarded to the model in the fine-grained modes;
            ``None`` is treated as an empty string.
        ocr_color: Color hint forwarded to the model in the fine-grained
            modes; ``None`` is treated as an empty string.

    Returns:
        A ``(res, demo_html)`` tuple. ``res`` is whatever the model call
        returns. ``demo_html`` is the content of the rendered HTML file for
        the "format" modes (``None`` if the model wrote no file), and always
        ``None`` for the plain modes.

    Raises:
        gr.Error: If no image was supplied.
        ValueError: If ``got_mode`` is not a known mode label.
    """
    import os
    import tempfile

    if image_array is None:
        raise gr.Error("Please upload an image first.")

    # The model is called with gradio_input=True, i.e. it is handed an image
    # object rather than a path, so paths must be opened here.
    if isinstance(image_array, Image.Image):
        image = image_array
    elif isinstance(image_array, (str, os.PathLike)):
        with Image.open(image_array) as src:
            image = src.convert("RGB")
    else:
        image = Image.fromarray(np.uint8(image_array))

    try:
        method_name, ocr_type, fine_grained = _GOT_MODES[got_mode]
    except KeyError:
        raise ValueError(f"unsupported GOT mode: {got_mode!r}") from None
    ocr_fn = getattr(model, method_name)

    kwargs = {"ocr_type": ocr_type, "gradio_input": True}
    if fine_grained:
        # Hidden/unset UI widgets can deliver None; normalise to "".
        kwargs["ocr_box"] = ocr_box or ""
        kwargs["ocr_color"] = ocr_color or ""

    if ocr_type != "format":
        return ocr_fn(tokenizer, image, **kwargs), None

    # Render into a per-call temporary directory so concurrent requests
    # cannot overwrite or read each other's output, and so a stale file from
    # an earlier request is never returned.
    with tempfile.TemporaryDirectory() as render_dir:
        render_path = os.path.join(render_dir, "demo.html")
        res = ocr_fn(
            tokenizer,
            image,
            render=True,
            save_render_file=render_path,
            **kwargs,
        )
        try:
            # NOTE(review): read with the platform default encoding, as the
            # original did; confirm it matches how the renderer writes.
            with open(render_path, 'r') as f:
                demo_html = f.read()
        except FileNotFoundError:
            demo_html = None
    return res, demo_html

def task_update(task):
    """Return visibility updates for the three fine-grained widgets.

    The first update shows its widget only when ``task`` contains
    "fine-grained"; the second and third updates always hide theirs.
    """
    show_selector = "fine-grained" in task
    return [
        gr.update(visible=show_selector),
        gr.update(visible=False),
        gr.update(visible=False),
    ]

def fine_grained_update(task):
    """Return updates for two widgets based on the fine-grained type.

    Args:
        task: The selected fine-grained type, expected to be "box" or
            "color".

    Returns:
        A two-element list of ``gr.update`` objects.
        For "box" the first widget is hidden and cleared and the second is
        shown; for "color" it is the other way round. Any other value
        (e.g. ``None`` from a cleared selection) hides both widgets.
    """
    if task == "box":
        return [
            gr.update(visible=False, value=""),
            gr.update(visible=True),
        ]
    if task == 'color':
        return [
            gr.update(visible=True),
            gr.update(visible=False, value=""),
        ]
    # Always return one update per output instead of an implicit None.
    return [
        gr.update(visible=False),
        gr.update(visible=False),
    ]


# Build the demo UI: inputs in the left column, outputs in the right column,
# clickable examples underneath, then the event wiring.
with gr.Blocks() as demo:
    gr.Markdown("""
    # "General OCR Theory: Towards OCR-2.0 via a Unified End-to-end Model"
    
    "🔥🔥🔥This is the official online demo of GOT-OCR-2.0 model!!!"
    
    ### Repo
    - **Hugging Face**: [ucaslcl/GOT-OCR2_0](https://huggingface.co/ucaslcl/GOT-OCR2_0)
    - **GitHub**: [Ucas-HaoranWei/GOT-OCR2_0](https://github.com/Ucas-HaoranWei/GOT-OCR2.0/)
    - **Paper**: [arXiv](https://arxiv.org/abs/2409.01704)
    """)

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="filepath", label="upload your image")
            task_dropdown = gr.Dropdown(
                choices=[
                    "plain texts OCR",
                    "format texts OCR",
                    "plain multi-crop OCR",
                    "format multi-crop OCR",
                    "plain fine-grained OCR",
                    "format fine-grained OCR",
                ],
                label="Choose one mode of GOT",
                value="plain texts OCR"
            )
            # The three widgets below start hidden; the change handlers
            # registered further down toggle their visibility.
            fine_grained_dropdown = gr.Dropdown(
                choices=["box", "color"],
                label="fine-grained type",
                visible=False
            )
            color_dropdown = gr.Dropdown(
                choices=["red", "green", "blue"],
                label="color list",
                visible=False
            )
            box_input = gr.Textbox(
                label="input box: [x1,y1,x2,y2]",
                placeholder="e.g., [0,0,100,100]",
                visible=False
            )
            submit_button = gr.Button("Submit")

        with gr.Column():
            ocr_result = gr.Textbox(label="GOT output")
            html_result = gr.HTML(label="rendered html")

    # Each example row has one value per component in `inputs`. The box
    # column is wired to box_input so the fine-grained example actually
    # carries its box through to run_GOT.
    gr.Examples(
        examples=[
            ["assets/coco.jpg", "plain texts OCR", ""],
            ["assets/en2.png", "plain texts OCR", ""],
            ["assets/eq.jpg", "format texts OCR", ""],
            ["assets/table.jpg", "format texts OCR", ""],
            ["assets/aff2.png", "plain fine-grained OCR", "[409,763,756,891]"],
        ],
        inputs=[image_input, task_dropdown, box_input],
        label="examples",
    )

    # Changing the mode shows/hides the fine-grained type selector.
    task_dropdown.change(
        task_update,
        inputs=[task_dropdown],
        outputs=[fine_grained_dropdown, color_dropdown, box_input]
    )
    # Changing the fine-grained type swaps between the color list and the box field.
    fine_grained_dropdown.change(
        fine_grained_update,
        inputs=[fine_grained_dropdown],
        outputs=[color_dropdown, box_input]
    )

    # Argument order matches run_GOT(image_array, got_mode, ocr_box, ocr_color).
    submit_button.click(
        run_GOT,
        inputs=[image_input, task_dropdown, box_input, color_dropdown],
        outputs=[ocr_result, html_result]
    )

# Start the Gradio app; share=True additionally requests a public share link.
demo.launch(share=True)