Spaces:

TongkunGuan
/

Token-level_Text_Image_Foundation_Model

Running

App Files Community

TongkunGuan committed on Mar 12

Commit

79d5e07

verified ·

1 Parent(s): b70aad2

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -36

app.py CHANGED Viewed

@@ -98,7 +98,7 @@ def process_image(model, tokenizer, transform, device, check_type, image, text,
     return image, vis[0], bpe[0], len(vis) - 1
 # Gradio界面
-with gr.Blocks(title="BPE Visualization Demo") as demo:
     gr.Markdown("## BPE Visualization Demo - TokenFD基座模型能力可视化")
     with gr.Row():
@@ -106,13 +106,11 @@ with gr.Blocks(title="BPE Visualization Demo") as demo:
             model_type = gr.Dropdown(
                 choices=["TokenFD_4096_English_seg", "TokenFD_2048_Bilingual_seg", "R50", "R50_siglip"],
                 label="Select model type",
-                value="TokenOCR_4096_English_seg"  # 设置默认值为第一个选项
             )
             image_input = gr.Image(label="Upload images", type="pil")
             text_input = gr.Textbox(label="Input text")
             run_btn = gr.Button("RUN")
             gr.Examples(
                 examples=[
                     [os.path.join("examples", "examples0.jpg"), "Veterans and Benefits"],
@@ -125,60 +123,58 @@ with gr.Blocks(title="BPE Visualization Demo") as demo:
         with gr.Column(scale=2):
             gr.Markdown("<p style='font-size:20px;'><span style='color:red;'>If the input text is not included in the image</span>, the attention map will show a lot of noise (the actual response value is very low), since we normalize the attention map according to the relative value.</p>")
-            with gr.Row():
-                orig_img = gr.Image(label="Original picture", interactive=False)
-                heatmap = gr.Image(label="BPE visualization", interactive=False)
-            with gr.Row() as controls:
-                prev_btn = gr.Button("⬅ Last", visible=False)
-                index_slider = gr.Slider(0, 1, value=0, step=1, label="BPE index", visible=False)
-                next_btn = gr.Button("⮕ Next", visible=False)
             bpe_display = gr.Markdown("Current BPE: ", visible=False)
-    state = gr.State(current_vis=[], current_bpe=[], current_index=0)
     @spaces.GPU
     def on_run_clicked(model_type, image, text, state):
         image, vis, bpe, slider_max_val = process_image(*load_model(model_type), model_type, image, text, state)
         bpe_text = format_bpe_display(bpe)
-        index_slider.update(visible=True, maximum=slider_max_val, value=0)
-        prev_btn.update(visible=True)
-        next_btn.update(visible=True)
-        return image, vis, bpe_text
-    def update_index(change, state):
-        state['current_index'] = max(0, min(len(state['current_vis']) - 1, state['current_index'] + change))
-        return state['current_vis'][state['current_index']], format_bpe_display(state['current_bpe'][state['current_index']])
-    def format_bpe_display(bpe):
-        return f"<div style='text-align:center; font-size:20px;'><strong>Current BPE: <span style='color:red;'>{bpe}</span></strong></div>"
     run_btn.click(
         on_run_clicked,
         inputs=[model_type, image_input, text_input, state],
         outputs=[orig_img, heatmap, bpe_display],
     )
     prev_btn.click(
-        lambda state: (*update_index(-1, state), state['current_index']),
         inputs=[state],
         outputs=[heatmap, bpe_display, index_slider]
     )
     next_btn.click(
-        lambda state: (*update_index(1, state), state['current_index']),
         inputs=[state],
         outputs=[heatmap, bpe_display, index_slider]
     )
     index_slider.change(
-            lambda x, state: update_slider_index(x, state),
-            inputs=[index_slider, state],
-            outputs=[heatmap, bpe_display]
-        )
 if __name__ == "__main__":
-    demo.launch()

     return image, vis[0], bpe[0], len(vis) - 1
 # Gradio界面
+with gr.Blocks() as demo:
     gr.Markdown("## BPE Visualization Demo - TokenFD基座模型能力可视化")
     with gr.Row():
             model_type = gr.Dropdown(
                 choices=["TokenFD_4096_English_seg", "TokenFD_2048_Bilingual_seg", "R50", "R50_siglip"],
                 label="Select model type",
+                value="TokenOCR_4096_English_seg"
             )
             image_input = gr.Image(label="Upload images", type="pil")
             text_input = gr.Textbox(label="Input text")
             run_btn = gr.Button("RUN")
             gr.Examples(
                 examples=[
                     [os.path.join("examples", "examples0.jpg"), "Veterans and Benefits"],
         with gr.Column(scale=2):
             gr.Markdown("<p style='font-size:20px;'><span style='color:red;'>If the input text is not included in the image</span>, the attention map will show a lot of noise (the actual response value is very low), since we normalize the attention map according to the relative value.</p>")
+            orig_img = gr.Image(label="Original picture", interactive=False)
+            heatmap = gr.Image(label="BPE visualization", interactive=False)
+            prev_btn = gr.Button("⬅ Last", visible=False)
+            index_slider = gr.Slider(0, 1, value=0, step=1, label="BPE index", visible=False)
+            next_btn = gr.Button("⮕ Next", visible=False)
             bpe_display = gr.Markdown("Current BPE: ", visible=False)
+    state = gr.State()
+    state['current_vis'] = []
+    state['current_bpe'] = []
+    state['current_index'] = 0
     @spaces.GPU
     def on_run_clicked(model_type, image, text, state):
         image, vis, bpe, slider_max_val = process_image(*load_model(model_type), model_type, image, text, state)
+        state['current_vis'] = vis
+        state['current_bpe'] = bpe
+        state['current_index'] = 0
         bpe_text = format_bpe_display(bpe)
+        return image, vis, bpe_text, slider_max_val
     run_btn.click(
         on_run_clicked,
         inputs=[model_type, image_input, text_input, state],
         outputs=[orig_img, heatmap, bpe_display],
+        _js="""
+            (orig_img, heatmap, bpe_display, slider_max_val) => {
+                index_slider.update({ visible: true, maximum: slider_max_val, value: 0 });
+                prev_btn.update({ visible: true });
+                next_btn.update({ visible: true });
+                return [orig_img, heatmap, bpe_display];
+            }
+        """
     )
     prev_btn.click(
+        lambda state: update_index(-1, state),
         inputs=[state],
         outputs=[heatmap, bpe_display, index_slider]
     )
     next_btn.click(
+        lambda state: update_index(1, state),
         inputs=[state],
         outputs=[heatmap, bpe_display, index_slider]
     )
     index_slider.change(
+        lambda x, state: update_slider_index(x, state),
+        inputs=[index_slider, state],
+        outputs=[heatmap, bpe_display]
+    )
 if __name__ == "__main__":
+    demo.launch()