YucYux committed
Commit d08f144 · 1 Parent(s): db20615

Revert "Added support for MMaDA-8B-MixCoT"

This reverts commit db20615a9ccddd7b9c1ee9043750d591e46628a2.
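
For reference, a revert commit with exactly this message is what git's built-in revert produces; a minimal sketch, assuming the branch containing db20615 is checked out:

    git revert db20615a9ccddd7b9c1ee9043750d591e46628a2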

Files changed (1)
  1. app.py +25 -240
app.py CHANGED
@@ -47,23 +47,22 @@ def get_num_transfer_tokens(mask_index, steps):
     return num_transfer_tokens
 
 DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
-DEFAULT_MODEL_PATH = "Gen-Verse/MMaDA-8B-MixCoT" # Default
+DEFAULT_MODEL_PATH = "Gen-Verse/MMaDA-8B-Base" # Default
 MASK_ID = 126336
 MODEL = MMadaModelLM.from_pretrained(DEFAULT_MODEL_PATH, trust_remote_code=True, torch_dtype=torch.bfloat16).to(DEVICE).eval()
 TOKENIZER = AutoTokenizer.from_pretrained(DEFAULT_MODEL_PATH, trust_remote_code=True)
 uni_prompting = UniversalPrompting(TOKENIZER, max_text_len=512, special_tokens=("<|soi|>", "<|eoi|>", "<|sov|>", "<|eov|>", "<|t2i|>", "<|mmu|>", "<|t2v|>", "<|v2v|>", "<|lvg|>"),ignore_id=-100, cond_dropout_prob=0.1, use_reserved_token=True)
 VQ_MODEL = MAGVITv2().from_pretrained("showlab/magvitv2").to(DEVICE)
 
-CURRENT_MODEL_PATH = DEFAULT_MODEL_PATH
+CURRENT_MODEL_PATH = None
 
 MODEL_CHOICES = [
     "MMaDA-8B-Base",
-    "MMaDA-8B-MixCoT",
+    "MMaDA-8B-MixCoT (coming soon)",
     "MMaDA-8B-Max (coming soon)"
 ]
 MODEL_ACTUAL_PATHS = {
-    "MMaDA-8B-Base": "Gen-Verse/MMaDA-8B-Base",
-    "MMaDA-8B-MixCoT": "Gen-Verse/MMaDA-8B-MixCoT"
+    "MMaDA-8B-Base": DEFAULT_MODEL_PATH,
 }
 
 def clear_outputs_action():
@@ -117,91 +116,19 @@ def _load_model_and_tokenizer_core(model_path_to_load, model_display_name_for_st
     # return f"Error loading model '{model_display_name_for_status}': {str(e)}"
 
 def handle_model_selection_change(selected_model_name_ui):
-    global MODEL, TOKENIZER, MASK_ID, CURRENT_MODEL_PATH, DEVICE, uni_prompting
-
-    status_msg = ""
-    # Initialize the visibility updates for the Examples blocks
-    vis_lm_base = gr.update(visible=False)
-    vis_lm_mixcot = gr.update(visible=False)
-    vis_lm_max = gr.update(visible=False)
-    vis_mmu_base = gr.update(visible=False)
-    vis_mmu_mixcot = gr.update(visible=False)
-    vis_mmu_max = gr.update(visible=False)
-
-    # Decide the default thinking-mode state from the selected model
-    is_mixcot_model_selected = (selected_model_name_ui == "MMaDA-8B-MixCoT")
-
-    # Initial thinking-mode state and button labels
-    # Defaults to True (on) when the MixCoT model is selected
-    current_thinking_mode_lm_state = is_mixcot_model_selected
-    current_thinking_mode_mmu_state = is_mixcot_model_selected
-
-    lm_think_button_label = "Thinking Mode ✅" if current_thinking_mode_lm_state else "Thinking Mode ❌"
-    mmu_think_button_label = "Thinking Mode ✅" if current_thinking_mode_mmu_state else "Thinking Mode ❌"
-
-    update_think_button_lm = gr.update(value=lm_think_button_label)
-    update_think_button_mmu = gr.update(value=mmu_think_button_label)
-
-    if selected_model_name_ui == "MMaDA-8B-Max (coming soon)":
+    if "coming soon" in selected_model_name_ui.lower():
+        global MODEL, TOKENIZER, MASK_ID, CURRENT_MODEL_PATH
         MODEL = None
         TOKENIZER = None
         MASK_ID = None
         CURRENT_MODEL_PATH = None
-        status_msg = f"'{selected_model_name_ui}' is not yet available. Please select another model."
-        vis_lm_max = gr.update(visible=True)
-        vis_mmu_max = gr.update(visible=True)
-        # For non-MixCoT models, thinking mode was already set to False above via is_mixcot_model_selected
-    else:
-        actual_path = MODEL_ACTUAL_PATHS.get(selected_model_name_ui)
-        if not actual_path:
-            MODEL = None
-            TOKENIZER = None
-            MASK_ID = None
-            CURRENT_MODEL_PATH = None
-            status_msg = f"Path for '{selected_model_name_ui}' is not defined. Cannot load."
-            # If the path is undefined (i.e. not a valid MixCoT load), thinking mode should be False
-            if is_mixcot_model_selected:  # should be MixCoT, but the path is missing
-                current_thinking_mode_lm_state = False
-                current_thinking_mode_mmu_state = False
-                update_think_button_lm = gr.update(value="Thinking Mode ❌")
-                update_think_button_mmu = gr.update(value="Thinking Mode ❌")
-        else:
-            # Try to load the model
-            status_msg = _load_model_and_tokenizer_core(actual_path, selected_model_name_ui)
-
-            # Check whether the model loaded successfully
-            if "Error loading model" in status_msg or MODEL is None:
-                # If the MixCoT model was selected but failed to load, turn thinking mode off
-                if is_mixcot_model_selected:
-                    current_thinking_mode_lm_state = False
-                    current_thinking_mode_mmu_state = False
-                    update_think_button_lm = gr.update(value="Thinking Mode ❌")
-                    update_think_button_mmu = gr.update(value="Thinking Mode ❌")
-                if MODEL is None and "Error" not in status_msg:  # add a generic error message
-                    status_msg = f"Failed to properly load model '{selected_model_name_ui}'. {status_msg}"
-            else:  # model loaded successfully
-                if selected_model_name_ui == "MMaDA-8B-Base":
-                    vis_lm_base = gr.update(visible=True)
-                    vis_mmu_base = gr.update(visible=True)
-                elif selected_model_name_ui == "MMaDA-8B-MixCoT":
-                    vis_lm_mixcot = gr.update(visible=True)
-                    vis_mmu_mixcot = gr.update(visible=True)
-                # thinking mode was already set to True at the top of the function via is_mixcot_model_selected
-
-    return (
-        status_msg,
-        vis_lm_base,
-        vis_lm_mixcot,
-        vis_lm_max,
-        vis_mmu_base,
-        vis_mmu_mixcot,
-        vis_mmu_max,
-        # New return values that update the thinking_mode states and buttons
-        current_thinking_mode_lm_state,   # plain value for the gr.State
-        update_think_button_lm,           # gr.update object for the gr.Button
-        current_thinking_mode_mmu_state,
-        update_think_button_mmu
-    )
+        return f"'{selected_model_name_ui}' is not yet available. Please select 'Model A'."
+
+    actual_path = MODEL_ACTUAL_PATHS.get(selected_model_name_ui)
+    if not actual_path:
+        return f"Path for '{selected_model_name_ui}' is not defined. Cannot load."
+
+    return _load_model_and_tokenizer_core(actual_path, selected_model_name_ui)
 
 
 def get_highlighted_text_tuples(current_x_ids_batch, prompt_input_ids, prompt_len, tk, current_mask_id, raw_prompt_attention_mask):
@@ -691,7 +618,7 @@ with gr.Blocks(css=css_styles, theme=theme) as demo:
             model_select_radio = gr.Radio(
                 label="Select Text Generation Model",
                 choices=MODEL_CHOICES,
-                value="MMaDA-8B-MixCoT"
+                value=MODEL_CHOICES[0]
             )
             model_load_status_box = gr.Textbox(
                 label="Model Load Status",
@@ -736,27 +663,7 @@ with gr.Blocks(css=css_styles, theme=theme) as demo:
 
 
 
-            examples_lm_base = gr.Examples(
-                examples=[
-                    ["A rectangular prism has a length of 5 units, a width of 4 units, and a height of 3 units. What is the volume of the prism?", 256, 512, 128, 1, 0, "low_confidence"],
-                    ["Lily can run 12 kilometers per hour for 4 hours. After that, she can run 6 kilometers per hour. How many kilometers can she run in 8 hours?", 256, 512, 64, 1, 0, "low_confidence"]
-                ],
-                inputs=[prompt_input_box_lm, steps_slider_lm, gen_length_slider_lm, block_length_slider_lm, temperature_slider_lm, cfg_scale_slider_lm, remasking_dropdown_lm],
-                outputs=[output_visualization_box_lm, output_final_text_box_lm],
-                fn=generate_viz_wrapper_lm,
-                cache_examples=False
-            )
-            examples_lm_mixcot = gr.Examples(
-                examples=[
-                    ["A rectangular prism has a length of 5 units, a width of 4 units, and a height of 3 units. What is the volume of the prism?", 256, 512, 128, 1, 0, "low_confidence"],
-                    ["Lily can run 12 kilometers per hour for 4 hours. After that, she can run 6 kilometers per hour. How many kilometers can she run in 8 hours?", 256, 512, 64, 1, 0, "low_confidence"]
-                ],
-                inputs=[prompt_input_box_lm, steps_slider_lm, gen_length_slider_lm, block_length_slider_lm, temperature_slider_lm, cfg_scale_slider_lm, remasking_dropdown_lm],
-                outputs=[output_visualization_box_lm, output_final_text_box_lm],
-                fn=generate_viz_wrapper_lm,
-                cache_examples=False
-            )
-            examples_lm_max = gr.Examples(
+            gr.Examples(
                 examples=[
                     ["A rectangular prism has a length of 5 units, a width of 4 units, and a height of 3 units. What is the volume of the prism?", 256, 512, 128, 1, 0, "low_confidence"],
                     ["Lily can run 12 kilometers per hour for 4 hours. After that, she can run 6 kilometers per hour. How many kilometers can she run in 8 hours?", 256, 512, 64, 1, 0, "low_confidence"]
@@ -774,7 +681,7 @@ with gr.Blocks(css=css_styles, theme=theme) as demo:
             prompt_input_box_mmu = gr.Textbox(
                 label="Enter your prompt:",
                 lines=3,
-                value=""
+                value="Please describe this image in detail."
             )
             think_button_mmu = gr.Button("🧠 Enable Thinking Mode", elem_id="think_btn")
             with gr.Accordion("Generation Parameters", open=True):
@@ -782,7 +689,7 @@ with gr.Blocks(css=css_styles, theme=theme) as demo:
                 gen_length_slider_mmu = gr.Slider(minimum=64, maximum=1024, value=512, step=64, label="Generation Length", info="Number of tokens to generate.")
                 steps_slider_mmu = gr.Slider(minimum=1, maximum=512, value=256, step=32, label="Total Sampling Steps", info="Must be divisible by (gen_length / block_length).")
                 with gr.Row():
-                    block_length_slider_mmu = gr.Slider(minimum=32, maximum=1024, value=64, step=32, label="Block Length", info="gen_length must be divisible by this.")
+                    block_length_slider_mmu = gr.Slider(minimum=32, maximum=1024, value=128, step=32, label="Block Length", info="gen_length must be divisible by this.")
                     remasking_dropdown_mmu = gr.Dropdown(choices=['low_confidence', 'random'], value='low_confidence', label="Remasking Strategy")
                 with gr.Row():
                     cfg_scale_slider_mmu = gr.Slider(minimum=0.0, maximum=2.0, value=0.0, step=0.1, label="CFG Scale", info="Classifier-Free Guidance. 0 disables it.")
@@ -809,81 +716,7 @@ with gr.Blocks(css=css_styles, theme=theme) as demo:
             output_final_text_box_mmu = gr.Textbox(label="Final Output", lines=8, interactive=False, show_copy_button=True)
 
 
-            examples_mmu_base = gr.Examples(
-                examples=[
-                    [
-                        "figs/sunflower.jpg",
-                        "Please describe this image in detail.",
-                        256,
-                        512,
-                        128,
-                        1,
-                        0,
-                        "low_confidence"
-                    ],
-                    [
-                        "figs/woman.jpg",
-                        "Please describe this image in detail.",
-                        256,
-                        512,
-                        128,
-                        1,
-                        0,
-                        "low_confidence"
-                    ]
-                ],
-                inputs=[
-                    image_upload_box,
-                    prompt_input_box_mmu,
-                    steps_slider_mmu,
-                    gen_length_slider_mmu,
-                    block_length_slider_mmu,
-                    temperature_slider_mmu,
-                    cfg_scale_slider_mmu,
-                    remasking_dropdown_mmu
-                ],
-                outputs=[output_visualization_box_mmu, output_final_text_box_mmu],
-                fn=generate_viz_wrapper,
-                cache_examples=False
-            )
-            examples_mmu_mixcot = gr.Examples(
-                examples=[
-                    [
-                        "figs/geo.png",
-                        "In the given figure, a square ABCD is inscribed in a circle with center O. Point P is located on side CD. What is the value of angle APB?",
-                        256,
-                        512,
-                        64,
-                        1,
-                        0,
-                        "low_confidence"
-                    ],
-                    [
-                        "figs/bus.jpg",
-                        "What are the colors of the bus?",
-                        256,
-                        512,
-                        64,
-                        1,
-                        0,
-                        "low_confidence"
-                    ]
-                ],
-                inputs=[
-                    image_upload_box,
-                    prompt_input_box_mmu,
-                    steps_slider_mmu,
-                    gen_length_slider_mmu,
-                    block_length_slider_mmu,
-                    temperature_slider_mmu,
-                    cfg_scale_slider_mmu,
-                    remasking_dropdown_mmu
-                ],
-                outputs=[output_visualization_box_mmu, output_final_text_box_mmu],
-                fn=generate_viz_wrapper,
-                cache_examples=False
-            )
-            examples_mmu_max = gr.Examples(
+            gr.Examples(
                 examples=[
                     [
                         "figs/sunflower.jpg",
@@ -990,69 +823,21 @@ with gr.Blocks(css=css_styles, theme=theme) as demo:
         inputs=[thinking_mode_mmu],
         outputs=[thinking_mode_mmu, think_button_mmu]
     )
+
 
-    def initialize_app_state():
-        default_model_choice = "MMaDA-8B-MixCoT"  # Load MixCoT by default
-
-        # handle_model_selection_change now returns more items
-        status, lm_b_vis, lm_m_vis, lm_x_vis, \
-        mmu_b_vis, mmu_m_vis, mmu_x_vis, \
-        init_thinking_lm_state, init_think_lm_btn_update, \
-        init_thinking_mmu_state, init_think_mmu_btn_update = handle_model_selection_change(default_model_choice)
-
-        return (
-            default_model_choice,
-            status,
-            lm_b_vis,
-            lm_m_vis,
-            lm_x_vis,
-            mmu_b_vis,
-            mmu_m_vis,
-            mmu_x_vis,
-            init_thinking_lm_state,
-            init_think_lm_btn_update,
-            init_thinking_mmu_state,
-            init_think_mmu_btn_update
-        )
+    def initialize_default_model():
+        default_model = "MMaDA-8B-Base"
+        result = handle_model_selection_change(default_model)
+        return default_model, result
 
     demo.load(
-        fn=initialize_app_state,
+        fn=initialize_default_model,
         inputs=None,
-        outputs=[
-            model_select_radio,
-            model_load_status_box,
-            examples_lm_base,
-            examples_lm_mixcot,
-            examples_lm_max,
-            examples_mmu_base,
-            examples_mmu_mixcot,
-            examples_mmu_max,
-            thinking_mode_lm,   # gr.State for LM thinking mode
-            think_button_lm,    # gr.Button for LM thinking mode
-            thinking_mode_mmu,  # gr.State for MMU thinking mode
-            think_button_mmu    # gr.Button for MMU thinking mode
-        ],
+        outputs=[model_select_radio, model_load_status_box],
         queue=True
     )
 
-    model_select_radio.change(
-        fn=handle_model_selection_change,
-        inputs=[model_select_radio],
-        outputs=[
-            model_load_status_box,
-            examples_lm_base,
-            examples_lm_mixcot,
-            examples_lm_max,
-            examples_mmu_base,
-            examples_mmu_mixcot,
-            examples_mmu_max,
-            thinking_mode_lm,
-            think_button_lm,
-            thinking_mode_mmu,
-            think_button_mmu
-        ]
-    )
-
     def clear_outputs():
         return None, None, None  # Clear image, visualization, and final text