Update app.py

app.py CHANGED
@@ -1,4 +1,5 @@
 import os
+os.environ["CUDA_VISIBLE_DEVICES"] = "1"
 import torch
 from flashsloth.constants import (
     IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN,
@@ -14,28 +15,39 @@ from flashsloth.mm_utils import (
 from PIL import Image
 import gradio as gr
 
-
 from transformers import TextIteratorStreamer
 from threading import Thread
 
-
 disable_torch_init()
 
-
+MODEL_PATH_HD = "Tongbo/FlashSloth_HD-3.2B"
+MODEL_PATH_NEW = "Tongbo/FlashSloth-3.2B"
 
-
-
-model.to('cuda')
-model.eval()
+model_name_hd = get_model_name_from_path(MODEL_PATH_HD)
+model_name_new = get_model_name_from_path(MODEL_PATH_NEW)
 
+models = {
+    "FlashSloth HD": load_pretrained_model(MODEL_PATH_HD, None, model_name_hd),
+    "FlashSloth": load_pretrained_model(MODEL_PATH_NEW, None, model_name_new)
+}
 
-def generate_description(image, prompt_text, temperature, top_p, max_tokens):
+for key in models:
+    tokenizer, model, image_processor, context_len = models[key]
+    model.to('cuda')
+    model.eval()
+
+def generate_description(image, prompt_text, temperature, top_p, max_tokens, selected_model):
+    """
+    Generate a description for the image, with streaming output, using the selected model.
+    New parameter:
+    - selected_model: the name of the model chosen by the user
+    """
     keywords = ['</s>']
 
+    tokenizer, model, image_processor, context_len = models[selected_model]
 
     text = DEFAULT_IMAGE_TOKEN + '\n' + prompt_text
     text = text + LEARNABLE_TOKEN
-
 
     image = image.convert('RGB')
     if model.config.image_hd:
@@ -43,14 +55,12 @@ def generate_description(image, prompt_text, temperature, top_p, max_tokens):
     else:
         image_tensor = process_images([image], image_processor, model.config)[0]
     image_tensor = image_tensor.unsqueeze(0).to(dtype=torch.float16, device='cuda', non_blocking=True)
-
 
     conv = conv_templates["phi2"].copy()
     conv.append_message(conv.roles[0], text)
     conv.append_message(conv.roles[1], None)
     prompt = conv.get_prompt()
-
 
     input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt')
     input_ids = input_ids.unsqueeze(0).to(device='cuda', non_blocking=True)
 
@@ -79,11 +89,9 @@ def generate_description(image, prompt_text, temperature, top_p, max_tokens):
         with torch.inference_mode():
             model.generate(**generation_kwargs)
 
-    # Run generation in a separate thread to avoid blocking
    generation_thread = Thread(target=_generate)
    generation_thread.start()
 
-    # Yield output incrementally as it is generated
    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
@@ -91,7 +99,6 @@ def generate_description(image, prompt_text, temperature, top_p, max_tokens):
 
    generation_thread.join()
 
-# Custom CSS to enlarge fonts and polish the interface
 custom_css = """
 <style>
 /* Enlarge the title font */
@@ -152,10 +159,17 @@ with gr.Blocks(css=custom_css) as demo:
                     minimum=64,
                     maximum=3072,
                     step=1,
-                    value=
+                    value=3072,
                     label="Max Tokens"
                 )
 
+                model_dropdown = gr.Dropdown(
+                    choices=list(models.keys()),
+                    value=list(models.keys())[0],
+                    label="Select Model",
+                    type="value"
+                )
+
             with gr.Column(scale=1):
                 prompt_input = gr.Textbox(
                     lines=3,
@@ -173,10 +187,10 @@ def generate_description(image, prompt_text, temperature, top_p, max_tokens):
 
     submit_button.click(
         fn=generate_description,
-        inputs=[image_input, prompt_input, temperature_slider, topp_slider, maxtoken_slider],
+        inputs=[image_input, prompt_input, temperature_slider, topp_slider, maxtoken_slider, model_dropdown],
         outputs=output_text,
         show_progress=True
     )
 
 if __name__ == "__main__":
-    demo.queue().launch()
+    demo.queue().launch(server_name="0.0.0.0", server_port=8888)
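The core of this change is the model registry: both FlashSloth checkpoints are loaded once at startup into a dict, and the dropdown's string value selects the `(tokenizer, model, image_processor, context_len)` tuple per request. Below is a minimal, self-contained sketch of that dispatch pattern; the lambdas are hypothetical stubs standing in for the real `load_pretrained_model(...)` results so it runs without the flashsloth package.

```python
import gradio as gr

# Hypothetical stubs standing in for the two loaded FlashSloth models.
models = {
    "FlashSloth HD": lambda prompt: f"[HD model] {prompt}",
    "FlashSloth": lambda prompt: f"[base model] {prompt}",
}

def respond(prompt, selected_model):
    # The dropdown passes its value (a key of `models`) as a plain string.
    return models[selected_model](prompt)

with gr.Blocks() as demo:
    prompt_box = gr.Textbox(label="Prompt")
    model_dropdown = gr.Dropdown(
        choices=list(models.keys()),
        value=list(models.keys())[0],
        label="Select Model",
    )
    output_box = gr.Textbox(label="Output")
    gr.Button("Submit").click(
        fn=respond,
        inputs=[prompt_box, model_dropdown],
        outputs=output_box,
    )

if __name__ == "__main__":
    demo.queue().launch()
```

Loading both checkpoints up front trades GPU memory for latency: switching models in the UI costs a dict lookup instead of a reload.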
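The streaming machinery in `generate_description` is unchanged by this commit apart from the model lookup: `model.generate` runs on a worker thread while the callback iterates a `TextIteratorStreamer` and yields partial text. A minimal sketch of that pattern, using a small stand-in checkpoint ("gpt2") rather than FlashSloth:

```python
from threading import Thread

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Stand-in model; the app uses the cached FlashSloth checkpoints instead.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

def stream_reply(prompt: str):
    inputs = tokenizer(prompt, return_tensors="pt")
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    def _generate():
        # generate() pushes decoded text into the streamer as it samples.
        with torch.inference_mode():
            model.generate(**inputs, streamer=streamer, max_new_tokens=64)

    thread = Thread(target=_generate)
    thread.start()

    partial_text = ""
    for new_text in streamer:   # blocks until the worker emits more text
        partial_text += new_text
        yield partial_text      # Gradio re-renders the output on each yield

    thread.join()
```

Because the Gradio callback is a generator, `demo.queue()` is required so the server can push each yielded partial string to the client.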