Spaces:

YaphetYan
/

MyDemoSpace

Running

YaphetYan committed on Jul 12, 2024

Commit

580cc25

1 Parent(s): 1ca6e57

chore: 一些尝试

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,17 +2,35 @@ import torch
 import gradio as gr
 from transformers import AutoModel, pipeline, AutoTokenizer
-path = "radna/Triton-InternVL2-2B"
 model = (
     AutoModel.from_pretrained(
-        path, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True, trust_remote_code=True
     )
     .eval()
     .cuda()
 )
-tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
-inference = pipeline(task="visual-question-answering", model=model, tokenizer=tokenizer)
 def predict(input_img, questions):
@@ -21,7 +39,7 @@ def predict(input_img, questions):
         return str(predictions)
     except Exception as e:
         # 捕获异常，并将错误信息转换为字符串
-        error_message = str(e)
         # 抛出gradio.Error来展示错误弹窗
         raise gr.Error(error_message, duration=25)

 import gradio as gr
 from transformers import AutoModel, pipeline, AutoTokenizer
+# from issue: https://discuss.huggingface.co/t/how-to-install-flash-attention-on-hf-gradio-space/70698/2
+import subprocess
+# InternVL2 需要的 flash_attn 这个依赖只能这样运行时装
+subprocess.run(
+    "pip install flash-attn --no-build-isolation",
+    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+    shell=True,
+)
+model_name = "OpenGVLab/InternVL2-8B"
 model = (
     AutoModel.from_pretrained(
+        model_name,
+        torch_dtype=torch.bfloat16,
+        # low_cpu_mem_usage=True,
+        trust_remote_code=True,
     )
     .eval()
     .cuda()
 )
+try:
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+    inference = pipeline(
+        task="visual-question-answering", model=model, tokenizer=tokenizer
+    )
+except Exception as error:
+    raise gr.Error("👌" + str(error), duration=30)
 def predict(input_img, questions):
         return str(predictions)
     except Exception as e:
         # 捕获异常，并将错误信息转换为字符串
+        error_message = "❌" + str(e)
         # 抛出gradio.Error来展示错误弹窗
         raise gr.Error(error_message, duration=25)