YaphetYan committed
Commit 9ce3948 · 1 Parent(s): 695df9a

fix: .cuda() first, then .eval()

Files changed (1)
app.py +17 -14
app.py CHANGED
@@ -1,7 +1,7 @@
 import torch
 import gradio as gr
 from transformers import AutoModel, pipeline, AutoTokenizer
-
+import spaces
 import subprocess
 
 # from issue: https://discuss.huggingface.co/t/how-to-install-flash-attention-on-hf-gradio-space/70698/2
@@ -11,21 +11,22 @@ subprocess.run(
     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
     shell=True,
 )
-
-model_name = "OpenGVLab/InternVL2-8B"
-model = (
-    AutoModel.from_pretrained(
-        model_name,
-        torch_dtype=torch.bfloat16,
-        # low_cpu_mem_usage=True,
-        trust_remote_code=True,
+try:
+    model_name = "OpenGVLab/InternVL2-8B"
+    # model: <class 'transformers_modules.OpenGVLab.InternVL2-8B.0e6d592d957d9739b6df0f4b90be4cb0826756b9.modeling_internvl_chat.InternVLChatModel'>
+    model = (
+        AutoModel.from_pretrained(
+            model_name,
+            torch_dtype=torch.bfloat16,
+            # low_cpu_mem_usage=True,
+            trust_remote_code=True,
+        )
+        .cuda()
+        .eval()
     )
-    .eval()
-    .cuda()
-)
 
-try:
     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+    # pipeline: <class 'transformers.pipelines.visual_question_answering.VisualQuestionAnsweringPipeline'>
     inference = pipeline(
         task="visual-question-answering", model=model, tokenizer=tokenizer
     )
@@ -33,9 +34,11 @@ except Exception as error:
     raise gr.Error("👌" + str(error), duration=30)
 
 
+@spaces.GPU
 def predict(input_img, questions):
     try:
-        gr.Info(str(type(inference)))
+        gr.Info("pipeline: " + str(type(inference)))
+        gr.Info("model: " + str(type(model)))
         predictions = inference(question=questions, image=input_img)
         return str(predictions)
     except Exception as e:
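
For reference, a minimal sketch of the pattern this revision ends up with, assuming the Space runs on ZeroGPU: the model is built once at import time with .cuda() applied before .eval(), and the GPU-bound handler is decorated with @spaces.GPU. The gr.Interface wiring at the bottom is an assumption for illustration; it is not part of this diff.

# Minimal sketch; only the loading/decorator pattern comes from this commit,
# the Interface wiring below is assumed.
import torch
import gradio as gr
import spaces
from transformers import AutoModel, AutoTokenizer, pipeline

model_name = "OpenGVLab/InternVL2-8B"

# Load once at import time: move weights to the GPU first, then switch to eval mode.
model = (
    AutoModel.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    )
    .cuda()
    .eval()
)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
inference = pipeline(
    task="visual-question-answering", model=model, tokenizer=tokenizer
)


@spaces.GPU  # on ZeroGPU, a GPU is attached only while predict() runs
def predict(input_img, questions):
    predictions = inference(question=questions, image=input_img)
    return str(predictions)


# Assumed UI wiring (not shown in this commit):
demo = gr.Interface(
    predict,
    inputs=[gr.Image(type="pil"), gr.Textbox(label="Question")],
    outputs=gr.Textbox(label="Answer"),
)

if __name__ == "__main__":
    demo.launch()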