YaphetYan committed
Commit 9ce3948 · 1 Parent(s): 695df9a

fix: .cuda() first, then .eval()

Files changed (1)
app.py +17 -14
app.py CHANGED
@@ -1,7 +1,7 @@
 import torch
 import gradio as gr
 from transformers import AutoModel, pipeline, AutoTokenizer
-
+import spaces
 import subprocess
 
 # from issue: https://discuss.huggingface.co/t/how-to-install-flash-attention-on-hf-gradio-space/70698/2
@@ -11,21 +11,22 @@ subprocess.run(
     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
     shell=True,
 )
-
-model_name = "OpenGVLab/InternVL2-8B"
-model = (
-    AutoModel.from_pretrained(
-        model_name,
-        torch_dtype=torch.bfloat16,
-        # low_cpu_mem_usage=True,
-        trust_remote_code=True,
+try:
+    model_name = "OpenGVLab/InternVL2-8B"
+    # model: <class 'transformers_modules.OpenGVLab.InternVL2-8B.0e6d592d957d9739b6df0f4b90be4cb0826756b9.modeling_internvl_chat.InternVLChatModel'>
+    model = (
+        AutoModel.from_pretrained(
+            model_name,
+            torch_dtype=torch.bfloat16,
+            # low_cpu_mem_usage=True,
+            trust_remote_code=True,
+        )
+        .cuda()
+        .eval()
     )
-    .eval()
-    .cuda()
-)
 
-try:
     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+    # pipeline: <class 'transformers.pipelines.visual_question_answering.VisualQuestionAnsweringPipeline'>
     inference = pipeline(
         task="visual-question-answering", model=model, tokenizer=tokenizer
     )
@@ -33,9 +34,11 @@ except Exception as error:
     raise gr.Error("👌" + str(error), duration=30)
 
 
+@spaces.GPU
 def predict(input_img, questions):
     try:
-        gr.Info(str(type(inference)))
+        gr.Info("pipeline: " + str(type(inference)))
+        gr.Info("model: " + str(type(model)))
         predictions = inference(question=questions, image=input_img)
         return str(predictions)
     except Exception as e:
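
For reference, a minimal sketch of the pattern this revision ends up with, assuming the Space runs on ZeroGPU: the model is built once at import time with .cuda() applied before .eval(), and the GPU-bound handler is decorated with @spaces.GPU. The gr.Interface wiring at the bottom is an assumption for illustration; it is not part of this diff.

# Minimal sketch; only the loading/decorator pattern comes from this commit,
# the Interface wiring below is assumed.
import torch
import gradio as gr
import spaces
from transformers import AutoModel, AutoTokenizer, pipeline

model_name = "OpenGVLab/InternVL2-8B"

# Load once at import time: move weights to the GPU first, then switch to eval mode.
model = (
    AutoModel.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    )
    .cuda()
    .eval()
)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
inference = pipeline(
    task="visual-question-answering", model=model, tokenizer=tokenizer
)


@spaces.GPU  # on ZeroGPU, a GPU is attached only while predict() runs
def predict(input_img, questions):
    predictions = inference(question=questions, image=input_img)
    return str(predictions)


# Assumed UI wiring (not shown in this commit):
demo = gr.Interface(
    predict,
    inputs=[gr.Image(type="pil"), gr.Textbox(label="Question")],
    outputs=gr.Textbox(label="Answer"),
)

if __name__ == "__main__":
    demo.launch()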