YaphetYan committed on
Commit
580cc25
·
1 Parent(s): 1ca6e57

chore: 一些尝试

Browse files
Files changed (1) hide show
  1. app.py +23 -5
app.py CHANGED
@@ -2,17 +2,35 @@ import torch
2
  import gradio as gr
3
  from transformers import AutoModel, pipeline, AutoTokenizer
4
 
5
- path = "radna/Triton-InternVL2-2B"
 
 
 
 
 
 
 
 
 
 
6
  model = (
7
  AutoModel.from_pretrained(
8
- path, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True, trust_remote_code=True
 
 
 
9
  )
10
  .eval()
11
  .cuda()
12
  )
13
 
14
- tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
15
- inference = pipeline(task="visual-question-answering", model=model, tokenizer=tokenizer)
 
 
 
 
 
16
 
17
 
18
  def predict(input_img, questions):
@@ -21,7 +39,7 @@ def predict(input_img, questions):
21
  return str(predictions)
22
  except Exception as e:
23
  # 捕获异常,并将错误信息转换为字符串
24
- error_message = str(e)
25
  # 抛出gradio.Error来展示错误弹窗
26
  raise gr.Error(error_message, duration=25)
27
 
 
2
  import gradio as gr
3
  from transformers import AutoModel, pipeline, AutoTokenizer
4
 
5
+ # from issue: https://discuss.huggingface.co/t/how-to-install-flash-attention-on-hf-gradio-space/70698/2
6
+ import subprocess
7
+
8
+ # InternVL2 需要的 flash_attn 这个依赖只能这样运行时装
9
+ subprocess.run(
10
+ "pip install flash-attn --no-build-isolation",
11
+ env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
12
+ shell=True,
13
+ )
14
+
15
+ model_name = "OpenGVLab/InternVL2-8B"
16
  model = (
17
  AutoModel.from_pretrained(
18
+ model_name,
19
+ torch_dtype=torch.bfloat16,
20
+ # low_cpu_mem_usage=True,
21
+ trust_remote_code=True,
22
  )
23
  .eval()
24
  .cuda()
25
  )
26
 
27
+ try:
28
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
29
+ inference = pipeline(
30
+ task="visual-question-answering", model=model, tokenizer=tokenizer
31
+ )
32
+ except Exception as error:
33
+ raise gr.Error("👌" + str(error), duration=30)
34
 
35
 
36
  def predict(input_img, questions):
 
39
  return str(predictions)
40
  except Exception as e:
41
  # 捕获异常,并将错误信息转换为字符串
42
+ error_message = "❌" + str(e)
43
  # 抛出gradio.Error来展示错误弹窗
44
  raise gr.Error(error_message, duration=25)
45