John6666 committed on
Commit
eed255c
·
verified ·
1 Parent(s): d2e9938

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +11 -5
  2. requirements.txt +1 -1
app.py CHANGED
@@ -6,7 +6,7 @@ from threading import Thread
6
  from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
7
  from PIL import ImageDraw
8
  from torchvision.transforms.v2 import Resize
9
-
10
  import subprocess
11
  #subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
12
 
@@ -17,14 +17,20 @@ model_id = "vikhyatk/moondream2"
17
  #revision = "2024-08-26"
18
  #tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
19
  tokenizer = AutoTokenizer.from_pretrained(model_id)
20
- moondream = AutoModelForCausalLM.from_pretrained(
21
- model_id, trust_remote_code=True, #revision=revision,
22
- torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32, #device_map="auto",
23
  #ignore_mismatched_sizes=True,
24
  #attn_implementation="flash_attention_2"
 
 
 
 
 
25
  ).to(device)
 
26
  moondream.eval()
27
- moondream.to_bettertransformer()
28
 
29
  #@spaces.GPU
30
  def answer_question(img, prompt):
 
6
  from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
7
  from PIL import ImageDraw
8
  from torchvision.transforms.v2 import Resize
9
+ from optimum.onnxruntime import ORTModelForImageClassification
10
  import subprocess
11
  #subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
12
 
 
17
  #revision = "2024-08-26"
18
  #tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
19
  tokenizer = AutoTokenizer.from_pretrained(model_id)
20
+ #moondream = AutoModelForCausalLM.from_pretrained(
21
+ # model_id, trust_remote_code=True, #revision=revision,
22
+ # torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32, #device_map="auto",
23
  #ignore_mismatched_sizes=True,
24
  #attn_implementation="flash_attention_2"
25
+ #).to(device)
26
+ moondream = ORTModelForImageClassification.from_pretrained(
27
+ model_id, trust_remote_code=True,
28
+ torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,
29
+ from_transformers=True
30
  ).to(device)
31
+
32
  moondream.eval()
33
+ #moondream.to_bettertransformer()
34
 
35
  #@spaces.GPU
36
  def answer_question(img, prompt):
requirements.txt CHANGED
@@ -5,5 +5,5 @@ accelerate>=0.32.1
5
  numpy<2
6
  torch==2.4.0
7
  torchvision
8
- optimum
9
  intel_extension_for_pytorch
 
5
  numpy<2
6
  torch==2.4.0
7
  torchvision
8
+ optimum[onnxruntime]
9
  intel_extension_for_pytorch