Update app.py
app.py CHANGED
@@ -6,14 +6,15 @@ from PIL import Image
 import torch
 import spaces
 import subprocess
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+#subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
 
 processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM_converted_4")
 
 model = Idefics3ForConditionalGeneration.from_pretrained("HuggingFaceTB/SmolVLM_converted_4",
                                                          torch_dtype=torch.bfloat16,
-                                                         _attn_implementation="flash_attention_2"
+                                                         #_attn_implementation="flash_attention_2"
+                                                         ).to("cuda")
 
 @spaces.GPU
 def model_inference(
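For context, a minimal sketch of what this commit amounts to: instead of unconditionally installing flash-attn at startup and forcing the flash_attention_2 backend, the model can opt into flash attention only when the package is actually importable, and otherwise fall back to the default transformers attention backend. The conditional below is an assumption for illustration, not part of this commit, which simply comments both flash-attn lines out.

import importlib.util
import torch
from transformers import AutoProcessor, Idefics3ForConditionalGeneration

processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM_converted_4")

# Request flash_attention_2 only when flash-attn is importable; on
# environments without it (e.g. a ZeroGPU Space that cannot build the
# CUDA extension at runtime), transformers keeps its default backend.
load_kwargs = {"torch_dtype": torch.bfloat16}
if importlib.util.find_spec("flash_attn") is not None:
    load_kwargs["_attn_implementation"] = "flash_attention_2"

model = Idefics3ForConditionalGeneration.from_pretrained(
    "HuggingFaceTB/SmolVLM_converted_4", **load_kwargs
).to("cuda")

As committed, the Space takes the simpler route: with both flash-attn lines commented out, the fallback path above is effectively always taken, and the explicit .to("cuda") keeps the bfloat16 weights on the GPU allocated by the @spaces.GPU decorator.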