ruslanmv committed on
Commit b39a5c0 · 1 Parent(s): 6d16e49

Update app.py

Files changed (1)
  1. app.py +20 -4
app.py CHANGED
@@ -3,8 +3,20 @@ import os
 import torch
 from transformers import AutoProcessor, MllamaForConditionalGeneration
 from PIL import Image
+import spaces
 
-# Get your Hugging Face token from environment variables
+# Check if we're running in a Hugging Face Space and if SPACES_ZERO_GPU is enabled
+IS_SPACES_ZERO = os.environ.get("SPACES_ZERO_GPU", "0") == "1"
+IS_SPACE = os.environ.get("SPACE_ID", None) is not None
+
+# Determine the device (GPU if available, else CPU)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"
+
+print(f"Using device: {device}")
+print(f"Low memory mode: {LOW_MEMORY}")
+
+# Get Hugging Face token from environment variables
 HF_TOKEN = os.environ.get('HF_TOKEN')
 
 # Load the model and processor
@@ -12,11 +24,15 @@ model_name = "ruslanmv/Llama-3.2-11B-Vision-Instruct"
 model = MllamaForConditionalGeneration.from_pretrained(
     model_name,
     use_auth_token=HF_TOKEN,
-    torch_dtype=torch.bfloat16,
-    device_map="auto",
+    torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,
+    device_map="auto" if device == "cuda" else None,  # Use device mapping if CUDA is available
 )
+
+# Move the model to the appropriate device (GPU if available)
+model.to(device)
 processor = AutoProcessor.from_pretrained(model_name, use_auth_token=HF_TOKEN)
 
+@spaces.GPU  # Use the free GPU provided by Hugging Face Spaces
 def predict(image, text):
     # Prepare the input messages
     messages = [
@@ -30,7 +46,7 @@ def predict(image, text):
     input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
 
     # Process the inputs and move to the appropriate device
-    inputs = processor(image, input_text, return_tensors="pt").to(model.device)
+    inputs = processor(image, input_text, return_tensors="pt").to(device)
 
     # Generate a response from the model
     outputs = model.generate(**inputs, max_new_tokens=100)
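For anyone reading this commit outside of Spaces, here is a minimal sketch of how the new environment checks resolve; the exported values below are illustrative stand-ins, not real Space configuration:

import os

# Simulate a ZeroGPU Space locally (illustrative values, not real Space config)
os.environ["SPACES_ZERO_GPU"] = "1"
os.environ["SPACE_ID"] = "user/demo-space"   # hypothetical Space id
os.environ["LOW_MEMORY"] = "1"

# The same checks app.py now performs at startup
IS_SPACES_ZERO = os.environ.get("SPACES_ZERO_GPU", "0") == "1"
IS_SPACE = os.environ.get("SPACE_ID", None) is not None
LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"
print(IS_SPACES_ZERO, IS_SPACE, LOW_MEMORY)  # -> True True True

Note that on ZeroGPU hardware the GPU is typically only attached inside functions decorated with @spaces.GPU, so torch.cuda.is_available() may report False at import time; the CPU fallback above keeps startup working either way.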
 
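One caveat on the loading change above, offered as a hedged sketch rather than a fix in this commit: when device_map="auto" is passed, accelerate already places the weights across the available devices, so the unconditional model.to(device) that follows is redundant on the CUDA branch and can error for models with offloaded weights. A guarded variant:

# Sketch: only move the model manually when no device map was used
# (in this commit, device_map is None on the CPU branch).
if device != "cuda":
    model.to(device)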
 
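Finally, a hypothetical smoke test for the updated predict(). The image path and prompt are placeholders, HF_TOKEN must be set in the environment, and this assumes predict() decodes and returns the generated text (the decoding step falls outside the hunks shown here):

from PIL import Image

# Any local RGB image; the filename is illustrative
image = Image.open("example.jpg").convert("RGB")

# On Spaces, the @spaces.GPU decorator attaches the GPU for this call
print(predict(image, "Describe this image."))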