Spaces:

ruslanmv
/

Llama-3.2-11B-Vision-Instruct

Paused

App Files Files Community

ruslanmv committed on Oct 2, 2024

Commit

b39a5c0

1 Parent(s): 6d16e49

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -4

app.py CHANGED Viewed

@@ -3,8 +3,20 @@ import os
 import torch
 from transformers import AutoProcessor, MllamaForConditionalGeneration
 from PIL import Image
-# Get your Hugging Face token from environment variables
 HF_TOKEN = os.environ.get('HF_TOKEN')
 # Load the model and processor
@@ -12,11 +24,15 @@ model_name = "ruslanmv/Llama-3.2-11B-Vision-Instruct"
 model = MllamaForConditionalGeneration.from_pretrained(
     model_name,
     use_auth_token=HF_TOKEN,
-    torch_dtype=torch.bfloat16,
-    device_map="auto",
 )
 processor = AutoProcessor.from_pretrained(model_name, use_auth_token=HF_TOKEN)
 def predict(image, text):
     # Prepare the input messages
     messages = [
@@ -30,7 +46,7 @@ def predict(image, text):
     input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
     # Process the inputs and move to the appropriate device
-    inputs = processor(image, input_text, return_tensors="pt").to(model.device)
     # Generate a response from the model
     outputs = model.generate(**inputs, max_new_tokens=100)

 import torch
 from transformers import AutoProcessor, MllamaForConditionalGeneration
 from PIL import Image
+import spaces
+# Check if we're running in a Hugging Face Space and if SPACES_ZERO_GPU is enabled
+IS_SPACES_ZERO = os.environ.get("SPACES_ZERO_GPU", "0") == "1"
+IS_SPACE = os.environ.get("SPACE_ID", None) is not None
+# Determine the device (GPU if available, else CPU)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"
+print(f"Using device: {device}")
+print(f"Low memory mode: {LOW_MEMORY}")
+# Get Hugging Face token from environment variables
 HF_TOKEN = os.environ.get('HF_TOKEN')
 # Load the model and processor
 model = MllamaForConditionalGeneration.from_pretrained(
     model_name,
     use_auth_token=HF_TOKEN,
+    torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,
+    device_map="auto" if device == "cuda" else None,  # Use device mapping if CUDA is available
 )
+# Move the model to the appropriate device (GPU if available)
+model.to(device)
 processor = AutoProcessor.from_pretrained(model_name, use_auth_token=HF_TOKEN)
+@spaces.GPU  # Use the free GPU provided by Hugging Face Spaces
 def predict(image, text):
     # Prepare the input messages
     messages = [
     input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
     # Process the inputs and move to the appropriate device
+    inputs = processor(image, input_text, return_tensors="pt").to(device)
     # Generate a response from the model
     outputs = model.generate(**inputs, max_new_tokens=100)