Spaces: Maximofn / SmolLM2_backend_LocalModel

Build error

Maximofn committed on Mar 3

Commit

b756054

1 Parent(s): 56ffcf9

Improve model loading with device-specific configuration and error handling

- Add try-except block for robust model loading
- Implement separate loading strategies for CUDA and CPU devices
- Include low CPU memory usage option for CUDA
- Add informative print statements for device and loading status
- Enhance error handling during model initialization

Files changed (1) hide show

app.py +24 -9

app.py CHANGED Viewed

@@ -16,15 +16,30 @@ print("Cargando modelo y tokenizer...")
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_name = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
-# Load the model in BF16 format for better performance and lower memory usage
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.bfloat16,
-    device_map="auto"  # This will automatically distribute the model across available GPUs
-)
-print(f"Modelo cargado en dispositivo: {device}")
 # Define the function that calls the model
 def call_model(state: MessagesState):

 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_name = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
+try:
+    # Load the model in BF16 format for better performance and lower memory usage
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    if device == "cuda":
+        print("Usando GPU para el modelo...")
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=torch.bfloat16,
+            device_map="auto",
+            low_cpu_mem_usage=True
+        )
+    else:
+        print("Usando CPU para el modelo...")
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            device_map={"": device},
+            torch_dtype=torch.float32
+        )
+    print(f"Modelo cargado exitosamente en: {device}")
+except Exception as e:
+    print(f"Error al cargar el modelo: {str(e)}")
+    raise
 # Define the function that calls the model
 def call_model(state: MessagesState):