Spaces:

nmarafo
/

Child-Safe-Chatbot

Runtime error

nmarafo committed on Oct 16, 2024

Commit

5155d78

verified ·

1 Parent(s): 3f907da

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,27 +7,8 @@ import os
 # Cargar el token de Hugging Face desde los secretos
 token = os.environ.get("HF_TOKEN")
-# IDs del modelo y el tokenizador
-model_id = "PrunaAI/google-shieldgemma-2b-bnb-4bit-smashed"
-tokenizer_id = "google/shieldgemma-2b"
-tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, token=token)
-# Configurar BitsAndBytes para cuantización en 4 bits
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.bfloat16
-)
-# Cargar el modelo con la configuración de cuantización
-model = AutoModelForCausalLM.from_pretrained(
-    model_id,
-    quantization_config=quantization_config,
-    trust_remote_code=True,
-    device_map="auto",
-    token=token
-)
 # Función para generar el prompt dependiendo del idioma seleccionado
 def generar_prompt(message, tipo_clasificacion, idioma):
@@ -95,7 +76,7 @@ def generar_prompt(message, tipo_clasificacion, idioma):
 def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p, language, harm_type):
     prompt = generar_prompt(message, harm_type, language)
-    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
     with torch.no_grad():
         logits = model(**inputs).logits

 # Cargar el token de Hugging Face desde los secretos
 token = os.environ.get("HF_TOKEN")
+model = AutoModelForCausalLM.from_pretrained("PrunaAI/google-shieldgemma-2b-bnb-4bit-smashed", trust_remote_code=True, device_map='auto')
+tokenizer = AutoTokenizer.from_pretrained("google/shieldgemma-2b")
 # Función para generar el prompt dependiendo del idioma seleccionado
 def generar_prompt(message, tipo_clasificacion, idioma):
 def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p, language, harm_type):
     prompt = generar_prompt(message, harm_type, language)
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)["input_ids"]
     with torch.no_grad():
         logits = model(**inputs).logits