Spaces:

AIdeaText
/

TestOneLlama

Paused

App Files Community

AIdeaText committed on Nov 26, 2024

Commit

e2a67af

verified ·

1 Parent(s): 8844a4c

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -11

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import torch
 from huggingface_hub import login
 import os
 def setup_llama3_auth():
     """Configurar autenticación para Llama 3"""
     if 'HUGGING_FACE_TOKEN_3' in st.secrets:
@@ -17,14 +18,17 @@ def setup_llama3_auth():
 class Llama3Demo:
     def __init__(self):
-        # Verificar autenticación antes de cargar el modelo
         setup_llama3_auth()
-        # Usando el modelo de 3B con instrucciones
         self.model_name = "meta-llama/Llama-3.2-3B-Instruct"
         self._model = None
         self._tokenizer = None
     @property
     def model(self):
         if self._model is None:
@@ -33,12 +37,11 @@ class Llama3Demo:
                     self.model_name,
                     torch_dtype=torch.float16,
                     device_map="auto",
-                    load_in_8bit=True,  # Optimización de memoria
-                    use_auth_token=st.secrets['HUGGING_FACE_TOKEN_3']
                 )
             except Exception as e:
                 st.error(f"Error cargando el modelo: {str(e)}")
-                st.error("Verifica tu acceso a Llama 3.2 en https://huggingface.co/meta-llama")
                 raise e
         return self._model
@@ -48,21 +51,26 @@ class Llama3Demo:
             try:
                 self._tokenizer = AutoTokenizer.from_pretrained(
                     self.model_name,
-                    use_auth_token=st.secrets['HUGGING_FACE_TOKEN_3']
                 )
             except Exception as e:
                 st.error(f"Error cargando el tokenizer: {str(e)}")
                 raise e
         return self._tokenizer
     def generate_response(self, prompt: str, max_new_tokens: int = 512) -> str:
-        # Formato específico para Llama 3.2
         formatted_prompt = f"""<|system|>You are a helpful AI assistant.</s>
 <|user|>{prompt}</s>
 <|assistant|>"""
         inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.model.device)
         with torch.no_grad():
             outputs = self.model.generate(
                 **inputs,
@@ -70,16 +78,16 @@ class Llama3Demo:
                 num_return_sequences=1,
                 temperature=0.7,
                 do_sample=True,
-                top_p=0.9
             )
-            # Limpiar memoria GPU
             torch.cuda.empty_cache()
         response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # Extraer solo la respuesta del asistente
         return response.split("<|assistant|>")[-1].strip()
 def main():
     st.set_page_config(page_title="Llama 3.2 Chat", page_icon="🦙")

 from huggingface_hub import login
 import os
+##################################################################
 def setup_llama3_auth():
     """Configurar autenticación para Llama 3"""
     if 'HUGGING_FACE_TOKEN_3' in st.secrets:
 class Llama3Demo:
     def __init__(self):
         setup_llama3_auth()
         self.model_name = "meta-llama/Llama-3.2-3B-Instruct"
         self._model = None
         self._tokenizer = None
+        # Configuración de cuantización
+        self.quantization_config = BitsAndBytesConfig(
+            load_in_8bit=True,
+            bnb_4bit_compute_dtype=torch.float16
+        )
     @property
     def model(self):
         if self._model is None:
                     self.model_name,
                     torch_dtype=torch.float16,
                     device_map="auto",
+                    quantization_config=self.quantization_config,  # Nueva forma de configurar cuantización
+                    token=st.secrets['HUGGING_FACE_TOKEN_3']  # Actualizado de use_auth_token a token
                 )
             except Exception as e:
                 st.error(f"Error cargando el modelo: {str(e)}")
                 raise e
         return self._model
             try:
                 self._tokenizer = AutoTokenizer.from_pretrained(
                     self.model_name,
+                    token=st.secrets['HUGGING_FACE_TOKEN_3']  # Actualizado de use_auth_token a token
                 )
             except Exception as e:
                 st.error(f"Error cargando el tokenizer: {str(e)}")
                 raise e
         return self._tokenizer
+##################################################################
     def generate_response(self, prompt: str, max_new_tokens: int = 512) -> str:
         formatted_prompt = f"""<|system|>You are a helpful AI assistant.</s>
 <|user|>{prompt}</s>
 <|assistant|>"""
         inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.model.device)
+        # Asegurar que tenemos un pad_token_id válido
+        if self.tokenizer.pad_token_id is None:
+            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
         with torch.no_grad():
             outputs = self.model.generate(
                 **inputs,
                 num_return_sequences=1,
                 temperature=0.7,
                 do_sample=True,
+                top_p=0.9,
+                pad_token_id=self.tokenizer.pad_token_id  # Explícitamente establecer pad_token_id
             )
             torch.cuda.empty_cache()
         response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
         return response.split("<|assistant|>")[-1].strip()
+##################################################################
 def main():
     st.set_page_config(page_title="Llama 3.2 Chat", page_icon="🦙")