CamiloVega committed
Commit 1bf4ae2 (verified)
Parent(s): c9a896c

Update app.py

Files changed (1):
  1. app.py +78 -147
app.py CHANGED
@@ -3,61 +3,60 @@ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
 import torch
 import logging
-import sys
 import os
 from accelerate import infer_auto_device_map, init_empty_weights
 
 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s'
 )
 logger = logging.getLogger(__name__)
 
 # Get HuggingFace token from environment variable
 hf_token = os.environ.get('HUGGINGFACE_TOKEN')
 if not hf_token:
     logger.error("HUGGINGFACE_TOKEN environment variable not set")
     raise ValueError("Please set the HUGGINGFACE_TOKEN environment variable")
 
 # Define the model name
 model_name = "meta-llama/Llama-2-7b-hf"
 
 try:
     logger.info("Starting model initialization...")
 
     # Check CUDA availability
     device = "cuda" if torch.cuda.is_available() else "cpu"
     logger.info(f"Using device: {device}")
 
     # Configure PyTorch settings
     if device == "cuda":
         torch.backends.cuda.matmul.allow_tf32 = True
         torch.backends.cudnn.allow_tf32 = True
 
     # Load tokenizer
     logger.info("Loading tokenizer...")
     tokenizer = AutoTokenizer.from_pretrained(
         model_name,
         trust_remote_code=True,
-        token=hf_token
+        use_auth_token=hf_token
     )
     tokenizer.pad_token = tokenizer.eos_token
     logger.info("Tokenizer loaded successfully")
 
     # Load model with basic configuration
     logger.info("Loading model...")
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         torch_dtype=torch.float16 if device == "cuda" else torch.float32,
         trust_remote_code=True,
-        token=hf_token,
+        use_auth_token=hf_token,
         device_map="auto"
     )
     logger.info("Model loaded successfully")
 
     # Create pipeline
     logger.info("Creating generation pipeline...")
     model_gen = pipeline(
         "text-generation",
         model=model,
@@ -69,102 +68,22 @@ try:
         repetition_penalty=1.1,
         device_map="auto"
     )
     logger.info("Pipeline created successfully")
 
 except Exception as e:
     logger.error(f"Error during initialization: {str(e)}")
     raise
 
 # Configure system message
-
-system_message = """You are a helpful AI assistant called AQuaBot. You provide direct, clear, and detailed answers to questions while being aware of environmental impact. Keep your responses natural and informative, but concise. Always provide context and explanations with your answers. Respond directly to questions without using any special tags or markers."""
-
-@spaces.GPU(duration=60)
-@torch.inference_mode()
-def generate_response(user_input, chat_history):
-    try:
-        logger.info("Generating response for user input...")
-        global total_water_consumption
-
-        # Calculate water consumption for input
-        input_water_consumption = calculate_water_consumption(user_input, True)
-        total_water_consumption += input_water_consumption
-
-        # Create prompt with Llama 2 chat format
-        conversation_history = ""
-        if chat_history:
-            for message in chat_history:
-                # Remove any [INST] tags from the history
-                user_msg = message[0].replace("[INST]", "").replace("[/INST]", "").strip()
-                assistant_msg = message[1].replace("[INST]", "").replace("[/INST]", "").strip()
-                conversation_history += f"[INST] {user_msg} [/INST] {assistant_msg} "
-
-        prompt = f"<s>[INST] {system_message}\n\n{conversation_history}[INST] {user_input} [/INST]"
-
-        logger.info("Generating model response...")
-        outputs = model_gen(
-            prompt,
-            max_new_tokens=256,
-            return_full_text=False,
-            pad_token_id=tokenizer.eos_token_id,
-            do_sample=True,
-            temperature=0.7,
-            top_p=0.9,
-            repetition_penalty=1.1
-        )
-        logger.info("Model response generated successfully")
-
-        # Clean up the response by removing any [INST] tags and trimming
-        assistant_response = outputs[0]['generated_text'].strip()
-        assistant_response = assistant_response.replace("[INST]", "").replace("[/INST]", "").strip()
-
-        # If the response is too short, try to generate a more detailed one
-        if len(assistant_response.split()) < 10:
-            prompt += "\nPlease provide a more detailed answer with context and explanation."
-            outputs = model_gen(
-                prompt,
-                max_new_tokens=256,
-                return_full_text=False,
-                pad_token_id=tokenizer.eos_token_id,
-                do_sample=True,
-                temperature=0.7,
-                top_p=0.9,
-                repetition_penalty=1.1
-            )
-            assistant_response = outputs[0]['generated_text'].strip()
-            assistant_response = assistant_response.replace("[INST]", "").replace("[/INST]", "").strip()
-
-        # Calculate water consumption for output
-        output_water_consumption = calculate_water_consumption(assistant_response, False)
-        total_water_consumption += output_water_consumption
-
-        # Update chat history with the cleaned messages
-        chat_history.append([user_input, assistant_response])
-
-        # Prepare water consumption message
-        water_message = f"""
-        <div style="position: fixed; top: 20px; right: 20px;
-                    background-color: white; padding: 15px;
-                    border: 2px solid #ff0000; border-radius: 10px;
-                    box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
-            <div style="color: #ff0000; font-size: 24px; font-weight: bold;">
-                💧 {total_water_consumption:.4f} ml
-            </div>
-            <div style="color: #666; font-size: 14px;">
-                Water Consumed
-            </div>
-        </div>
-        """
-
-        return chat_history, water_message
-
-    except Exception as e:
-        logger.error(f"Error in generate_response: {str(e)}")
-        error_message = f"An error occurred: {str(e)}"
-        chat_history.append([user_input, error_message])
-        return chat_history, show_water
+system_message = (
+    "You are a helpful AI assistant called AQuaBot. "
+    "You provide direct, clear, and detailed answers to questions while being aware of environmental impact. "
+    "Keep your responses natural and informative, but concise. "
+    "Always provide context and explanations with your answers. "
+    "Respond directly to questions without using any special tags or markers."
+)
 
 # Constants for water consumption calculation
 WATER_PER_TOKEN = {
     "input_training": 0.0000309,
     "output_training": 0.0000309,
@@ -172,15 +91,15 @@ WATER_PER_TOKEN = {
     "output_inference": 0.05
 }
 
 # Initialize variables
 total_water_consumption = 0
 
 def calculate_tokens(text):
     try:
         return len(tokenizer.encode(text))
     except Exception as e:
         logger.error(f"Error calculating tokens: {str(e)}")
         return len(text.split()) + len(text) // 4  # Fallback to approximation
 
 def calculate_water_consumption(text, is_input=True):
     tokens = calculate_tokens(text)
@@ -195,40 +114,53 @@ def format_message(role, content):
 @torch.inference_mode()
 def generate_response(user_input, chat_history):
     try:
         logger.info("Generating response for user input...")
         global total_water_consumption
 
         # Calculate water consumption for input
         input_water_consumption = calculate_water_consumption(user_input, True)
         total_water_consumption += input_water_consumption
 
-        # Create prompt with Llama 2 chat format
+        # Build the conversation history
         conversation_history = ""
         if chat_history:
             for message in chat_history:
-                conversation_history += f"[INST] {message[0]} [/INST] {message[1]} "
-
-        prompt = f"<s>[INST] {system_message}\n\n{conversation_history}[INST] {user_input} [/INST]"
+                user_msg = message[0].strip()
+                assistant_msg = message[1].strip()
+                conversation_history += f"[INST] {user_msg} [/INST] {assistant_msg} "
 
+        # Build the prompt following the correct format
+        prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n{conversation_history}[INST] {user_input} [/INST]"
+
         logger.info("Generating model response...")
         outputs = model_gen(
             prompt,
             max_new_tokens=256,
             return_full_text=False,
             pad_token_id=tokenizer.eos_token_id,
+            do_sample=True,
+            temperature=0.7,
+            top_p=0.9,
+            repetition_penalty=1.1
         )
         logger.info("Model response generated successfully")
 
+        # Get the assistant's response and clean up tags
         assistant_response = outputs[0]['generated_text'].strip()
 
+        # Strip the [INST] and [/INST] tags
+        if '[INST]' in assistant_response:
+            assistant_response = assistant_response.split('[/INST]')[-1].strip()
+        assistant_response = assistant_response.replace("[INST]", "").replace("[/INST]", "").strip()
+
         # Calculate water consumption for output
         output_water_consumption = calculate_water_consumption(assistant_response, False)
         total_water_consumption += output_water_consumption
 
-        # Update chat history with the new formatted messages
+        # Update the chat history
         chat_history.append([user_input, assistant_response])
 
         # Prepare water consumption message
         water_message = f"""
         <div style="position: fixed; top: 20px; right: 20px;
                     background-color: white; padding: 15px;
@@ -238,7 +170,7 @@ def generate_response(user_input, chat_history):
                 💧 {total_water_consumption:.4f} ml
             </div>
             <div style="color: #666; font-size: 14px;">
                 Water Consumed
             </div>
         </div>
         """
@@ -246,28 +178,28 @@ def generate_response(user_input, chat_history):
         return chat_history, water_message
 
     except Exception as e:
         logger.error(f"Error in generate_response: {str(e)}")
         error_message = f"An error occurred: {str(e)}"
         chat_history.append([user_input, error_message])
         return chat_history, show_water
 
 # Create Gradio interface
 try:
     logger.info("Creating Gradio interface...")
     with gr.Blocks(css="div.gradio-container {background-color: #f0f2f6}") as demo:
         gr.HTML("""
         <div style="text-align: center; max-width: 800px; margin: 0 auto; padding: 20px;">
             <h1 style="color: #2d333a;">AQuaBot</h1>
             <p style="color: #4a5568;">
                 Welcome to AQuaBot - An AI assistant that helps raise awareness
                 about water consumption in language models.
             </p>
         </div>
         """)
 
         chatbot = gr.Chatbot()
         message = gr.Textbox(
             placeholder="Type your message here...",
             show_label=False
         )
         show_water = gr.HTML(f"""
@@ -279,19 +211,19 @@ try:
                 💧 0.0000 ml
             </div>
             <div style="color: #666; font-size: 14px;">
                 Water Consumed
             </div>
         </div>
         """)
         clear = gr.Button("Clear Chat")
 
         # Add footer with citation and disclaimer
         gr.HTML("""
         <div style="text-align: center; max-width: 800px; margin: 20px auto; padding: 20px;
                     background-color: #f8f9fa; border-radius: 10px;">
             <div style="margin-bottom: 15px;">
                 <p style="color: #666; font-size: 14px; font-style: italic;">
                     Water consumption calculations are based on the study:<br>
                     Li, P. et al. (2023). Making AI Less Thirsty: Uncovering and Addressing the Secret Water
                     Footprint of AI Models. ArXiv Preprint,
                     <a href="https://arxiv.org/abs/2304.03271" target="_blank">https://arxiv.org/abs/2304.03271</a>
@@ -299,10 +231,9 @@ try:
             </div>
             <div style="border-top: 1px solid #ddd; padding-top: 15px;">
                 <p style="color: #666; font-size: 14px;">
-                    <strong>Important note:</strong> This application uses Meta Llama-2-7b model
-                    instead of GPT-3 for availability and cost reasons. However,
-                    the water consumption calculations per token (input/output) are based on the
-                    conclusions from the cited paper.
+                    <strong>Important note:</strong> This application uses Meta's Llama 2 model (7B parameters).
+                    The water consumption calculations per token (input/output) are based on the
+                    general conclusions of the cited paper on large language models.
                 </p>
             </div>
         </div>
@@ -311,7 +242,7 @@ try:
         def submit(user_input, chat_history):
             return generate_response(user_input, chat_history)
 
         # Configure event handlers
         message.submit(submit, [message, chatbot], [chatbot, show_water])
         clear.click(
             lambda: ([], f"""
@@ -323,7 +254,7 @@ try:
                 💧 0.0000 ml
             </div>
             <div style="color: #666; font-size: 14px;">
                 Water Consumed
             </div>
         </div>
         """),
@@ -331,12 +262,12 @@ try:
             [chatbot, show_water]
         )
 
         logger.info("Gradio interface created successfully")
 
     # Launch the application
     logger.info("Launching application...")
     demo.launch()
 
 except Exception as e:
     logger.error(f"Error in Gradio interface creation: {str(e)}")
     raise
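For reference, the rewritten prompt follows the Llama 2 chat convention: the system prompt is wrapped in <<SYS>> markers inside the first [INST] block, and earlier turns are appended as [INST] user [/INST] assistant pairs. A minimal, self-contained sketch of the string the new generate_response assembles (the shortened system message and the chat history below are made-up example data):

# Sketch of the prompt layout built by the new generate_response.
# The format string comes from the diff above; the history is hypothetical.
system_message = "You are a helpful AI assistant called AQuaBot."
chat_history = [["How much water does a shower use?", "Roughly 60 to 80 liters for ten minutes."]]

conversation_history = ""
for user_msg, assistant_msg in chat_history:
    conversation_history += f"[INST] {user_msg.strip()} [/INST] {assistant_msg.strip()} "

user_input = "And a bath?"
prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n{conversation_history}[INST] {user_input} [/INST]"
print(prompt)

The model continues the text after the final [/INST], which is why the new code also strips any [INST]/[/INST] tags that leak into the generated output.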
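The water accounting itself is plain arithmetic: token count times a per-token rate. A worked example using only the constants visible in the diff (the body of calculate_water_consumption and the input-inference rate are not shown in this commit, so the key selection here is an assumption):

# Hedged sketch of the per-message accounting in app.py.
# Only the rates visible in the diff are listed; which key is applied
# per message is assumed, not shown in the commit.
WATER_PER_TOKEN = {
    "input_training": 0.0000309,   # ml per token
    "output_training": 0.0000309,  # ml per token
    "output_inference": 0.05,      # ml per token
}

output_tokens = 40  # e.g., a 40-token model reply
print(output_tokens * WATER_PER_TOKEN["output_inference"])  # 40 * 0.05 = 2.0 ml

At 0.05 ml per generated token, a single full 256-token completion would add about 12.8 ml to the counter shown in the interface.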