Spaces:

CamiloVega
/

aQuaBot

Running on Zero

App Files Files Community

CamiloVega committed on Oct 29, 2024

Commit

a9cb5eb

verified ·

1 Parent(s): 935cc40

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -65

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import os
 import spaces
 from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
@@ -6,11 +5,6 @@ import torch
 import logging
 import sys
 from accelerate import infer_auto_device_map, init_empty_weights
-from huggingface_hub import login
-from dotenv import load_dotenv
-# Load environment variables
-load_dotenv()
 # Configure logging
 logging.basicConfig(
@@ -19,22 +13,8 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
-# Get HuggingFace token from environment variable
-hf_token = os.getenv('HUGGINGFACE_TOKEN')
-if not hf_token:
-    logger.error("HUGGINGFACE_TOKEN environment variable not found")
-    raise ValueError("Please set the HUGGINGFACE_TOKEN environment variable")
-# Login to Hugging Face
-try:
-    login(token=hf_token)
-    logger.info("Successfully logged in to Hugging Face")
-except Exception as e:
-    logger.error(f"Failed to login to Hugging Face: {str(e)}")
-    raise
 # Define the model name
-model_name = "meta-llama/Llama-2-7b-hf"
 try:
     logger.info("Starting model initialization...")
@@ -56,14 +36,13 @@ try:
     )
     logger.info("Tokenizer loaded successfully")
-    # Load model with 8-bit quantization
     logger.info("Loading model...")
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         torch_dtype=torch.float16 if device == "cuda" else torch.float32,
-        trust_remote_code=True,
-        load_in_8bit=True,
-        device_map="auto"
     )
     logger.info("Model loaded successfully")
@@ -87,14 +66,13 @@ except Exception as e:
     raise
 # Configure system message
-system_message = """You are AQuaBot, an AI assistant aware of environmental impact.
-You help users with any topic while raising awareness about water consumption
-in AI. Did you know that training GPT-3 consumed 5.4 million liters of water,
-equivalent to the daily consumption of a city of 10,000 people?"""
-# Llama 2 specific tokens
-B_INST, E_INST = "[INST]", "[/INST]"
-B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
 # Constants for water consumption calculation
 WATER_PER_TOKEN = {
@@ -105,6 +83,7 @@ WATER_PER_TOKEN = {
 }
 # Initialize variables
 total_water_consumption = 0
 def calculate_tokens(text):
@@ -120,33 +99,30 @@ def calculate_water_consumption(text, is_input=True):
         return tokens * (WATER_PER_TOKEN["input_training"] + WATER_PER_TOKEN["input_inference"])
     return tokens * (WATER_PER_TOKEN["output_training"] + WATER_PER_TOKEN["output_inference"])
-def format_prompt(user_input, chat_history):
-    """
-    Format the prompt according to Llama 2 specific style
-    """
-    prompt = f"{B_INST}{B_SYS}{system_message}{E_SYS}"
-    if chat_history:
-        for user_msg, assistant_msg in chat_history:
-            prompt += f"{user_msg}{E_INST}{assistant_msg}{B_INST}"
-    prompt += f"{user_input}{E_INST}"
-    return prompt
 @spaces.GPU(duration=60)
 @torch.inference_mode()
 def generate_response(user_input, chat_history):
     try:
         logger.info("Generating response for user input...")
-        global total_water_consumption
         # Calculate water consumption for input
         input_water_consumption = calculate_water_consumption(user_input, True)
         total_water_consumption += input_water_consumption
-        # Format prompt for Llama 2
-        prompt = format_prompt(user_input, chat_history)
         logger.info("Generating model response...")
         outputs = model_gen(
@@ -163,8 +139,11 @@ def generate_response(user_input, chat_history):
         output_water_consumption = calculate_water_consumption(assistant_response, False)
         total_water_consumption += output_water_consumption
         # Update chat history
-        chat_history.append([user_input, assistant_response])
         # Prepare water consumption message
         water_message = f"""
@@ -186,7 +165,7 @@ def generate_response(user_input, chat_history):
     except Exception as e:
         logger.error(f"Error in generate_response: {str(e)}")
         error_message = f"An error occurred: {str(e)}"
-        chat_history.append([user_input, error_message])
         return chat_history, show_water
 # Create Gradio interface
@@ -197,13 +176,13 @@ try:
             <div style="text-align: center; max-width: 800px; margin: 0 auto; padding: 20px;">
                 <h1 style="color: #2d333a;">AQuaBot</h1>
                 <p style="color: #4a5568;">
-                    Welcome to AQuaBot - An AI assistant powered by Llama 2 that helps raise awareness
-                    about water consumption in language models.
                 </p>
             </div>
         """)
-        chatbot = gr.Chatbot()
         message = gr.Textbox(
             placeholder="Type your message here...",
             show_label=False
@@ -223,7 +202,7 @@ try:
         """)
         clear = gr.Button("Clear Chat")
-        # Add footer with citation, disclaimer, and credits
         gr.HTML("""
             <div style="text-align: center; max-width: 800px; margin: 20px auto; padding: 20px;
                         background-color: #f8f9fa; border-radius: 10px;">
@@ -237,15 +216,10 @@ try:
                 </div>
                 <div style="border-top: 1px solid #ddd; padding-top: 15px;">
                     <p style="color: #666; font-size: 14px;">
-                        <strong>Model Information:</strong> This application uses Meta's Llama 2 (7B) model,
-                        a state-of-the-art language model fine-tuned for chat interactions. Water consumption
-                        calculations are based on the methodology from the cited paper.
-                    </p>
-                </div>
-                <div style="border-top: 1px solid #ddd; margin-top: 15px; padding-top: 15px;">
-                    <p style="color: #666; font-size: 14px;">
-                        Created by Camilo Vega - AI Consultant<br>
-                        <a href="https://github.com/vegadevs/aquabot" target="_blank">GitHub Repository</a>
                     </p>
                 </div>
             </div>

 import spaces
 from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
 import logging
 import sys
 from accelerate import infer_auto_device_map, init_empty_weights
 # Configure logging
 logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
 # Define the model name
+model_name = "microsoft/phi-2"
 try:
     logger.info("Starting model initialization...")
     )
     logger.info("Tokenizer loaded successfully")
+    # Load model
     logger.info("Loading model...")
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+        device_map="auto",
+        trust_remote_code=True
     )
     logger.info("Model loaded successfully")
     raise
 # Configure system message
+system_message = {
+    "role": "system",
+    "content": """You are AQuaBot, an AI assistant aware of environmental impact.
+    You help users with any topic while raising awareness about water consumption
+    in AI. Did you know that training GPT-3 consumed 5.4 million liters of water,
+    equivalent to the daily consumption of a city of 10,000 people?"""
+}
 # Constants for water consumption calculation
 WATER_PER_TOKEN = {
 }
 # Initialize variables
+messages = [system_message]
 total_water_consumption = 0
 def calculate_tokens(text):
         return tokens * (WATER_PER_TOKEN["input_training"] + WATER_PER_TOKEN["input_inference"])
     return tokens * (WATER_PER_TOKEN["output_training"] + WATER_PER_TOKEN["output_inference"])
 @spaces.GPU(duration=60)
 @torch.inference_mode()
 def generate_response(user_input, chat_history):
     try:
         logger.info("Generating response for user input...")
+        global total_water_consumption, messages
         # Calculate water consumption for input
         input_water_consumption = calculate_water_consumption(user_input, True)
         total_water_consumption += input_water_consumption
+        # Add user input to messages
+        messages.append({"role": "user", "content": user_input})
+        # Create prompt
+        prompt = ""
+        for m in messages:
+            if m["role"] == "system":
+                prompt += f"<START SYSTEM MESSAGE>\n{m['content']}\n<END SYSTEM MESSAGE>\n\n"
+            elif m["role"] == "user":
+                prompt += f"User: {m['content']}\n"
+            else:
+                prompt += f"Assistant: {m['content']}\n"
+        prompt += "Assistant:"
         logger.info("Generating model response...")
         outputs = model_gen(
         output_water_consumption = calculate_water_consumption(assistant_response, False)
         total_water_consumption += output_water_consumption
+        # Add assistant's response to messages
+        messages.append({"role": "assistant", "content": assistant_response})
         # Update chat history
+        chat_history.append((user_input, assistant_response))
         # Prepare water consumption message
         water_message = f"""
     except Exception as e:
         logger.error(f"Error in generate_response: {str(e)}")
         error_message = f"An error occurred: {str(e)}"
+        chat_history.append((user_input, error_message))
         return chat_history, show_water
 # Create Gradio interface
             <div style="text-align: center; max-width: 800px; margin: 0 auto; padding: 20px;">
                 <h1 style="color: #2d333a;">AQuaBot</h1>
                 <p style="color: #4a5568;">
+                    Welcome to AQuaBot - An AI assistant that helps raise awareness about water
+                    consumption in language models.
                 </p>
             </div>
         """)
+        chatbot = gr.Chatbot(type="messages")
         message = gr.Textbox(
             placeholder="Type your message here...",
             show_label=False
         """)
         clear = gr.Button("Clear Chat")
+        # Add footer with citation and disclaimer
         gr.HTML("""
             <div style="text-align: center; max-width: 800px; margin: 20px auto; padding: 20px;
                         background-color: #f8f9fa; border-radius: 10px;">
                 </div>
                 <div style="border-top: 1px solid #ddd; padding-top: 15px;">
                     <p style="color: #666; font-size: 14px;">
+                        <strong>Important note:</strong> This application uses Microsoft's Phi-2 model
+                        instead of GPT-3 for availability and cost reasons. However,
+                        the water consumption calculations per token (input/output) are based on the
+                        conclusions from the cited paper.
                     </p>
                 </div>
             </div>