CamiloVega committed
Commit e308af0 · verified · 1 parent: 45fe324

Update app.py

Files changed (1): app.py (+21 -18)
app.py CHANGED
@@ -14,7 +14,7 @@ logging.basicConfig(
 logger = logging.getLogger(__name__)
 
 # Define the model name
-model_name = "microsoft/phi-2"
+model_name = "meta-llama/Llama-2-7b-hf"
 
 try:
     logger.info("Starting model initialization...")
@@ -32,19 +32,22 @@ try:
     logger.info("Loading tokenizer...")
     tokenizer = AutoTokenizer.from_pretrained(
         model_name,
-        trust_remote_code=True
+        trust_remote_code=True,
+        token=True  # You'll need to set your HF token here
     )
+    tokenizer.pad_token = tokenizer.eos_token
     logger.info("Tokenizer loaded successfully")
 
-    # Load model
+    # Load model with device map
     logger.info("Loading model...")
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         torch_dtype=torch.float16 if device == "cuda" else torch.float32,
-        trust_remote_code=True
+        trust_remote_code=True,
+        token=True,  # You'll need to set your HF token here
+        device_map="auto",
+        load_in_8bit=True
     )
-    if device == "cuda":
-        model = model.to(device)
     logger.info("Model loaded successfully")
 
     # Create pipeline
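Two notes on the new loading code. Setting `tokenizer.pad_token = tokenizer.eos_token` is needed because the Llama 2 tokenizer ships without a pad token. And `load_in_8bit=True` requires the bitsandbytes package and a CUDA device; recent transformers releases route 8-bit loading through a BitsAndBytesConfig object instead of the bare flag. A hedged equivalent of the call above under that newer API:

```python
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Equivalent 8-bit load expressed with the config object; on versions
# that still accept load_in_8bit=True the behavior should match.
quant_config = BitsAndBytesConfig(load_in_8bit=True)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    quantization_config=quant_config,
    device_map="auto",  # let accelerate place the weights
    token=True,         # reuse the cached Hugging Face credentials
)
```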
@@ -58,7 +61,7 @@ try:
         temperature=0.7,
         top_p=0.9,
         repetition_penalty=1.1,
-        device=0 if device == "cuda" else -1
+        device_map="auto"
     )
     logger.info("Pipeline created successfully")
 
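A caveat on the pipeline change: since the model was already loaded with device_map="auto", accelerate has placed its weights, and depending on the transformers version passing device or device_map to pipeline again is either redundant or rejected. A sketch that simply reuses the model's existing placement, assuming the task is "text-generation" (max_new_tokens is an illustrative value, not from this commit):

```python
from transformers import pipeline

# The model is already dispatched by accelerate, so no device argument
# is passed; the pipeline reuses the model's current placement.
model_gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.1,
)
```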
@@ -69,8 +72,9 @@ except Exception as e:
 # Configure system message
 system_message = """You are AQuaBot, an AI assistant aware of environmental impact.
 You help users with any topic while raising awareness about water consumption
-in AI. Did you know that training GPT-3 consumed 5.4 million liters of water,
-equivalent to the daily consumption of a city of 10,000 people?"""
+in AI. Did you know that training large language models like Llama 2 can consume
+substantial amounts of water due to the cooling requirements of data centers?
+Let's work together while being mindful of our environmental impact."""
 
 # Constants for water consumption calculation
 WATER_PER_TOKEN = {
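These constants feed calculate_water_consumption, whose body sits outside this diff. A hypothetical reconstruction of how such a helper could use them; the "input"/"output" key names and the token-counting approach are assumptions, not the app's actual code:

```python
def calculate_water_consumption(text, is_input=True):
    # Hypothetical sketch: count tokens with the loaded tokenizer and
    # scale by a per-token water figure; app.py's real implementation
    # is not shown in this diff.
    n_tokens = len(tokenizer.encode(text))
    return n_tokens * WATER_PER_TOKEN["input" if is_input else "output"]
```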
@@ -110,13 +114,13 @@ def generate_response(user_input, chat_history):
     input_water_consumption = calculate_water_consumption(user_input, True)
     total_water_consumption += input_water_consumption
 
-    # Create prompt
+    # Create prompt with Llama 2 chat format
    conversation_history = ""
     if chat_history:
         for message in chat_history:
-            conversation_history += f"User: {message[0]}\nAssistant: {message[1]}\n"
+            conversation_history += f"[INST] {message[0]} [/INST] {message[1]} "
 
-    prompt = f"{system_message}\n\n{conversation_history}User: {user_input}\nAssistant:"
+    prompt = f"{system_message}\n\n{conversation_history}[INST] {user_input} [/INST]"
 
     logger.info("Generating model response...")
     outputs = model_gen(
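One caution on the new prompt format: the [INST] ... [/INST] tags are the Llama 2 chat convention, but the model loaded above, meta-llama/Llama-2-7b-hf, is the base variant, which was not fine-tuned on those tags. The canonical chat format documented by Meta also wraps the system prompt in <<SYS>> markers; for comparison, a single-turn prompt for the -chat-hf variants looks like this:

```python
# Canonical single-turn Llama 2 chat prompt (for the -chat-hf variants);
# the leading <s> BOS token is omitted because the tokenizer adds it.
prompt = (
    f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n"
    f"{user_input} [/INST]"
)
```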
@@ -167,8 +171,8 @@ try:
     <div style="text-align: center; max-width: 800px; margin: 0 auto; padding: 20px;">
         <h1 style="color: #2d333a;">AQuaBot</h1>
         <p style="color: #4a5568;">
-            Welcome to AQuaBot - An AI assistant that helps raise awareness about water
-            consumption in language models.
+            Welcome to AQuaBot - An AI assistant powered by Llama 2 that helps raise awareness
+            about water consumption in language models.
         </p>
     </div>
     """)
@@ -207,10 +211,9 @@ try:
     </div>
     <div style="border-top: 1px solid #ddd; padding-top: 15px;">
         <p style="color: #666; font-size: 14px;">
-            <strong>Important note:</strong> This application uses Microsoft's Phi-2 model
-            instead of GPT-3 for availability and cost reasons. However,
-            the water consumption calculations per token (input/output) are based on the
-            conclusions from the cited paper.
+            <strong>Important note:</strong> This application uses Meta's Llama 2 (7B parameters) model.
+            The water consumption calculations per token (input/output) are based on the
+            general conclusions from the cited paper about large language models.
         </p>
     </div>
 </div>
 