Spaces:

CamiloVega
/

aQuaBot

Sleeping

App Files Files Community

CamiloVega committed on Oct 30, 2024

Commit

2b8a955

verified ·

1 Parent(s): 89bb689

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -9

app.py CHANGED Viewed

@@ -12,7 +12,7 @@ logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s'
 )
-logger = logging.getLogger(__name__)
 # Get HuggingFace token from environment variable
 hf_token = os.environ.get('HUGGINGFACE_TOKEN')
@@ -94,9 +94,12 @@ def generate_response(user_input, chat_history):
         conversation_history = ""
         if chat_history:
             for message in chat_history:
-                conversation_history += f"{message[0]} {message[1]} "
-        prompt = f"[INST] {system_message}\n\n{conversation_history}{user_input}"
         logger.info("Generating model response...")
         outputs = model_gen(
@@ -104,16 +107,38 @@ def generate_response(user_input, chat_history):
             max_new_tokens=256,
             return_full_text=False,
             pad_token_id=tokenizer.eos_token_id,
         )
         logger.info("Model response generated successfully")
         assistant_response = outputs[0]['generated_text'].strip()
         # Calculate water consumption for output
         output_water_consumption = calculate_water_consumption(assistant_response, False)
         total_water_consumption += output_water_consumption
-        # Update chat history with the new formatted messages
         chat_history.append([user_input, assistant_response])
         # Prepare water consumption message
@@ -138,7 +163,6 @@ def generate_response(user_input, chat_history):
         error_message = f"An error occurred: {str(e)}"
         chat_history.append([user_input, error_message])
         return chat_history, show_water
 # Constants for water consumption calculation
 WATER_PER_TOKEN = {
@@ -275,10 +299,9 @@ try:
                 </div>
                 <div style="border-top: 1px solid #ddd; padding-top: 15px;">
                     <p style="color: #666; font-size: 14px;">
-                        <strong>Important note:</strong> This application uses Meta Llama-2-7b model
-                        instead of GPT-3 for availability and cost reasons. However,
-                        the water consumption calculations per token (input/output) are based on the
-                        conclusions from the cited paper.
                     </p>
                 </div>
             </div>

     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s'
 )
+logger = logging.getLogger(__name__)
 # Get HuggingFace token from environment variable
 hf_token = os.environ.get('HUGGINGFACE_TOKEN')
         conversation_history = ""
         if chat_history:
             for message in chat_history:
+                # Remove any [INST] tags from the history
+                user_msg = message[0].replace("[INST]", "").replace("[/INST]", "").strip()
+                assistant_msg = message[1].replace("[INST]", "").replace("[/INST]", "").strip()
+                conversation_history += f"[INST] {user_msg} [/INST] {assistant_msg} "
+        prompt = f"<s>[INST] {system_message}\n\n{conversation_history}[INST] {user_input} [/INST]"
         logger.info("Generating model response...")
         outputs = model_gen(
             max_new_tokens=256,
             return_full_text=False,
             pad_token_id=tokenizer.eos_token_id,
+            do_sample=True,
+            temperature=0.7,
+            top_p=0.9,
+            repetition_penalty=1.1
         )
         logger.info("Model response generated successfully")
+        # Clean up the response by removing any [INST] tags and trimming
         assistant_response = outputs[0]['generated_text'].strip()
+        assistant_response = assistant_response.replace("[INST]", "").replace("[/INST]", "").strip()
+        # If the response is too short, try to generate a more detailed one
+        if len(assistant_response.split()) < 10:
+            prompt += "\nPlease provide a more detailed answer with context and explanation."
+            outputs = model_gen(
+                prompt,
+                max_new_tokens=256,
+                return_full_text=False,
+                pad_token_id=tokenizer.eos_token_id,
+                do_sample=True,
+                temperature=0.7,
+                top_p=0.9,
+                repetition_penalty=1.1
+            )
+            assistant_response = outputs[0]['generated_text'].strip()
+            assistant_response = assistant_response.replace("[INST]", "").replace("[/INST]", "").strip()
         # Calculate water consumption for output
         output_water_consumption = calculate_water_consumption(assistant_response, False)
         total_water_consumption += output_water_consumption
+        # Update chat history with the cleaned messages
         chat_history.append([user_input, assistant_response])
         # Prepare water consumption message
         error_message = f"An error occurred: {str(e)}"
         chat_history.append([user_input, error_message])
         return chat_history, show_water
 # Constants for water consumption calculation
 WATER_PER_TOKEN = {
                 </div>
                 <div style="border-top: 1px solid #ddd; padding-top: 15px;">
                     <p style="color: #666; font-size: 14px;">
+                        <strong>Important note:</strong> This application uses Meta's Llama 2 (7B parameters) model.
+                        The water consumption calculations per token (input/output) are based on the
+                        general conclusions from the cited paper about large language models.
                     </p>
                 </div>
             </div>