CamiloVega committed on
Commit 83f5322 (verified)
1 Parent(s): 23734f7

Update app.py

Files changed (1)
  1. app.py +13 -25
app.py CHANGED
@@ -30,11 +30,6 @@ try:
     device = "cuda" if torch.cuda.is_available() else "cpu"
     logger.info(f"Using device: {device}")
 
-    # Configure PyTorch settings
-    if device == "cuda":
-        torch.backends.cuda.matmul.allow_tf32 = True
-        torch.backends.cudnn.allow_tf32 = True
-
     # Load tokenizer
     logger.info("Loading tokenizer...")
     tokenizer = AutoTokenizer.from_pretrained(
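The dropped block only toggled TF32 kernels: on Ampere-or-newer GPUs, TF32 trades a sliver of float32 matmul precision for faster tensor-core throughput, so removing it affects speed, not correctness. For reference, a minimal standalone sketch of those settings:

    import torch

    # Allow TF32 tensor-core math for matmul and cuDNN kernels
    # (CUDA only; a no-op on pre-Ampere GPUs and on CPU).
    if torch.cuda.is_available():
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True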
@@ -45,16 +40,15 @@ try:
     tokenizer.pad_token = tokenizer.eos_token
     logger.info("Tokenizer loaded successfully")
 
-    # Load model with optimized configuration
+    # Load model with basic configuration
+    # Accelerate helps with automatic device mapping for large models
     logger.info("Loading model...")
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         torch_dtype=torch.float16 if device == "cuda" else torch.float32,
         trust_remote_code=True,
         token=hf_token,
-        device_map="auto",
-        max_memory={0: "12GiB"} if device == "cuda" else None,
-        load_in_8bit=True if device == "cuda" else False
+        device_map="auto"  # Accelerate automatically handles model distribution
     )
     logger.info("Model loaded successfully")
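This hunk drops bitsandbytes 8-bit loading and the explicit 12 GiB memory cap, leaving fp16 weights placed by Accelerate. If 8-bit quantization is ever wanted back, the `load_in_8bit` kwarg has been superseded by `BitsAndBytesConfig` in recent transformers releases; a hedged sketch (assumes bitsandbytes and accelerate are installed in the Space):

    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    # Sketch only: 8-bit weights via the quantization_config API rather
    # than the older load_in_8bit kwarg. model_name and hf_token are the
    # same variables used above.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        device_map="auto",
        token=hf_token,
        trust_remote_code=True,
    )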
 
@@ -64,12 +58,11 @@ try:
         "text-generation",
         model=model,
         tokenizer=tokenizer,
-        max_new_tokens=512,  # Increased for more detailed responses
+        max_new_tokens=512,
         do_sample=True,
-        temperature=0.8,  # Slightly increased for more creative responses
-        top_p=0.95,  # Increased for more varied responses
-        top_k=50,  # Added top_k for better response quality
-        repetition_penalty=1.2,  # Increased to reduce repetition
+        temperature=0.8,
+        top_p=0.95,
+        repetition_penalty=1.2,
         device_map="auto"
     )
     logger.info("Pipeline created successfully")
@@ -78,15 +71,13 @@ except Exception as e:
     logger.error(f"Error during initialization: {str(e)}")
     raise
 
-# Improved system message with better context and guidelines
+# Improved system message
 system_message = """You are AQuaBot, an AI assistant focused on providing accurate and environmentally conscious information. Your responses should be:
 1. Clear and concise yet informative
 2. Based on verified information when discussing economic and financial topics
 3. Balanced and well-reasoned
 4. Mindful of environmental impact
-5. Professional but conversational in tone
-
-Maintain a helpful and knowledgeable demeanor while avoiding speculation. If you're unsure about something, acknowledge it openly."""
+5. Professional but conversational in tone"""
 
 @spaces.GPU(duration=60)
 @torch.inference_mode()
@@ -99,7 +90,7 @@ def generate_response(user_input, chat_history):
         input_water_consumption = calculate_water_consumption(user_input, True)
         total_water_consumption += input_water_consumption
 
-        # Create a clean conversation history without [INST] tags
+        # Create a clean conversation history
         conversation_history = ""
         if chat_history:
             for user_msg, assistant_msg in chat_history:
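Judging by the cleanup code later in the diff, the history is flattened into plain `User:` / `Assistant:` turns; a hedged sketch of what the loop presumably builds (the exact prompt assembly is not shown in the diff):

    # Sketch of the implied history format; `prompt` is a hypothetical name.
    conversation_history = ""
    for user_msg, assistant_msg in chat_history:
        conversation_history += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
    prompt = f"{system_message}\n\n{conversation_history}User: {user_input}\nAssistant:"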
@@ -117,13 +108,9 @@
         )
         logger.info("Model response generated successfully")
 
-        # Clean up response and remove any remaining [INST] tags
+        # Clean up response
         assistant_response = outputs[0]['generated_text'].strip()
         assistant_response = assistant_response.split('User:')[0].split('Assistant:')[-1].strip()
-
-        # Add fact-check disclaimer for economic/financial responses
-        if any(keyword in user_input.lower() for keyword in ['invest', 'money', 'salary', 'cost', 'wage', 'economy']):
-            assistant_response += "\n\nNote: Financial information provided should be verified with current market data and professional advisors."
 
         # Calculate water consumption for output
         output_water_consumption = calculate_water_consumption(assistant_response, False)
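The surviving two-step split keeps only the model's first turn: anything after a hallucinated `User:` marker is cut, then everything after the last remaining `Assistant:` marker is kept. A quick worked example:

    raw = "Assistant: Water use varies by model and data center.\nUser: more?"
    cleaned = raw.split('User:')[0].split('Assistant:')[-1].strip()
    print(cleaned)  # -> "Water use varies by model and data center."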
@@ -132,7 +119,7 @@
         # Update chat history
         chat_history.append([user_input, assistant_response])
 
-        # Prepare water consumption message with improved styling
+        # Water consumption message
         water_message = f"""
         <div style="position: fixed; top: 20px; right: 20px;
                     background-color: white; padding: 15px;
@@ -155,6 +142,7 @@ def generate_response(user_input, chat_history):
         chat_history.append([user_input, error_message])
         return chat_history, show_water
 
+
 # Constants for water consumption calculation
 WATER_PER_TOKEN = {
     "input_training": 0.0000309,
 