CamiloVega committed on
Commit 83f5322 (verified)
1 Parent(s): 23734f7

Update app.py

Files changed (1)
  1. app.py +13 -25
app.py CHANGED
@@ -30,11 +30,6 @@ try:
     device = "cuda" if torch.cuda.is_available() else "cpu"
     logger.info(f"Using device: {device}")
 
-    # Configure PyTorch settings
-    if device == "cuda":
-        torch.backends.cuda.matmul.allow_tf32 = True
-        torch.backends.cudnn.allow_tf32 = True
-
     # Load tokenizer
     logger.info("Loading tokenizer...")
     tokenizer = AutoTokenizer.from_pretrained(
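The dropped block only toggled TF32 kernels: on Ampere-or-newer GPUs, TF32 trades a sliver of float32 matmul precision for faster tensor-core throughput, so removing it affects speed, not correctness. For reference, a minimal standalone sketch of those settings:

    import torch

    # Allow TF32 tensor-core math for matmul and cuDNN kernels
    # (CUDA only; a no-op on pre-Ampere GPUs and on CPU).
    if torch.cuda.is_available():
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True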
@@ -45,16 +40,15 @@ try:
     tokenizer.pad_token = tokenizer.eos_token
     logger.info("Tokenizer loaded successfully")
 
-    # Load model with optimized configuration
+    # Load model with basic configuration
+    # Accelerate helps with automatic device mapping for large models
     logger.info("Loading model...")
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         torch_dtype=torch.float16 if device == "cuda" else torch.float32,
         trust_remote_code=True,
         token=hf_token,
-        device_map="auto",
-        max_memory={0: "12GiB"} if device == "cuda" else None,
-        load_in_8bit=True if device == "cuda" else False
+        device_map="auto"  # Accelerate automatically handles model distribution
     )
     logger.info("Model loaded successfully")
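This hunk drops bitsandbytes 8-bit loading and the explicit 12 GiB memory cap, leaving fp16 weights placed by Accelerate. If 8-bit quantization is ever wanted back, the `load_in_8bit` kwarg has been superseded by `BitsAndBytesConfig` in recent transformers releases; a hedged sketch (assumes bitsandbytes and accelerate are installed in the Space):

    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    # Sketch only: 8-bit weights via the quantization_config API rather
    # than the older load_in_8bit kwarg. model_name and hf_token are the
    # same variables used above.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        device_map="auto",
        token=hf_token,
        trust_remote_code=True,
    )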
 
@@ -64,12 +58,11 @@ try:
         "text-generation",
         model=model,
         tokenizer=tokenizer,
-        max_new_tokens=512,  # Increased for more detailed responses
+        max_new_tokens=512,
         do_sample=True,
-        temperature=0.8,  # Slightly increased for more creative responses
-        top_p=0.95,  # Increased for more varied responses
-        top_k=50,  # Added top_k for better response quality
-        repetition_penalty=1.2,  # Increased to reduce repetition
+        temperature=0.8,
+        top_p=0.95,
+        repetition_penalty=1.2,
         device_map="auto"
     )
     logger.info("Pipeline created successfully")
@@ -78,15 +71,13 @@ except Exception as e:
     logger.error(f"Error during initialization: {str(e)}")
     raise
 
-# Improved system message with better context and guidelines
+# Improved system message
 system_message = """You are AQuaBot, an AI assistant focused on providing accurate and environmentally conscious information. Your responses should be:
 1. Clear and concise yet informative
 2. Based on verified information when discussing economic and financial topics
 3. Balanced and well-reasoned
 4. Mindful of environmental impact
-5. Professional but conversational in tone
-
-Maintain a helpful and knowledgeable demeanor while avoiding speculation. If you're unsure about something, acknowledge it openly."""
+5. Professional but conversational in tone"""
 
 @spaces.GPU(duration=60)
 @torch.inference_mode()
@@ -99,7 +90,7 @@ def generate_response(user_input, chat_history):
         input_water_consumption = calculate_water_consumption(user_input, True)
         total_water_consumption += input_water_consumption
 
-        # Create a clean conversation history without [INST] tags
+        # Create a clean conversation history
         conversation_history = ""
         if chat_history:
             for user_msg, assistant_msg in chat_history:
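Judging by the cleanup code later in the diff, the history is flattened into plain `User:` / `Assistant:` turns; a hedged sketch of what the loop presumably builds (the exact prompt assembly is not shown in the diff):

    # Sketch of the implied history format; `prompt` is a hypothetical name.
    conversation_history = ""
    for user_msg, assistant_msg in chat_history:
        conversation_history += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
    prompt = f"{system_message}\n\n{conversation_history}User: {user_input}\nAssistant:"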
@@ -117,13 +108,9 @@
         )
         logger.info("Model response generated successfully")
 
-        # Clean up response and remove any remaining [INST] tags
+        # Clean up response
         assistant_response = outputs[0]['generated_text'].strip()
         assistant_response = assistant_response.split('User:')[0].split('Assistant:')[-1].strip()
-
-        # Add fact-check disclaimer for economic/financial responses
-        if any(keyword in user_input.lower() for keyword in ['invest', 'money', 'salary', 'cost', 'wage', 'economy']):
-            assistant_response += "\n\nNote: Financial information provided should be verified with current market data and professional advisors."
 
         # Calculate water consumption for output
         output_water_consumption = calculate_water_consumption(assistant_response, False)
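The surviving two-step split keeps only the model's first turn: anything after a hallucinated `User:` marker is cut, then everything after the last remaining `Assistant:` marker is kept. A quick worked example:

    raw = "Assistant: Water use varies by model and data center.\nUser: more?"
    cleaned = raw.split('User:')[0].split('Assistant:')[-1].strip()
    print(cleaned)  # -> "Water use varies by model and data center."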
@@ -132,7 +119,7 @@
         # Update chat history
         chat_history.append([user_input, assistant_response])
 
-        # Prepare water consumption message with improved styling
+        # Water consumption message
         water_message = f"""
         <div style="position: fixed; top: 20px; right: 20px;
                     background-color: white; padding: 15px;
@@ -155,6 +142,7 @@ def generate_response(user_input, chat_history):
         chat_history.append([user_input, error_message])
         return chat_history, show_water
 
+
 # Constants for water consumption calculation
 WATER_PER_TOKEN = {
     "input_training": 0.0000309,
 