CamiloVega committed
Commit 1bf4ae2 (verified)
Parent(s): c9a896c

Update app.py

Files changed (1):
  1. app.py +78 -147
app.py CHANGED
@@ -3,61 +3,60 @@ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
 import torch
 import logging
-import sys
 import os
 from accelerate import infer_auto_device_map, init_empty_weights
 
 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s'
 )
 logger = logging.getLogger(__name__)
 
 # Get HuggingFace token from environment variable
 hf_token = os.environ.get('HUGGINGFACE_TOKEN')
 if not hf_token:
     logger.error("HUGGINGFACE_TOKEN environment variable not set")
     raise ValueError("Please set the HUGGINGFACE_TOKEN environment variable")
 
 # Define the model name
 model_name = "meta-llama/Llama-2-7b-hf"
 
 try:
     logger.info("Starting model initialization...")
 
     # Check CUDA availability
     device = "cuda" if torch.cuda.is_available() else "cpu"
     logger.info(f"Using device: {device}")
 
     # Configure PyTorch settings
     if device == "cuda":
         torch.backends.cuda.matmul.allow_tf32 = True
         torch.backends.cudnn.allow_tf32 = True
 
     # Load tokenizer
     logger.info("Loading tokenizer...")
     tokenizer = AutoTokenizer.from_pretrained(
         model_name,
         trust_remote_code=True,
-        token=hf_token
+        use_auth_token=hf_token
     )
     tokenizer.pad_token = tokenizer.eos_token
     logger.info("Tokenizer loaded successfully")
 
     # Load model with basic configuration
     logger.info("Loading model...")
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         torch_dtype=torch.float16 if device == "cuda" else torch.float32,
         trust_remote_code=True,
-        token=hf_token,
+        use_auth_token=hf_token,
         device_map="auto"
     )
     logger.info("Model loaded successfully")
 
     # Create pipeline
     logger.info("Creating generation pipeline...")
     model_gen = pipeline(
         "text-generation",
         model=model,
@@ -69,102 +68,22 @@ try:
         repetition_penalty=1.1,
         device_map="auto"
     )
     logger.info("Pipeline created successfully")
 
 except Exception as e:
     logger.error(f"Error during initialization: {str(e)}")
     raise
 
 # Configure system message
-
-system_message = """You are a helpful AI assistant called AQuaBot. You provide direct, clear, and detailed answers to questions while being aware of environmental impact. Keep your responses natural and informative, but concise. Always provide context and explanations with your answers. Respond directly to questions without using any special tags or markers."""
-
-@spaces.GPU(duration=60)
-@torch.inference_mode()
-def generate_response(user_input, chat_history):
-    try:
-        logger.info("Generating response for user input...")
-        global total_water_consumption
-
-        # Calculate water consumption for input
-        input_water_consumption = calculate_water_consumption(user_input, True)
-        total_water_consumption += input_water_consumption
-
-        # Create prompt with Llama 2 chat format
-        conversation_history = ""
-        if chat_history:
-            for message in chat_history:
-                # Remove any [INST] tags from the history
-                user_msg = message[0].replace("[INST]", "").replace("[/INST]", "").strip()
-                assistant_msg = message[1].replace("[INST]", "").replace("[/INST]", "").strip()
-                conversation_history += f"[INST] {user_msg} [/INST] {assistant_msg} "
-
-        prompt = f"<s>[INST] {system_message}\n\n{conversation_history}[INST] {user_input} [/INST]"
-
-        logger.info("Generating model response...")
-        outputs = model_gen(
-            prompt,
-            max_new_tokens=256,
-            return_full_text=False,
-            pad_token_id=tokenizer.eos_token_id,
-            do_sample=True,
-            temperature=0.7,
-            top_p=0.9,
-            repetition_penalty=1.1
-        )
-        logger.info("Model response generated successfully")
-
-        # Clean up the response by removing any [INST] tags and trimming
-        assistant_response = outputs[0]['generated_text'].strip()
-        assistant_response = assistant_response.replace("[INST]", "").replace("[/INST]", "").strip()
-
-        # If the response is too short, try to generate a more detailed one
-        if len(assistant_response.split()) < 10:
-            prompt += "\nPlease provide a more detailed answer with context and explanation."
-            outputs = model_gen(
-                prompt,
-                max_new_tokens=256,
-                return_full_text=False,
-                pad_token_id=tokenizer.eos_token_id,
-                do_sample=True,
-                temperature=0.7,
-                top_p=0.9,
-                repetition_penalty=1.1
-            )
-            assistant_response = outputs[0]['generated_text'].strip()
-            assistant_response = assistant_response.replace("[INST]", "").replace("[/INST]", "").strip()
-
-        # Calculate water consumption for output
-        output_water_consumption = calculate_water_consumption(assistant_response, False)
-        total_water_consumption += output_water_consumption
-
-        # Update chat history with the cleaned messages
-        chat_history.append([user_input, assistant_response])
-
-        # Prepare water consumption message
-        water_message = f"""
-        <div style="position: fixed; top: 20px; right: 20px;
-                    background-color: white; padding: 15px;
-                    border: 2px solid #ff0000; border-radius: 10px;
-                    box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
-            <div style="color: #ff0000; font-size: 24px; font-weight: bold;">
-                💧 {total_water_consumption:.4f} ml
-            </div>
-            <div style="color: #666; font-size: 14px;">
-                Water Consumed
-            </div>
-        </div>
-        """
-
-        return chat_history, water_message
-
-    except Exception as e:
-        logger.error(f"Error in generate_response: {str(e)}")
-        error_message = f"An error occurred: {str(e)}"
-        chat_history.append([user_input, error_message])
-        return chat_history, show_water
+system_message = (
+    "You are a helpful AI assistant called AQuaBot. "
+    "You provide direct, clear, and detailed answers to questions while being aware of environmental impact. "
+    "Keep your responses natural and informative, but concise. "
+    "Always provide context and explanations with your answers. "
+    "Respond directly to questions without using any special tags or markers."
+)
 
 # Constants for water consumption calculation
 WATER_PER_TOKEN = {
     "input_training": 0.0000309,
     "output_training": 0.0000309,
@@ -172,15 +91,15 @@ WATER_PER_TOKEN = {
     "output_inference": 0.05
 }
 
 # Initialize variables
 total_water_consumption = 0
 
 def calculate_tokens(text):
     try:
         return len(tokenizer.encode(text))
     except Exception as e:
         logger.error(f"Error calculating tokens: {str(e)}")
         return len(text.split()) + len(text) // 4  # Fallback to approximation
 
 def calculate_water_consumption(text, is_input=True):
     tokens = calculate_tokens(text)
@@ -195,40 +114,53 @@ def format_message(role, content):
 @torch.inference_mode()
 def generate_response(user_input, chat_history):
     try:
         logger.info("Generating response for user input...")
         global total_water_consumption
 
         # Calculate water consumption for input
         input_water_consumption = calculate_water_consumption(user_input, True)
         total_water_consumption += input_water_consumption
 
-        # Create prompt with Llama 2 chat format
+        # Build the conversation history
         conversation_history = ""
         if chat_history:
             for message in chat_history:
-                conversation_history += f"[INST] {message[0]} [/INST] {message[1]} "
-
-        prompt = f"<s>[INST] {system_message}\n\n{conversation_history}[INST] {user_input} [/INST]"
+                user_msg = message[0].strip()
+                assistant_msg = message[1].strip()
+                conversation_history += f"[INST] {user_msg} [/INST] {assistant_msg} "
 
+        # Build the prompt following the correct format
+        prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n{conversation_history}[INST] {user_input} [/INST]"
+
         logger.info("Generating model response...")
         outputs = model_gen(
             prompt,
             max_new_tokens=256,
             return_full_text=False,
             pad_token_id=tokenizer.eos_token_id,
+            do_sample=True,
+            temperature=0.7,
+            top_p=0.9,
+            repetition_penalty=1.1
         )
         logger.info("Model response generated successfully")
 
+        # Get the assistant's response and clean up tags
         assistant_response = outputs[0]['generated_text'].strip()
 
+        # Strip the [INST] and [/INST] tags
+        if '[INST]' in assistant_response:
+            assistant_response = assistant_response.split('[/INST]')[-1].strip()
+        assistant_response = assistant_response.replace("[INST]", "").replace("[/INST]", "").strip()
+
         # Calculate water consumption for output
         output_water_consumption = calculate_water_consumption(assistant_response, False)
         total_water_consumption += output_water_consumption
 
-        # Update chat history with the new formatted messages
+        # Update the chat history
         chat_history.append([user_input, assistant_response])
 
         # Prepare water consumption message
         water_message = f"""
         <div style="position: fixed; top: 20px; right: 20px;
                     background-color: white; padding: 15px;
@@ -238,7 +170,7 @@ def generate_response(user_input, chat_history):
                 💧 {total_water_consumption:.4f} ml
             </div>
             <div style="color: #666; font-size: 14px;">
                 Water Consumed
             </div>
         </div>
         """
@@ -246,28 +178,28 @@ def generate_response(user_input, chat_history):
         return chat_history, water_message
 
     except Exception as e:
         logger.error(f"Error in generate_response: {str(e)}")
         error_message = f"An error occurred: {str(e)}"
         chat_history.append([user_input, error_message])
         return chat_history, show_water
 
 # Create Gradio interface
 try:
     logger.info("Creating Gradio interface...")
     with gr.Blocks(css="div.gradio-container {background-color: #f0f2f6}") as demo:
         gr.HTML("""
         <div style="text-align: center; max-width: 800px; margin: 0 auto; padding: 20px;">
             <h1 style="color: #2d333a;">AQuaBot</h1>
             <p style="color: #4a5568;">
                 Welcome to AQuaBot - An AI assistant that helps raise awareness
                 about water consumption in language models.
             </p>
         </div>
         """)
 
         chatbot = gr.Chatbot()
         message = gr.Textbox(
             placeholder="Type your message here...",
             show_label=False
         )
         show_water = gr.HTML(f"""
@@ -279,19 +211,19 @@ try:
                 💧 0.0000 ml
             </div>
             <div style="color: #666; font-size: 14px;">
                 Water Consumed
             </div>
         </div>
         """)
         clear = gr.Button("Clear Chat")
 
         # Add footer with citation and disclaimer
         gr.HTML("""
         <div style="text-align: center; max-width: 800px; margin: 20px auto; padding: 20px;
                     background-color: #f8f9fa; border-radius: 10px;">
             <div style="margin-bottom: 15px;">
                 <p style="color: #666; font-size: 14px; font-style: italic;">
                     Water consumption calculations are based on the study:<br>
                     Li, P. et al. (2023). Making AI Less Thirsty: Uncovering and Addressing the Secret Water
                     Footprint of AI Models. ArXiv Preprint,
                     <a href="https://arxiv.org/abs/2304.03271" target="_blank">https://arxiv.org/abs/2304.03271</a>
@@ -299,10 +231,9 @@ try:
             </div>
             <div style="border-top: 1px solid #ddd; padding-top: 15px;">
                 <p style="color: #666; font-size: 14px;">
-                    <strong>Important note:</strong> This application uses Meta Llama-2-7b model
-                    instead of GPT-3 for availability and cost reasons. However,
-                    the water consumption calculations per token (input/output) are based on the
-                    conclusions from the cited paper.
+                    <strong>Important note:</strong> This application uses Meta's Llama 2 model (7B parameters).
+                    The water consumption calculations per token (input/output) are based on the
+                    general conclusions of the cited paper on large language models.
                 </p>
             </div>
         </div>
@@ -311,7 +242,7 @@ try:
         def submit(user_input, chat_history):
             return generate_response(user_input, chat_history)
 
         # Configure event handlers
         message.submit(submit, [message, chatbot], [chatbot, show_water])
         clear.click(
             lambda: ([], f"""
@@ -323,7 +254,7 @@ try:
                 💧 0.0000 ml
             </div>
             <div style="color: #666; font-size: 14px;">
                 Water Consumed
             </div>
         </div>
         """),
@@ -331,12 +262,12 @@ try:
             [chatbot, show_water]
         )
 
         logger.info("Gradio interface created successfully")
 
     # Launch the application
     logger.info("Launching application...")
     demo.launch()
 
 except Exception as e:
     logger.error(f"Error in Gradio interface creation: {str(e)}")
     raise
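For reference, the rewritten prompt follows the Llama 2 chat convention: the system prompt is wrapped in <<SYS>> markers inside the first [INST] block, and earlier turns are appended as [INST] user [/INST] assistant pairs. A minimal, self-contained sketch of the string the new generate_response assembles (the shortened system message and the chat history below are made-up example data):

# Sketch of the prompt layout built by the new generate_response.
# The format string comes from the diff above; the history is hypothetical.
system_message = "You are a helpful AI assistant called AQuaBot."
chat_history = [["How much water does a shower use?", "Roughly 60 to 80 liters for ten minutes."]]

conversation_history = ""
for user_msg, assistant_msg in chat_history:
    conversation_history += f"[INST] {user_msg.strip()} [/INST] {assistant_msg.strip()} "

user_input = "And a bath?"
prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n{conversation_history}[INST] {user_input} [/INST]"
print(prompt)

The model continues the text after the final [/INST], which is why the new code also strips any [INST]/[/INST] tags that leak into the generated output.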
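The water accounting itself is plain arithmetic: token count times a per-token rate. A worked example using only the constants visible in the diff (the body of calculate_water_consumption and the input-inference rate are not shown in this commit, so the key selection here is an assumption):

# Hedged sketch of the per-message accounting in app.py.
# Only the rates visible in the diff are listed; which key is applied
# per message is assumed, not shown in the commit.
WATER_PER_TOKEN = {
    "input_training": 0.0000309,   # ml per token
    "output_training": 0.0000309,  # ml per token
    "output_inference": 0.05,      # ml per token
}

output_tokens = 40  # e.g., a 40-token model reply
print(output_tokens * WATER_PER_TOKEN["output_inference"])  # 40 * 0.05 = 2.0 ml

At 0.05 ml per generated token, a single full 256-token completion would add about 12.8 ml to the counter shown in the interface.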