Spaces: Running on Zero

Delete app.py

app.py DELETED
@@ -1,273 +0,0 @@
import spaces
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import torch
import logging
import os

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Read the HuggingFace token from the environment
hf_token = os.environ.get('HUGGINGFACE_TOKEN')
if not hf_token:
    logger.error("The HUGGINGFACE_TOKEN environment variable is not set")
    raise ValueError("Please set the HUGGINGFACE_TOKEN environment variable")

# Define the model name
model_name = "meta-llama/Llama-2-7b-hf"

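# A hypothetical local session (token value illustrative) for providing the
# secret before launch; on Spaces, set HUGGINGFACE_TOKEN as a repository secret:
#   export HUGGINGFACE_TOKEN=hf_xxxxxxxxxxxxxxxx
#   python app.py
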
try:
    logger.info("Starting model initialization...")

    # Check CUDA availability
    device = "cuda" if torch.cuda.is_available() else "cpu"
    logger.info(f"Using device: {device}")

    # Configure PyTorch settings
    if device == "cuda":
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True

    # Load the tokenizer
    logger.info("Loading the tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True,
        token=hf_token  # `use_auth_token` is deprecated; `token` is the current name
    )
    tokenizer.pad_token = tokenizer.eos_token
    logger.info("Tokenizer loaded successfully")

    # Load the model with the basic configuration
    logger.info("Loading the model...")
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        trust_remote_code=True,
        token=hf_token,
        device_map="auto"
    )
    logger.info("Model loaded successfully")

    # Create the pipeline; the model was already placed by device_map="auto"
    # above, so no device argument is repeated here
    logger.info("Creating the generation pipeline...")
    model_gen = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.1
    )
    logger.info("Pipeline created successfully")
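    # Sketch of the pipeline's output shape for reference:
    #   model_gen("Hello")  ->  [{'generated_text': '...'}]
    # With return_full_text=False (used below), 'generated_text' contains only
    # the completion, not the echoed prompt.
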
except Exception as e:
    logger.error(f"Error during initialization: {str(e)}")
    raise

# Configure the system message
system_message = (
    "You are a helpful AI assistant called AQuaBot. "
    "You provide direct, clear, and detailed answers to questions while being aware of environmental impact. "
    "Keep your responses natural and informative, but concise. "
    "Always provide context and explanations with your answers. "
    "Respond directly to questions without using any special tags or markers."
)

# Constants for the water consumption estimate (ml per token)
WATER_PER_TOKEN = {
    "input_training": 0.0000309,
    "output_training": 0.0000309,
    "input_inference": 0.05,
    "output_inference": 0.05
}

# Initialize state
total_water_consumption = 0

def calculate_tokens(text):
    try:
        return len(tokenizer.encode(text))
    except Exception as e:
        logger.error(f"Error calculating tokens: {str(e)}")
        # Rough fallback: word count plus roughly one token per 4 characters
        return len(text.split()) + len(text) // 4

def calculate_water_consumption(text, is_input=True):
    tokens = calculate_tokens(text)
    if is_input:
        return tokens * (WATER_PER_TOKEN["input_training"] + WATER_PER_TOKEN["input_inference"])
    return tokens * (WATER_PER_TOKEN["output_training"] + WATER_PER_TOKEN["output_inference"])

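# Worked example of the estimate above: a 20-token input is charged
#   20 * (0.0000309 + 0.05) = 20 * 0.0500309 ≈ 1.0006 ml,
# so the per-inference term dominates and the amortized training share is
# negligible.
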
def format_message(role, content):
    return {"role": role, "content": content}

def water_usage_html(total_ml):
    # Single source of truth for the floating water counter, reused by the
    # initial display, the per-response update, and the clear handler
    return f"""
    <div style="position: fixed; top: 20px; right: 20px;
                background-color: white; padding: 15px;
                border: 2px solid #ff0000; border-radius: 10px;
                box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
        <div style="color: #ff0000; font-size: 24px; font-weight: bold;">
            💧 {total_ml:.4f} ml
        </div>
        <div style="color: #666; font-size: 14px;">
            Water Consumption
        </div>
    </div>
    """

@spaces.GPU(duration=60)
@torch.inference_mode()
def generate_response(user_input, chat_history):
    try:
        logger.info("Generating response for user input...")
        global total_water_consumption
        chat_history = chat_history or []

        # Account for the water consumed by the input
        input_water_consumption = calculate_water_consumption(user_input, True)
        total_water_consumption += input_water_consumption

        # Build the prompt in the Llama 2 chat format: the system prompt sits
        # inside the first [INST] block between <<SYS>> tags, and every earlier
        # turn is a closed [INST] question [/INST] answer pair
        prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n"
        if chat_history:
            first_user, first_assistant = chat_history[0]
            prompt += f"{first_user.strip()} [/INST] {first_assistant.strip()} "
            for user_msg, assistant_msg in chat_history[1:]:
                prompt += f"[INST] {user_msg.strip()} [/INST] {assistant_msg.strip()} "
            prompt += f"[INST] {user_input} [/INST]"
        else:
            prompt += f"{user_input} [/INST]"
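        # For illustration, with one prior turn ("Hi" -> "Hello!") and the new
        # input "How are you?", the prompt ends up as (system text elided):
        #   [INST] <<SYS>> ... <</SYS>>
        #
        #   Hi [/INST] Hello! [INST] How are you? [/INST]
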
        logger.info("Generating the model response...")
        outputs = model_gen(
            prompt,
            max_new_tokens=256,
            return_full_text=False,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1
        )
        logger.info("Model response generated successfully")

        # Extract the assistant response and clean up tags
        assistant_response = outputs[0]['generated_text'].strip()

        # Strip any [INST] / [/INST] tags the model may have echoed
        if '[INST]' in assistant_response:
            assistant_response = assistant_response.split('[/INST]')[-1].strip()
        assistant_response = assistant_response.replace("[INST]", "").replace("[/INST]", "").strip()
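        # e.g. an echoed "[INST] ignored [/INST] Fine, thanks!" reduces to
        # "Fine, thanks!" after the split-and-replace above.
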
        # Account for the water consumed by the response
        output_water_consumption = calculate_water_consumption(assistant_response, False)
        total_water_consumption += output_water_consumption

        # Update the chat history
        chat_history.append([user_input, assistant_response])

        # Prepare the water consumption message
        water_message = water_usage_html(total_water_consumption)

        return chat_history, water_message

    except Exception as e:
        logger.error(f"Error in generate_response: {str(e)}")
        error_message = f"An error occurred: {str(e)}"
        chat_history.append([user_input, error_message])
        # Keep the counter display consistent even when generation fails
        return chat_history, water_usage_html(total_water_consumption)

# Create the Gradio interface
try:
    logger.info("Creating the Gradio interface...")
    with gr.Blocks(css="div.gradio-container {background-color: #f0f2f6}") as demo:
        gr.HTML("""
            <div style="text-align: center; max-width: 800px; margin: 0 auto; padding: 20px;">
                <h1 style="color: #2d333a;">AQuaBot</h1>
                <p style="color: #4a5568;">
                    Welcome to AQuaBot - an AI assistant that helps raise awareness
                    of the water consumption of language models.
                </p>
            </div>
            """)

        chatbot = gr.Chatbot()
        message = gr.Textbox(
            placeholder="Type your message here...",
            show_label=False
        )
        show_water = gr.HTML(water_usage_html(0))
        clear = gr.Button("Clear Chat")

        # Add a footer with the citation and disclaimer
        gr.HTML("""
            <div style="text-align: center; max-width: 800px; margin: 20px auto; padding: 20px;
                        background-color: #f8f9fa; border-radius: 10px;">
                <div style="margin-bottom: 15px;">
                    <p style="color: #666; font-size: 14px; font-style: italic;">
                        Water consumption calculations are based on the study:<br>
                        Li, P. et al. (2023). Making AI Less Thirsty: Uncovering and Addressing the Secret Water
                        Footprint of AI Models. ArXiv Preprint,
                        <a href="https://arxiv.org/abs/2304.03271" target="_blank">https://arxiv.org/abs/2304.03271</a>
                    </p>
                </div>
                <div style="border-top: 1px solid #ddd; padding-top: 15px;">
                    <p style="color: #666; font-size: 14px;">
                        <strong>Important note:</strong> This application uses Meta's Llama 2 model (7B parameters).
                        The per-token water consumption figures (input/output) are based on the
                        general findings of the cited paper on large language models.
                    </p>
                </div>
            </div>
            """)

        def submit(user_input, chat_history):
            return generate_response(user_input, chat_history)

        # Wire up the event handlers
        message.submit(submit, [message, chatbot], [chatbot, show_water])
        clear.click(
            lambda: ([], water_usage_html(0)),
            None,
            [chatbot, show_water]
        )

    logger.info("Gradio interface created successfully")

    # Launch the application
    logger.info("Launching the application...")
    demo.launch()

except Exception as e:
    logger.error(f"Error creating the Gradio interface: {str(e)}")
    raise