Uhhy committed on
Commit
6f52053
verified
1 Parent(s): 0b58d90

Update app.py

Files changed (1)
  1. app.py +5 -48
app.py CHANGED
@@ -13,7 +13,6 @@ import numpy as np
  from functools import lru_cache
  from cachetools import TTLCache
  from multiprocessing import cpu_count
- import threading
  import queue

  # Logging configuration to suppress unnecessary debug messages
@@ -194,53 +193,11 @@ def pre_load_models():
  pre_load_models()

  # Batch model loading optimization
- def optimize_model_loading():
-     # Load models in batches with efficient resource handling
-     batch_size = min(len(model_configs), cpu_count() * 2)
-     for i in range(0, len(model_configs), batch_size):
-         batch_configs = model_configs[i:i + batch_size]
-         with ThreadPoolExecutor(max_workers=batch_size) as executor:
-             futures = [executor.submit(model_manager.load_model, config) for config in batch_configs]
-             for future in tqdm(as_completed(futures), total=len(batch_configs), desc="Optimizando carga de modelos", unit="modelo"):
-                 try:
-                     model = future.result()
-                     global_data['models'][batch_configs[futures.index(future)]['name']] = model
-                 except Exception as e:
-                     logging.error(f"Error al optimizar la carga del modelo: {e}")
-
- optimize_model_loading()
-
- # Apply parallelization techniques to response generation
- def parallelize_response_generation(request: ChatRequest):
-     response_queue = queue.Queue()
-     with ThreadPoolExecutor(max_workers=min(len(global_data['models']), cpu_count())) as executor:
-         futures = [executor.submit(worker_function, model_name, request, response_queue) for model_name in global_data['models']]
-         for future in tqdm(as_completed(futures), total=len(futures), desc="Generando respuestas en paralelo", unit="modelo"):
-             future.result()
-
-     responses = []
-     while not response_queue.empty():
-         responses.append(response_queue.get())
-     return responses
-
- @app.post("/generate_chat_parallel")
- async def generate_chat_parallel(request: ChatRequest):
-     if not request.message.strip():
-         raise HTTPException(status_code=400, detail="The message cannot be empty.")
-
-     responses = parallelize_response_generation(request)
-     best_response = select_best_response(responses)
-
-     return {
-         "best_response": best_response,
-         "all_responses": responses
-     }
-
- # Optimize memory usage
- def optimize_memory_usage():
-     import gc
-     gc.collect()
+ def batch_load_models(model_configs):
+     for i in range(0, len(model_configs), cpu_count()):
+         batch = model_configs[i:i + cpu_count()]
+         for config in batch:
+             model_manager.load_model(config)

- # Run the FastAPI server
  if __name__ == "__main__":
      uvicorn.run(app, host="0.0.0.0", port=8000)
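The added `batch_load_models` steps through `model_configs` in chunks of `cpu_count()` and loads each config sequentially, replacing the thread-pooled loader removed above. The commit does not show a call site or where the loaded models end up, so the sketch below is only an illustration under assumptions: `model_configs`, `model_manager`, and `global_data` are taken to be the objects already defined in app.py, and the `global_data['models']` bookkeeping is carried over from the removed `optimize_model_loading`.

# Illustrative sketch, not part of this commit: the new batch loader extended to
# record each model under its config name, as the removed loader did.
# model_configs, model_manager and global_data are assumed from app.py.
from multiprocessing import cpu_count

def batch_load_models(model_configs):
    for i in range(0, len(model_configs), cpu_count()):
        batch = model_configs[i:i + cpu_count()]
        for config in batch:
            # Load one model at a time and keep it addressable by name.
            global_data['models'][config['name']] = model_manager.load_model(config)

batch_load_models(model_configs)  # assumed call site, mirroring the removed optimize_model_loading()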
 
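For comparison, the removed `optimize_model_loading` loaded each batch through a `ThreadPoolExecutor` and mapped finished futures back to their configs with `futures.index(future)`. Below is a self-contained sketch of that pattern with the mapping done through a dict, the more common idiom; `fake_load_model` and `configs` are stand-ins, not names from app.py.

# Standalone sketch of the thread-pooled batch-loading pattern removed in this
# commit. A dict maps each future back to its config so the result can be
# stored by name without calling futures.index().
from concurrent.futures import ThreadPoolExecutor, as_completed
from multiprocessing import cpu_count
import time

def fake_load_model(config):
    time.sleep(0.1)                      # stand-in for a real model load
    return f"model object for {config['name']}"

configs = [{'name': f'model-{i}'} for i in range(8)]
models = {}
batch_size = min(len(configs), cpu_count() * 2)

for i in range(0, len(configs), batch_size):
    batch = configs[i:i + batch_size]
    with ThreadPoolExecutor(max_workers=batch_size) as executor:
        futures = {executor.submit(fake_load_model, cfg): cfg for cfg in batch}
        for future in as_completed(futures):
            cfg = futures[future]
            try:
                models[cfg['name']] = future.result()
            except Exception as exc:
                print(f"failed to load {cfg['name']}: {exc}")

print(sorted(models))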
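The removed `/generate_chat_parallel` endpoint fanned each request out to every loaded model through a thread pool, collected the answers on a shared `queue.Queue`, and then picked one. A self-contained sketch of that fan-out pattern follows; `worker` and `select_best` are illustrative stand-ins for app.py's `worker_function` and `select_best_response`, whose real implementations are not shown in this diff.

# Standalone sketch of the queue-based fan-out used by the removed endpoint:
# one worker per model pushes its answer onto a shared queue, the main thread
# drains the queue and selects a single response.
from concurrent.futures import ThreadPoolExecutor, as_completed
from multiprocessing import cpu_count
import queue

models = ['model-a', 'model-b', 'model-c']

def worker(model_name, message, out_queue):
    # Stand-in for worker_function: produce one response per model.
    out_queue.put((model_name, f"{model_name} answers: {message.upper()}"))

def select_best(responses):
    # Stand-in for select_best_response: here, simply the longest answer.
    return max(responses, key=lambda r: len(r[1]))

def fan_out(message):
    out_queue = queue.Queue()
    with ThreadPoolExecutor(max_workers=min(len(models), cpu_count())) as executor:
        futures = [executor.submit(worker, name, message, out_queue) for name in models]
        for future in as_completed(futures):
            future.result()              # re-raise any worker exception
    responses = []
    while not out_queue.empty():
        responses.append(out_queue.get())
    return responses

print(select_best(fan_out("hello")))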