Update app.py
app.py CHANGED
@@ -1,6 +1,6 @@
 from pydantic import BaseModel
 from llama_cpp import Llama
-from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import ThreadPoolExecutor, as_completed
 import re
 import gradio as gr
 import os
@@ -9,6 +9,8 @@ from functools import lru_cache
 from dotenv import load_dotenv
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
+from queue import Queue
+import pickle  # for persistence

 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

@@ -35,21 +37,43 @@ global_data = {
 }

 response_cache = {}
+model_cache_dir = "model_cache"  # directory for caching models on disk
+os.makedirs(model_cache_dir, exist_ok=True)

 class ModelManager:
-    def __init__(self):
+    def __init__(self, max_models=10):
         self.models = {}
+        self.max_models = max_models
+        self.model_cache_dir = model_cache_dir
+
     def load_model(self, model_config):
         model_name = model_config['name']
+        cache_file = os.path.join(self.model_cache_dir, f"{model_name}.pkl")
         if model_name not in self.models:
             try:
-                self.models[model_name] = Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename'], use_auth_token=HUGGINGFACE_TOKEN)
-            except Exception as e:
+                if os.path.exists(cache_file):
+                    with open(cache_file, "rb") as f:
+                        self.models[model_name] = pickle.load(f)
+                    print(f"Model {model_name} loaded from cache.")
+                else:
+                    self.models[model_name] = Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename'], use_auth_token=HUGGINGFACE_TOKEN)
+                    with open(cache_file, "wb") as f:
+                        pickle.dump(self.models[model_name], f)
+                    print(f"Model {model_name} loaded and saved to cache.")
+            except Exception as e:
+                print(f"Error loading model {model_name}: {e}")
                 self.models[model_name] = None
+
+    def get_model(self, model_name):
+        return self.models.get(model_name)
+
     def unload_model(self, model_name):
         if model_name in self.models and self.models[model_name] is not None:
+            cache_file = os.path.join(self.model_cache_dir, f"{model_name}.pkl")
+            with open(cache_file, "wb") as f:
+                pickle.dump(self.models[model_name], f)
             del self.models[model_name]
-[…]
+            print(f"Model {model_name} unloaded and saved to cache.")

 model_manager = ModelManager()

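Note on the pickle-based cache above: a llama_cpp.Llama instance wraps native llama.cpp state, so whether pickle.dump captures anything reusable depends on the llama-cpp-python version; it can raise an error outright or serialize only constructor arguments rather than the loaded weights. A more reliable way to avoid repeated downloads is to cache the GGUF file itself and rebuild the model from the local path. A minimal sketch under that assumption (load_model_from_disk_cache is a hypothetical helper; hf_hub_download is huggingface_hub's standard download function and resolves to the already-cached file on repeat calls):

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

def load_model_from_disk_cache(model_config, cache_dir="model_cache", token=None):
    # Fetch the GGUF file once; repeat calls resolve to the copy
    # already present in cache_dir instead of downloading again.
    gguf_path = hf_hub_download(
        repo_id=model_config['repo_id'],
        filename=model_config['filename'],
        cache_dir=cache_dir,
        token=token,
    )
    # Rebuild the model from the local weights file; this replaces
    # the pickle round-trip with a plain reload.
    return Llama(model_path=gguf_path)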
@@ -86,19 +110,21 @@ async def process_message(message):
         return response_cache[inputs]

     responses = {}
-    […]
+    with ThreadPoolExecutor(max_workers=model_manager.max_models) as executor:
+        futures = [executor.submit(model_manager.load_model, config) for config in global_data['model_configs']]
+        for future in as_completed(futures):
+            future.result()
+
+    for config in global_data['model_configs']:
+        model = model_manager.get_model(config['name'])
+        if model:
+            responses[config['name']] = generate_model_response(model, inputs)
+        model_manager.unload_model(config['name'])

     formatted_response = "\n\n".join([f"**{model}:**\n{response}" for model, response in responses.items()])
     response_cache[inputs] = formatted_response
     return formatted_response

-[…]
 @app.post("/generate_multimodel")
 async def api_generate_multimodel(request: Request):
     try:
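The rewritten process_message above loads every configured model in parallel but still generates responses one model at a time, unloading each model after use. Because load_model traps its own exceptions, future.result() acts mainly as a completion barrier here. If the generation step should be fanned out the same way, the pattern below is a minimal sketch; it assumes the generate_model_response(model, inputs) helper referenced in the diff and one independent Llama instance per model:

from concurrent.futures import ThreadPoolExecutor, as_completed

def generate_all(model_manager, model_configs, inputs):
    responses = {}
    # Only models that loaded successfully take part in generation.
    loaded = [cfg['name'] for cfg in model_configs
              if model_manager.get_model(cfg['name']) is not None]
    with ThreadPoolExecutor(max_workers=max(len(loaded), 1)) as executor:
        # Map each future back to its model name so results can be
        # collected in completion order.
        futures = {
            executor.submit(generate_model_response,
                            model_manager.get_model(name), inputs): name
            for name in loaded
        }
        for future in as_completed(futures):
            responses[futures[future]] = future.result()
    return responses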