Update app.py
app.py CHANGED
@@ -1,21 +1,24 @@
+import cachetools
 from pydantic import BaseModel
-from …
+from llama_cpp_agent import Llama
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import re
 import httpx
 import asyncio
 import gradio as gr
 import os
-import gptcache
 from dotenv import load_dotenv
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
 import uvicorn
 from threading import Thread
+import gptcache
 
 load_dotenv()
 HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
 
+cache = cachetools.TTLCache(maxsize=100, ttl=60)
+
 global_data = {
     'models': {},
     'tokens': {
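For context on the new `cache` object: `cachetools.TTLCache` is a dict-like cache that holds at most `maxsize` entries and silently drops each entry `ttl` seconds after it is stored. A minimal sketch of that behavior, using a 2-second TTL only so the expiry is quick to observe (the Space itself uses maxsize=100, ttl=60):

```python
import time
import cachetools

# Dict-like cache: at most 100 entries, each one expiring 2 seconds after insertion.
demo_cache = cachetools.TTLCache(maxsize=100, ttl=2)

demo_cache["greeting"] = "hello"
print("greeting" in demo_cache)   # True while the entry is fresh

time.sleep(2.5)                   # wait past the TTL
print("greeting" in demo_cache)   # False: the entry has expired
```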
@@ -124,10 +127,10 @@ def remove_duplicates(text):
 def cache_response(func):
     def wrapper(*args, **kwargs):
         cache_key = f"{args}-{kwargs}"
-        if …
-            return …
+        if cache_key in cache:
+            return cache[cache_key]
         response = func(*args, **kwargs)
-        …
+        cache[cache_key] = response
         return response
     return wrapper
 
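Read together with the `cache` created above, the rewritten decorator short-circuits repeated calls made with the same arguments inside the TTL window. A minimal standalone sketch; `generate_text` is a hypothetical stand-in for whatever function the Space actually decorates:

```python
import cachetools

cache = cachetools.TTLCache(maxsize=100, ttl=60)

def cache_response(func):
    def wrapper(*args, **kwargs):
        cache_key = f"{args}-{kwargs}"      # stringified arguments act as the key
        if cache_key in cache:
            return cache[cache_key]         # reuse a result cached less than 60 s ago
        response = func(*args, **kwargs)
        cache[cache_key] = response
        return response
    return wrapper

@cache_response
def generate_text(prompt):                  # hypothetical stand-in for a model call
    print("computing...")
    return f"echo: {prompt}"

generate_text("hi")   # prints "computing..."
generate_text("hi")   # served from the cache, no recomputation
```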
@@ -155,13 +158,13 @@ async def process_message(message):
     ]
     responses = [
         {'model': model_name, 'response': future.result()}
-        …
-        …
-        …
-        …
-        …
-        …
-        …
+        for model_name, future in zip(global_data['models'].keys(), as_completed(futures))
+    ]
+    unique_responses = remove_repetitive_responses(responses)
+    formatted_response = ""
+    for model, response in unique_responses.items():
+        formatted_response += f"**{model}:**\n{response}\n\n"
+    return formatted_response
 
 app = FastAPI()
 
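The added lines close the `responses` comprehension by zipping model names with futures as they complete; the `futures` list itself is built above this hunk and is not shown. A minimal sketch of the same fan-out, with a hypothetical `call_model` in place of the real per-model inference; it reads the futures in submission order, since `as_completed` yields them in finish order and the pairing with model names would otherwise depend on timing:

```python
from concurrent.futures import ThreadPoolExecutor

MODELS = ["model-a", "model-b"]                # hypothetical model names

def call_model(name, message):                 # hypothetical per-model inference call
    return f"{name} says: {message}"

def fan_out(message):
    with ThreadPoolExecutor() as executor:
        # One future per model, submitted in list order.
        futures = {name: executor.submit(call_model, name, message) for name in MODELS}
        # result() blocks until that model's response is ready.
        return [{'model': name, 'response': fut.result()} for name, fut in futures.items()]

print(fan_out("hello"))
```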
@@ -175,7 +178,7 @@ async def generate(request: ChatRequest):
 
 def run_uvicorn():
     try:
-        uvicorn.run(app, host="0.0.0.0", port=…
+        uvicorn.run(app, host="0.0.0.0", port=7861)
     except Exception as e:
         print(f"Error al ejecutar uvicorn: {e}")
 
@@ -184,7 +187,7 @@ iface = gr.Interface(
     inputs=gr.Textbox(lines=2, placeholder="Enter your message here..."),
     outputs=gr.Markdown(),
     title="Multi-Model LLM API (CPU Optimized)",
-    description="…
+    description=""
 )
 
 def run_gradio():
@@ -193,4 +196,4 @@ def run_gradio():
 if __name__ == "__main__":
     Thread(target=run_uvicorn).start()
     Thread(target=run_gradio).start()
-    asyncio.get_event_loop().run_forever()
+    asyncio.get_event_loop().run_forever()
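The last hunk keeps the existing startup layout: the FastAPI app runs under uvicorn in one thread, the Gradio UI in another, and the main thread parks on an event loop. A minimal self-contained sketch of that layout; the `/health` stub, the echo interface, the Gradio port, and the `daemon=True` flags are illustrative assumptions, while port 7861 matches the value set earlier in the diff, and the sketch blocks on a fresh event loop rather than `asyncio.get_event_loop()`, which newer Python versions deprecate when no loop is running:

```python
import asyncio
from threading import Thread

import gradio as gr
import uvicorn
from fastapi import FastAPI

app = FastAPI()

@app.get("/health")                      # stub endpoint so the sketch is self-contained
def health():
    return {"status": "ok"}

iface = gr.Interface(fn=lambda msg: msg, inputs="text", outputs="text")

def run_uvicorn():
    uvicorn.run(app, host="0.0.0.0", port=7861)

def run_gradio():
    iface.launch(server_name="0.0.0.0", server_port=7860)   # assumed UI port

if __name__ == "__main__":
    Thread(target=run_uvicorn, daemon=True).start()
    Thread(target=run_gradio, daemon=True).start()
    asyncio.new_event_loop().run_forever()   # keep the main thread alive
```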