Update app.py
app.py CHANGED
@@ -5,6 +5,7 @@ import re
 import httpx
 from spaces import GPU
 import asyncio
+import gradio as gr
 
 global_data = {
     'models': {},
@@ -90,39 +91,6 @@ def generate_model_response(model, inputs):
         print(f"Error generating model response: {e}")
         return ""
 
-async def handle_request(request):
-    if request.method == "POST" and request.url.path == "/generate":
-        try:
-            chat_request = ChatRequest(**request.json())
-            inputs = normalize_input(chat_request.message)
-            with ThreadPoolExecutor() as executor:
-                futures = [
-                    executor.submit(generate_model_response, model, inputs)
-                    for model in global_data['models'].values()
-                ]
-                responses = [{'model': model_name, 'response': future.result()} for model_name, future in zip(global_data['models'].keys(), as_completed(futures))]
-            unique_responses = remove_repetitive_responses(responses)
-            return httpx.Response(status_code=200, json=unique_responses)
-        except Exception as e:
-            print(f"Error handling request: {e}")
-            return httpx.Response(status_code=500, json={"error": f"Error handling request: {e}"})
-
-    else:
-        return httpx.Response(status_code=404, text="Not Found")
-
-
-async def run_server(port: int):
-    async def serve_request(request: httpx.Request) -> httpx.Response:
-        return await handle_request(request)
-
-    from uvicorn.config import Config
-    from uvicorn.main import Server
-
-    config = Config(app=serve_request, host="127.0.0.1", port=port, log_level="info")
-    server = Server(config=config)
-
-    await server.serve()
-
 def remove_repetitive_responses(responses):
     unique_responses = {}
     for response in responses:
@@ -130,6 +98,33 @@ def remove_repetitive_responses(responses):
         unique_responses[response['model']] = response['response']
     return unique_responses
 
-
-
-
+async def process_message(message):
+    inputs = normalize_input(message)
+    with ThreadPoolExecutor() as executor:
+        futures = [
+            executor.submit(generate_model_response, model, inputs)
+            for model in global_data['models'].values()
+        ]
+        responses = [{'model': model_name, 'response': future.result()} for model_name, future in zip(global_data['models'].keys(), as_completed(futures))]
+    unique_responses = remove_repetitive_responses(responses)
+    formatted_response = ""
+    for model, response in unique_responses.items():
+        formatted_response += f"**{model}:**\n{response}\n\n"
+
+    curl_command = f"""
+    curl -X POST -H "Content-Type: application/json" \\
+         -d '{{"message": "{message}"}}' \\
+         http://localhost:7860/generate
+    """
+    return formatted_response, curl_command
+
+
+iface = gr.Interface(
+    fn=process_message,
+    inputs=gr.Textbox(lines=2, placeholder="Enter your message here..."),
+    outputs=[gr.Markdown(), gr.Textbox(label="cURL command")],
+    title="Multi-Model LLM API",
+    description="Enter a message and get responses from multiple LLMs.",
+)
+
+iface.launch(server_port=7860)
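
Review note: both the removed handle_request and the new process_message pair model names with futures via zip(global_data['models'].keys(), as_completed(futures)). Since as_completed yields futures in completion order, not submission order, a fast model's response can be attributed to a slower model's name. A minimal sketch of a fix, assuming the same global_data and generate_model_response from app.py (collect_responses is a hypothetical helper name, not in the commit):

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

def collect_responses(inputs):
    # Map each future back to its model name so results stay correctly
    # attributed even when models finish out of submission order.
    # Assumes global_data and generate_model_response from app.py.
    with ThreadPoolExecutor() as executor:
        future_to_name = {
            executor.submit(generate_model_response, model, inputs): name
            for name, model in global_data['models'].items()
        }
        return [
            {'model': future_to_name[future], 'response': future.result()}
            for future in as_completed(future_to_name)
        ]
```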
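A second caveat: the curl_command shown to users still targets http://localhost:7860/generate, but this commit deletes the handler that served that route, so the printed command will 404. After the change the only entry point is the Gradio interface, which is normally called through gradio_client. A hedged sketch of the client side (the Space id is a placeholder; api_name="/predict" is Gradio's default for an Interface):

```python
from gradio_client import Client

# Placeholder Space id; substitute the actual <user>/<space> name.
client = Client("your-username/your-space")
# predict returns one value per output component: the Markdown
# text and the cURL-command string.
formatted, curl_cmd = client.predict("Hello!", api_name="/predict")
print(formatted)
```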