com201
Browse files
app.py
CHANGED
@@ -1,8 +1,10 @@
|
|
1 |
-
from fastapi import FastAPI, Request
|
2 |
from fastapi.responses import HTMLResponse, StreamingResponse
|
3 |
from fastapi.staticfiles import StaticFiles
|
4 |
from modules.pmbl import PMBL
|
5 |
import torch
|
|
|
|
|
6 |
|
7 |
print(f"CUDA available: {torch.cuda.is_available()}")
|
8 |
print(f"CUDA device count: {torch.cuda.device_count()}")
|
@@ -15,6 +17,7 @@ app.mount("/static", StaticFiles(directory="static"), name="static")
|
|
15 |
app.mount("/templates", StaticFiles(directory="templates"), name="templates")
|
16 |
|
17 |
pmbl = PMBL("./PMB-7b.Q6_K.gguf", gpu_layers=50)
|
|
|
18 |
|
19 |
@app.head("/")
|
20 |
@app.get("/")
|
@@ -22,15 +25,23 @@ def index() -> HTMLResponse:
|
|
22 |
with open("templates/index.html") as f:
|
23 |
return HTMLResponse(content=f.read())
|
24 |
|
|
|
|
|
|
|
|
|
|
|
25 |
@app.post("/chat")
|
26 |
-
async def chat(request: Request):
|
27 |
try:
|
28 |
data = await request.json()
|
29 |
user_input = data["user_input"]
|
30 |
mode = data["mode"]
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
34 |
except Exception as e:
|
35 |
print(f"[SYSTEM] Error: {str(e)}")
|
36 |
return {"error": str(e)}
|
|
|
1 |
+
from fastapi import FastAPI, Request, BackgroundTasks
|
2 |
from fastapi.responses import HTMLResponse, StreamingResponse
|
3 |
from fastapi.staticfiles import StaticFiles
|
4 |
from modules.pmbl import PMBL
|
5 |
import torch
|
6 |
+
from queue import Queue
|
7 |
+
import asyncio
|
8 |
|
9 |
print(f"CUDA available: {torch.cuda.is_available()}")
|
10 |
print(f"CUDA device count: {torch.cuda.device_count()}")
|
|
|
17 |
app.mount("/templates", StaticFiles(directory="templates"), name="templates")
|
18 |
|
19 |
pmbl = PMBL("./PMB-7b.Q6_K.gguf", gpu_layers=50)
|
20 |
+
# NOTE(review): request_queue is created here but is not referenced by any
# handler visible in this view — confirm it is used elsewhere or remove it.
request_queue = Queue()
|
21 |
|
22 |
@app.head("/")
|
23 |
@app.get("/")
|
|
|
25 |
with open("templates/index.html") as f:
|
26 |
return HTMLResponse(content=f.read())
|
27 |
|
28 |
+
async def process_request(user_input: str, mode: str):
    """Relay the model's reply to ``user_input`` one chunk at a time.

    Fetches the chat history from ``pmbl`` for the given ``mode`` and
    ``user_input``, then yields every chunk that
    ``pmbl.generate_response`` produces, in order.
    """
    chat_history = pmbl.get_chat_history(mode, user_input)
    response_stream = pmbl.generate_response(user_input, chat_history, mode)
    async for piece in response_stream:
        yield piece
|
32 |
+
|
33 |
@app.post("/chat")
async def chat(request: Request, background_tasks: BackgroundTasks):
    """Stream the model's reply for a chat request.

    Reads a JSON body containing ``user_input`` and ``mode`` and returns a
    plain-text ``StreamingResponse`` fed by ``process_request``.

    If the body cannot be read (invalid JSON, missing key, ...), the error
    is logged and returned as ``{"error": <message>}``.

    ``background_tasks`` is accepted but not used by this handler.
    """
    # Keep the try body limited to request parsing: that is the only work
    # that actually runs before this handler returns.
    try:
        data = await request.json()
        user_input = data["user_input"]
        mode = data["mode"]
    except Exception as e:
        print(f"[SYSTEM] Error: {e}")
        return {"error": str(e)}

    async def stream_response():
        # The response body is iterated after this handler has returned, so
        # a try/except around the StreamingResponse construction never sees
        # generation failures. Log them here, then re-raise so the stream
        # still terminates the same way it did before.
        try:
            async for chunk in process_request(user_input, mode):
                yield chunk
        except Exception as e:
            print(f"[SYSTEM] Error: {e}")
            raise

    return StreamingResponse(stream_response(), media_type="text/plain")
|