PMBeta / app.py
Sergidev's picture
Update app.py
de91fd4 verified
raw
history blame
2.25 kB
from fastapi import FastAPI, Request, BackgroundTasks
from fastapi.responses import HTMLResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from modules.pmbl import PMBL
import torch
import asyncio
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device count: {torch.cuda.device_count()}")
if torch.cuda.is_available():
print(f"CUDA device name: {torch.cuda.get_device_name(0)}")
app = FastAPI(docs_url=None, redoc_url=None)
app.mount("/static", StaticFiles(directory="static"), name="static")
app.mount("/templates", StaticFiles(directory="templates"), name="templates")
pmbl = PMBL("./PMB-7b.Q6_K.gguf", gpu_layers=50)
request_queue = asyncio.Queue()
processing_lock = asyncio.Lock()
@app.head("/")
@app.get("/")
def index() -> HTMLResponse:
with open("templates/index.html") as f:
return HTMLResponse(content=f.read())
async def process_request(user_input: str, mode: str):
async with processing_lock:
history = pmbl.get_chat_history(mode, user_input)
async for chunk in pmbl.generate_response(user_input, history, mode):
yield chunk
@app.post("/chat")
async def chat(request: Request, background_tasks: BackgroundTasks):
try:
data = await request.json()
user_input = data["user_input"]
mode = data["mode"]
async def response_generator():
await request_queue.put((user_input, mode))
async for chunk in await process_request(user_input, mode):
yield chunk
return StreamingResponse(response_generator(), media_type="text/plain")
except Exception as e:
print(f"[SYSTEM] Error: {str(e)}")
return {"error": str(e)}
async def queue_worker():
while True:
user_input, mode = await request_queue.get()
async for _ in process_request(user_input, mode):
pass
request_queue.task_done()
@app.on_event("startup")
async def startup_event():
asyncio.create_task(queue_worker())
@app.post("/sleep")
async def sleep():
try:
pmbl.sleep_mode()
return {"message": "Sleep mode completed successfully"}
except Exception as e:
print(f"[SYSTEM] Error: {str(e)}")
return {"error": str(e)}