from fastapi import FastAPI, Request, BackgroundTasks
from fastapi.responses import HTMLResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from modules.pmbl import PMBL
import torch
import asyncio

# Startup diagnostics: report whether torch can see a CUDA device, how many
# devices it sees, and (when CUDA is available) the name of device 0.
_cuda_available = torch.cuda.is_available()
print(f"CUDA available: {_cuda_available}")
print(f"CUDA device count: {torch.cuda.device_count()}")
if _cuda_available:
    print(f"CUDA device name: {torch.cuda.get_device_name(0)}")

# Application object; both interactive documentation URLs are turned off.
app = FastAPI(redoc_url=None, docs_url=None)

# Serve the "static" and "templates" directories as static files under
# URL prefixes of the same name.
app.mount(path="/static", app=StaticFiles(directory="static"), name="static")
app.mount(path="/templates", app=StaticFiles(directory="templates"), name="templates")

# Model wrapper shared by every request handler in this module.
# NOTE(review): gpu_layers is presumably the number of layers offloaded to
# the GPU -- confirm against modules.pmbl.
pmbl = PMBL("./PMB-7b.Q6_K.gguf", gpu_layers=50)

# Pending (user_input, mode) pairs consumed by queue_worker.
request_queue = asyncio.Queue()
# Held by process_request so only one generation runs at a time.
processing_lock = asyncio.Lock()

@app.head("/")
@app.get("/")
def index() -> HTMLResponse:
    """Serve the chat front-end page for GET and HEAD requests on ``/``.

    Returns:
        An ``HTMLResponse`` whose body is the content of
        ``templates/index.html``, re-read from disk on every call.

    Raises:
        OSError: If the template file cannot be opened.
    """
    # Decode explicitly as UTF-8 instead of the platform's locale default:
    # HTMLResponse sends the body as UTF-8, so the file must be read the
    # same way for non-ASCII characters to survive the round trip.
    with open("templates/index.html", encoding="utf-8") as f:
        return HTMLResponse(content=f.read())

async def process_request(user_input: str, mode: str):
    """Generate the reply to one request, yielding it chunk by chunk.

    This is an async generator: iterate it with ``async for``. The
    module-wide ``processing_lock`` is acquired before any work starts and
    is held until the generator is exhausted or closed, so only one
    generation runs at a time.

    Args:
        user_input: The text submitted by the user.
        mode: Mode value passed through to the ``pmbl`` calls.

    Yields:
        Each chunk produced by ``pmbl.generate_response``, unchanged.
    """
    async with processing_lock:
        chat_history = pmbl.get_chat_history(mode, user_input)
        reply_stream = pmbl.generate_response(user_input, chat_history, mode)
        async for piece in reply_stream:
            yield piece

@app.post("/chat")
async def chat(request: Request, background_tasks: BackgroundTasks):
    """Stream the model's reply to a chat message as plain text.

    The request body must be a JSON object with the keys ``user_input``
    and ``mode``.

    Args:
        request: Incoming request whose JSON body carries the message.
        background_tasks: Injected by FastAPI; currently unused, kept so the
            handler signature stays unchanged.

    Returns:
        A ``StreamingResponse`` (``text/plain``) that forwards every chunk
        yielded by ``process_request``. If the body cannot be parsed or a
        required key is missing, a ``{"error": ...}`` dict is returned
        instead.
    """
    try:
        data = await request.json()
        user_input = data["user_input"]
        mode = data["mode"]

        # process_request is an async generator, so it is handed to the
        # response as-is and iterated there; awaiting it raises TypeError
        # and the client would never receive a chunk.
        #
        # The request is deliberately NOT put on request_queue as well:
        # queue_worker would run a second generation for the same input and
        # throw the output away. Serialization is already provided by the
        # lock inside process_request.
        #
        # Note: the generator body runs after this handler has returned, so
        # errors raised during generation are not caught by the except below.
        return StreamingResponse(
            process_request(user_input, mode), media_type="text/plain"
        )
    except Exception as e:
        print(f"[SYSTEM] Error: {str(e)}")
        return {"error": str(e)}

async def queue_worker():
    """Consume ``request_queue`` forever, running each queued request.

    Each item is a ``(user_input, mode)`` pair. The request is run through
    ``process_request`` and the generated chunks are discarded. The loop
    never returns; it only ends when the surrounding task is cancelled.
    """
    while True:
        user_input, mode = await request_queue.get()
        try:
            async for _ in process_request(user_input, mode):
                pass
        except Exception as e:
            # One failed request must not terminate the worker: without this
            # handler the task would die and later queue items would never
            # be consumed. Cancellation is not an Exception subclass on
            # Python 3.8+, so it still propagates.
            print(f"[SYSTEM] Error: {str(e)}")
        finally:
            # Always balance the get() above, even on failure or
            # cancellation, so request_queue.join() cannot hang.
            request_queue.task_done()

@app.on_event("startup")
async def startup_event():
    """Start the background queue worker when the application starts."""
    # Keep a strong reference to the task. The event loop holds only weak
    # references to tasks, so a task whose result is discarded can be
    # garbage-collected before it finishes.
    app.state.queue_worker_task = asyncio.create_task(queue_worker())

@app.post("/sleep")
async def sleep():
    """Run the model wrapper's sleep mode and report the outcome.

    Returns:
        ``{"message": ...}`` when ``pmbl.sleep_mode()`` finishes without
        raising, otherwise ``{"error": <exception text>}``. The failure is
        also printed to stdout.
    """
    try:
        pmbl.sleep_mode()
    except Exception as exc:
        failure = str(exc)
        print(f"[SYSTEM] Error: {failure}")
        return {"error": failure}
    return {"message": "Sleep mode completed successfully"}