from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse
from llama_cpp import Llama

app = FastAPI()
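
# Load the local GGUF model once at startup. The model path, context size,
# and thread count are deployment-specific placeholders; adjust as needed.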
llm = Llama(model_path="model.gguf", n_ctx=4000, n_threads=2, chat_format="chatml")


@app.post("/api/v1/chat")
async def chat_post(request: Request):
    data = await request.json()
    message = data.get("message")
    history = data.get("history", [])
    temperature = data.get("temperature", 0.3)
    max_tokens = data.get("max_tokens", 512)

    def generate():
        # Build a ChatML-style message list: the system prompt, the
        # alternating user/assistant turns from history, then the new message.
        system_prompt = "You are OpenChat, a useful AI assistant."
        formatted_prompt = [{"role": "system", "content": system_prompt}]
        for user_prompt, bot_response in history:
            formatted_prompt.append({"role": "user", "content": user_prompt})
            formatted_prompt.append({"role": "assistant", "content": bot_response})
        formatted_prompt.append({"role": "user", "content": message})

        stream_response = llm.create_chat_completion(
            messages=formatted_prompt,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=True,
        )
        # Accumulate the streamed deltas into one string; the endpoint returns
        # the completed response as JSON rather than streaming it to the client.
        response = ""
        for chunk in stream_response:
            delta = chunk["choices"][0]["delta"]
            if "content" in delta:
                response += delta["content"]
        return response

    return JSONResponse(content={"response": generate()})


@app.get("/api/v1/chat")
async def chat_get():
    return {"message": "Send a POST request to this endpoint to chat."}


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
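
# Example request against the POST endpoint (a sketch; assumes the server is
# running locally on port 8000, and that "history" is a list of
# [user, assistant] pairs as the handler above expects):
#
#   curl -X POST http://localhost:8000/api/v1/chat \
#        -H "Content-Type: application/json" \
#        -d '{"message": "Hello!", "history": [], "temperature": 0.3, "max_tokens": 128}'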