Spaces:
Runtime error
Runtime error
File size: 954 Bytes
e5e2748 dcd2d54 e5e2748 dcd2d54 e5e2748 dcd2d54 e5e2748 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
import asyncio
import copy
import time
from concurrent.futures import ThreadPoolExecutor

import requests
from fastapi import FastAPI, Request
from llama_cpp import Llama
from sse_starlette import EventSourceResponse
# Location of the model weights file; change this to wherever the model lives.
MODEL_PATH = "./llama-2-13b-chat.ggmlv3.q4_1.bin"

# The model is loaded once at import time and shared by the request handlers.
print("Loading model...")
llm = Llama(model_path=MODEL_PATH)
print("Model loaded!")

# Application object that the route decorators attach to.
app = FastAPI()
# Pulling a chunk from the model stream is blocking work. It is run on this
# single worker thread so the event loop stays responsive and the shared model
# is never driven from two threads at the same time.
_LLM_EXECUTOR = ThreadPoolExecutor(max_workers=1)

# Returned by ``next`` when the stream is exhausted. ``StopIteration`` cannot
# be propagated through a future, so a sentinel is used instead.
_STREAM_DONE = object()


@app.get("/llama")
async def llama(request: Request, question: str):
    """Stream a completion for *question* as server-sent events.

    Args:
        request: The incoming request; polled between chunks so streaming
            stops once the client has disconnected.
        question: Prompt text, passed to the model unchanged.

    Returns:
        An ``EventSourceResponse`` that emits one event per streamed chunk,
        with the chunk's text in the ``data`` field.
    """
    stream = llm(
        question,
        max_tokens=100,
        stop=["\n", " Q:"],
        stream=True,
    )

    async def server_sent_events():
        loop = asyncio.get_running_loop()
        while True:
            # Fetch the next chunk off the event loop thread; iterating the
            # stream directly here would block every other request.
            item = await loop.run_in_executor(
                _LLM_EXECUTOR, next, stream, _STREAM_DONE
            )
            if item is _STREAM_DONE or await request.is_disconnected():
                break
            yield {"data": item["choices"][0]["text"]}

    return EventSourceResponse(server_sent_events())