# Spaces:
# Sleeping
# Sleeping
# Third-party deps: Quart (async Flask-like web framework) and the
# llama.cpp Python bindings.
from quart import Quart, request
from llama_cpp import Llama

app = Quart(__name__)

# Load the shared system prompt once at import time; it is prepended to
# every user request in echo().
with open('system.prompt', 'r', encoding='utf-8') as f:
    prompt = f.read()
async def echo():
    """Handle a text-generation request.

    Reads a JSON body, clamps the caller-supplied ``max_tokens`` to a hard
    ceiling of 500, and builds the full model prompt from the module-level
    system ``prompt`` plus the user's ``request`` field.

    Returns:
        dict: ``{"output": ...}`` on success, or ``({"error": ...}, 400)``
        when the payload is missing or malformed.

    NOTE(review): no route decorator is visible in this chunk — presumably
    ``@app.post(...)`` was lost in extraction; confirm against the original.
    """
    try:
        data = await request.get_json()
        # Clamp the requested token budget so a caller cannot demand
        # arbitrarily long (and expensive) generations.
        max_tokens = data.get("max_tokens")
        if max_tokens is not None and max_tokens > 500:
            data['max_tokens'] = 500
        user_prompt = prompt + "\n\nUser: " + data['request'] + "\nAssistant: "
    except (AttributeError, TypeError, KeyError):
        # Covers: no/non-dict JSON body (AttributeError on .get),
        # non-numeric max_tokens (TypeError on >), missing 'request' key.
        return {"error": "Not enough data"}, 400
    # NOTE(review): `output` is never assigned in this chunk — the model
    # invocation (e.g. llm(user_prompt, ...)) appears to have been lost in
    # extraction and must be restored before this handler can work.
    return {"output": output}
async def get():
    """Serve the static HTML landing page describing this demo server.

    NOTE(review): no route decorator is visible in this chunk — presumably
    ``@app.get('/')`` was lost in extraction; confirm against the original.
    """
    landing_page = '''<h1>Hello, world!</h1>
This is showcase how to make own server with OpenBuddy's model.<br>
I'm using here 3b model just for example. Also here's only CPU power.<br>
But you can use GPU power as well!<br>
<br>
<h1>How to GPU?</h1>
'''
    return landing_page