# Spaces:
# Sleeping
# Sleeping
# NOTE(review): the three lines above are Hugging Face Spaces page-header text
# captured during scraping, not code — kept as comments so the file parses.
import gradio as gr
from llama_cpp import Llama

print("Downloading model")
# Download (or reuse from the local HF cache) a 4-bit-quantized Gemma-2 2B
# chat model in GGUF format and load it for inference.
llm = Llama.from_pretrained(
    repo_id="bartowski/gemma-2-2b-it-abliterated-GGUF",
    filename="gemma-2-2b-it-abliterated-IQ4_XS.gguf",
    numa=True,       # enable NUMA-aware memory allocation
    n_ctx=4096,      # context window size in tokens
)
def respond(prompt: str):
    """Stream a chat completion for *prompt*, yielding the growing text.

    Yields the accumulated response after every streamed chunk so the
    Gradio UI can render partial output progressively.

    Args:
        prompt: The user's message, sent as a single-turn chat.

    Yields:
        str: The response text accumulated so far.
    """
    stream = llm.create_chat_completion(
        stream=True,
        messages=[{"role": "user", "content": prompt}],
    )
    response = ""
    for chunk in stream:
        # Hoist the nested lookup; not every chunk carries new content
        # (e.g. the final stop chunk has an empty delta).
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            response += delta["content"]
        yield response
# Minimal prompt -> streamed-response UI; the generator `respond` lets
# Gradio update the output box incrementally as tokens arrive.
demo = gr.Interface(
    fn=respond,
    inputs=[gr.TextArea("What is the capital of France?")],
    outputs=[gr.TextArea()],
)
# Bind to all interfaces on the standard HF Spaces port.
demo.launch(server_name="0.0.0.0", server_port=7860)