Spaces:
Runtime error
Runtime error
File size: 930 Bytes
cb89b5e 7460a2e cb89b5e 7460a2e 4e97e1e 7460a2e 4e97e1e 7460a2e 4e97e1e 7460a2e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
import gradio as gr
import time
from llm import Gemma2B
# Shared model wrapper: created once when this module is imported and reused
# by every call to inference() below.
llm = Gemma2B()
def inference(message, history, system_prompt, tokens):
    """Stream the model's reply to ``message`` one character at a time.

    Args:
        message: The latest user message.
        history: Earlier turns as an iterable of ``(user, model)`` pairs.
        system_prompt: Value of the "System Prompt" input. It is accepted so
            the signature matches the UI inputs but is not forwarded to the
            model.
        tokens: Value of the "Tokens" slider. It is accepted so the signature
            matches the UI inputs but does not limit the reply.
            NOTE(review): the previous ``max(len(response), tokens)`` bound
            never shortened the reply either; it only added idle repeats.
            If a real cap is wanted, confirm whether ``llm.inference_cpu``
            can take a generation limit.

    Yields:
        Progressively longer prefixes of the reply, ending with the full
        reply. An empty reply is yielded once as ``""``.
    """
    chat_template = []
    for user, model in history:
        # Extend (not rebind) so every earlier turn is sent to the model,
        # not just the most recent one.
        chat_template.extend(
            [
                {"role": "user", "content": user},
                {"role": "model", "content": model},
            ]
        )
    chat_template.append({"role": "user", "content": message})

    raw = llm.inference_cpu(chat_template)

    # Keep only the text after the last turn marker (same as split(...)[-1]).
    _, marker, response = raw.rpartition("<start_of_turn>")
    response = response.strip()

    # Remove the role header and end marker as whole substrings.
    # str.strip("model") / str.strip("<eos>") treat their argument as a set
    # of characters and would also eat real letters at the edges of the reply.
    if marker and response.startswith("model"):
        response = response[len("model"):].lstrip()
    if response.endswith("<eos>"):
        response = response[: -len("<eos>")].rstrip()

    if not response:
        # Still hand the UI one value so the turn is completed.
        yield response
        return

    # Stop as soon as the whole reply has been shown.
    for end in range(1, len(response) + 1):
        time.sleep(0.05)
        yield response[:end]
# Chat UI. The extra inputs line up, in order, with the `system_prompt` and
# `tokens` parameters of inference().
demo = gr.ChatInterface(
    fn=inference,
    additional_inputs=[
        gr.Textbox(value="You are helpful AI.", label="System Prompt"),
        gr.Slider(minimum=10, maximum=200, value=100, label="Tokens"),
    ],
)

# Enable the request queue, then start the server with debug output.
demo.queue().launch(debug=True)
|