File size: 930 Bytes
cb89b5e
7460a2e
 
cb89b5e
7460a2e
 
 
4e97e1e
7460a2e
 
 
 
 
 
4e97e1e
7460a2e
 
 
 
 
 
 
4e97e1e
7460a2e
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import gradio as gr
import time
from llm import Gemma2B

# Single module-level model instance, shared by every chat request handled
# by this process (loaded once at import time; presumably CPU inference,
# per the inference_cpu call below).
llm = Gemma2B()


def inference(message, history, system_prompt, tokens):
    """Stream a Gemma response for a Gradio ChatInterface.

    Args:
        message: The new user message (str).
        history: Prior ``(user, model)`` message pairs supplied by Gradio.
        system_prompt: Extra system prompt from the UI textbox.
            TODO(review): currently unused — wire it into the chat
            template if/when the model supports a system role.
        tokens: Slider value; ``int(tokens)`` sets a floor on the number
            of streamed steps (see NOTE below).

    Yields:
        Progressively longer prefixes of the model response, simulating
        token-by-token streaming (0.05 s per step).
    """
    # BUG FIX: the original reassigned chat_template inside the loop, so
    # only the most recent history pair reached the model.  Extend the
    # list instead so the whole conversation is sent.
    chat_template = []
    for user, model in history:
        chat_template.append({"role": "user", "content": user})
        chat_template.append({"role": "model", "content": model})
    chat_template.append({"role": "user", "content": message})

    raw = llm.inference_cpu(chat_template)
    # Keep only the last turn, then drop the "model" role label and the
    # <eos> marker.  BUG FIX: str.strip("model") / str.strip("<eos>")
    # remove any characters from those *sets*, which could eat legitimate
    # leading/trailing letters; removeprefix/removesuffix (3.9+) match
    # the exact strings instead.
    response = (
        raw.split("<start_of_turn>")[-1]
        .removeprefix("model")
        .removesuffix("<eos>")
        .strip()
    )
    # NOTE(review): max(...) means the slider sets a *minimum* number of
    # iterations — when tokens exceeds the response length the full text
    # is re-yielded while sleeping.  Looks intentional as a pacing knob,
    # but confirm whether min(...) (a length cap) was meant.
    for i in range(max(len(response), int(tokens))):
        time.sleep(0.05)
        yield response[: i + 1]


# Wire the streaming inference function into a Gradio chat UI.  The extra
# controls (system prompt, token slider) are passed to inference() as its
# third and fourth arguments on every submit.
extra_controls = [
    gr.Textbox("You are helpful AI.", label="System Prompt"),
    gr.Slider(10, 200, 100, label="Tokens"),
]
demo = gr.ChatInterface(inference, additional_inputs=extra_controls)
# queue() enables generator streaming; debug=True keeps the process in the
# foreground with verbose logs.
demo.queue().launch(debug=True)