thunder-007 committed on
Commit
7460a2e
·
verified ·
1 Parent(s): 5542274

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -1
app.py CHANGED
@@ -1,3 +1,29 @@
1
  import gradio as gr
 
 
2
 
3
- gr.load("models/google/gemma-7b-it").launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import time
3
+ from llm import Gemma2B
4
 
5
+ llm = Gemma2B()
6
+
7
+
8
def _extract_reply(raw):
    """Return the model's final turn from the raw generated text.

    Keeps only the text after the last ``<start_of_turn>`` marker, then
    removes a leading ``model`` role tag and a trailing ``<eos>`` marker.
    """
    reply = raw.split("<start_of_turn>")[-1].strip()
    # str.strip("model") / str.strip("<eos>") treat their argument as a SET
    # of characters and would eat real reply text (e.g. "yes<eos>" -> "y"),
    # so the exact markers are matched as whole prefixes/suffixes instead.
    if reply.startswith("model"):
        reply = reply[len("model"):]
    reply = reply.strip()
    if reply.endswith("<eos>"):
        reply = reply[:-len("<eos>")]
    return reply.strip()


def echo(message, history, system_prompt, tokens):
    """Stream the model's reply to ``message`` one character at a time.

    Args:
        message: The latest user message.
        history: Earlier turns as ``(user, model)`` pairs, oldest first.
        system_prompt: Value of the "System Prompt" input. Accepted so the
            signature matches the UI's additional inputs; not used here.
        tokens: Value of the "Tokens" slider. Accepted for the same reason;
            NOTE(review): it is not forwarded to the model, since the
            inference call below only takes the chat turns.

    Yields:
        Growing prefixes of the reply, the last one being the full reply.
    """
    # Accumulate every earlier turn; assigning a fresh list per iteration
    # would keep only the most recent exchange.
    chat_template = []
    for user, model in history:
        chat_template.append({"role": "user", "content": user})
        chat_template.append({"role": "model", "content": model})
    chat_template.append({"role": "user", "content": message})

    response = _extract_reply(llm.inference_quantized_4bit(chat_template))

    if not response:
        # Still emit one (empty) update so the caller always gets a reply.
        yield response
        return

    # Iterate over the reply length only: looping up to the slider value
    # would keep sleeping and re-yielding an already complete reply.
    for end in range(1, len(response) + 1):
        time.sleep(0.05)
        yield response[:end]
21
+
22
+
23
# Extra controls shown under the chat box; their values are passed to
# ``echo`` after ``message`` and ``history``, in this order.
system_prompt_box = gr.Textbox("You are helpful AI.", label="System Prompt")
token_slider = gr.Slider(10, 200, 100, label="Tokens")

demo = gr.ChatInterface(
    echo,
    additional_inputs=[system_prompt_box, token_slider],
)

# Enable queuing, then start the app with debug output on.
demo.queue().launch(debug=True)