Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -51,7 +51,7 @@ def generate(
|
|
51 |
conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
|
52 |
conversation.append({"role": "user", "content": message})
|
53 |
|
54 |
-
chat = tokenizer.apply_chat_template(
|
55 |
inputs = tokenizer(chat, return_tensors="pt", add_special_tokens=False).to("cuda")
|
56 |
if len(inputs) > MAX_INPUT_TOKEN_LENGTH:
|
57 |
inputs = inputs[-MAX_INPUT_TOKEN_LENGTH:]
|
@@ -94,21 +94,21 @@ chat_interface = gr.ChatInterface(
|
|
94 |
minimum=0.1,
|
95 |
maximum=4.0,
|
96 |
step=0.1,
|
97 |
-
value=0.
|
98 |
),
|
99 |
gr.Slider(
|
100 |
label="Top-p (nucleus sampling)",
|
101 |
minimum=0.05,
|
102 |
maximum=1.0,
|
103 |
step=0.05,
|
104 |
-
value=0.
|
105 |
),
|
106 |
gr.Slider(
|
107 |
label="Top-k",
|
108 |
minimum=1,
|
109 |
maximum=1000,
|
110 |
step=1,
|
111 |
-
value=
|
112 |
),
|
113 |
gr.Slider(
|
114 |
label="Repetition penalty",
|
|
|
51 |
conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
|
52 |
conversation.append({"role": "user", "content": message})
|
53 |
|
54 |
+
chat = tokenizer.apply_chat_template([{"role": "user", "content": message}], tokenize=False)
|
55 |
inputs = tokenizer(chat, return_tensors="pt", add_special_tokens=False).to("cuda")
|
56 |
if len(inputs) > MAX_INPUT_TOKEN_LENGTH:
|
57 |
inputs = inputs[-MAX_INPUT_TOKEN_LENGTH:]
|
|
|
94 |
minimum=0.1,
|
95 |
maximum=4.0,
|
96 |
step=0.1,
|
97 |
+
value=0.1,
|
98 |
),
|
99 |
gr.Slider(
|
100 |
label="Top-p (nucleus sampling)",
|
101 |
minimum=0.05,
|
102 |
maximum=1.0,
|
103 |
step=0.05,
|
104 |
+
value=0.05,
|
105 |
),
|
106 |
gr.Slider(
|
107 |
label="Top-k",
|
108 |
minimum=1,
|
109 |
maximum=1000,
|
110 |
step=1,
|
111 |
+
value=1000,
|
112 |
),
|
113 |
gr.Slider(
|
114 |
label="Repetition penalty",
|