Spaces:
Runtime error
Runtime error
generic reliability update 1
Browse files
app.py
CHANGED
@@ -17,6 +17,16 @@ headers = {
|
|
17 |
|
18 |
session = requests.Session()
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
def get_timestamp():
|
21 |
return datetime.datetime.now().strftime("%H:%M:%S")
|
22 |
|
@@ -32,6 +42,23 @@ def predict(message, history, system_prompt, temperature, top_p, top_k, frequenc
|
|
32 |
history_format.append({"role": "user", "content": message})
|
33 |
|
34 |
print(f"<|system|> {system_prompt}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
data = {
|
37 |
"model": "meta-llama/Meta-Llama-3.1-405B-Instruct",
|
@@ -69,7 +96,6 @@ def predict(message, history, system_prompt, temperature, top_p, top_k, frequenc
|
|
69 |
continue
|
70 |
|
71 |
if partial_message:
|
72 |
-
print(f"<|assistant|> {partial_message}")
|
73 |
yield partial_message
|
74 |
|
75 |
except RequestException as e:
|
@@ -139,10 +165,9 @@ with gr.Blocks(theme='gradio/monochrome') as demo:
|
|
139 |
frequency_penalty = gr.Slider(-2, 2, value=0, step=0.1, label="Frequency Penalty")
|
140 |
presence_penalty = gr.Slider(-2, 2, value=0, step=0.1, label="Presence Penalty")
|
141 |
repetition_penalty = gr.Slider(0.01, 5, value=1.1, step=0.01, label="Repetition Penalty")
|
142 |
-
max_tokens = gr.Slider(1, 4096, value=
|
143 |
|
144 |
def user(user_message, history):
|
145 |
-
print(f"{get_timestamp()} <|user|> {user_message}")
|
146 |
history = history or []
|
147 |
return "", history + [[user_message, None]]
|
148 |
|
|
|
17 |
|
18 |
session = requests.Session()
|
19 |
|
20 |
+
DEFAULT_PARAMS = {
|
21 |
+
"temperature": 0.8,
|
22 |
+
"top_p": 0.95,
|
23 |
+
"top_k": 40,
|
24 |
+
"frequency_penalty": 0,
|
25 |
+
"presence_penalty": 0,
|
26 |
+
"repetition_penalty": 1.1,
|
27 |
+
"max_tokens": 256
|
28 |
+
}
|
29 |
+
|
30 |
def get_timestamp():
|
31 |
return datetime.datetime.now().strftime("%H:%M:%S")
|
32 |
|
|
|
42 |
history_format.append({"role": "user", "content": message})
|
43 |
|
44 |
print(f"<|system|> {system_prompt}")
|
45 |
+
print(f"{get_timestamp()} <|user|> {message}")
|
46 |
+
|
47 |
+
current_params = {
|
48 |
+
"temperature": temperature,
|
49 |
+
"top_p": top_p,
|
50 |
+
"top_k": top_k,
|
51 |
+
"frequency_penalty": frequency_penalty,
|
52 |
+
"presence_penalty": presence_penalty,
|
53 |
+
"repetition_penalty": repetition_penalty,
|
54 |
+
"max_tokens": max_tokens
|
55 |
+
}
|
56 |
+
|
57 |
+
non_default_params = {k: v for k, v in current_params.items() if v != DEFAULT_PARAMS[k]}
|
58 |
+
|
59 |
+
if non_default_params:
|
60 |
+
for param, value in non_default_params.items():
|
61 |
+
print(f"{param}={value}")
|
62 |
|
63 |
data = {
|
64 |
"model": "meta-llama/Meta-Llama-3.1-405B-Instruct",
|
|
|
96 |
continue
|
97 |
|
98 |
if partial_message:
|
|
|
99 |
yield partial_message
|
100 |
|
101 |
except RequestException as e:
|
|
|
165 |
frequency_penalty = gr.Slider(-2, 2, value=0, step=0.1, label="Frequency Penalty")
|
166 |
presence_penalty = gr.Slider(-2, 2, value=0, step=0.1, label="Presence Penalty")
|
167 |
repetition_penalty = gr.Slider(0.01, 5, value=1.1, step=0.01, label="Repetition Penalty")
|
168 |
+
max_tokens = gr.Slider(1, 4096, value=512, step=1, label="Max Output (max_tokens)")
|
169 |
|
170 |
def user(user_message, history):
|
|
|
171 |
history = history or []
|
172 |
return "", history + [[user_message, None]]
|
173 |
|