Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import gradio as gr
|
2 |
-
import time
|
3 |
import json
|
4 |
from cerebras.cloud.sdk import Cerebras
|
5 |
from typing import List, Dict, Tuple, Any, Generator
|
@@ -24,14 +24,25 @@ def make_api_call(api_key: str, messages: List[Dict[str, str]], max_tokens: int,
|
|
24 |
|
25 |
content = json.loads(response.choices[0].message.content)
|
26 |
|
27 |
-
#
|
28 |
-
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
content['token_info'] = {
|
33 |
-
'
|
34 |
-
'tokens_per_second': tokens_per_second
|
|
|
|
|
|
|
|
|
35 |
}
|
36 |
|
37 |
return content
|
@@ -55,51 +66,50 @@ def generate_response(api_key: str, prompt: str) -> Generator[Tuple[List[Tuple[s
|
|
55 |
]
|
56 |
|
57 |
steps = []
|
58 |
-
step_count =
|
59 |
total_thinking_time = 0
|
60 |
-
|
61 |
-
total_tokens_per_second = 0
|
62 |
|
63 |
while True:
|
64 |
-
start_time = time.time()
|
65 |
step_data = make_api_call(api_key, messages, 300)
|
66 |
-
|
67 |
-
total_thinking_time += thinking_time
|
68 |
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
|
|
|
73 |
step_title = f"Step {step_count}: {step_data['title']}"
|
74 |
-
step_content = f"{step_data['content']}\n\n**
|
75 |
steps.append((step_title, step_content))
|
76 |
messages.append({"role": "assistant", "content": json.dumps(step_data)})
|
77 |
|
78 |
-
#
|
79 |
-
|
|
|
|
|
|
|
80 |
|
81 |
if step_data.get('next_action') == 'final_answer':
|
82 |
break
|
83 |
-
|
84 |
-
step_count += 1
|
85 |
|
86 |
# Request the final answer
|
87 |
messages.append({"role": "user", "content": "Please provide the final answer based on your reasoning above."})
|
88 |
|
89 |
-
start_time = time.time()
|
90 |
final_data = make_api_call(api_key, messages, 200, is_final_answer=True)
|
91 |
-
|
92 |
-
total_thinking_time += thinking_time
|
93 |
|
94 |
-
|
95 |
-
|
96 |
-
total_tokens_per_second += token_info['tokens_per_second']
|
97 |
|
98 |
-
final_content = f"{final_data.get('content', 'No final answer provided.')}\n\n**Final answer
|
99 |
steps.append(("Final Answer", final_content))
|
100 |
|
101 |
-
#
|
102 |
-
|
|
|
|
|
|
|
103 |
|
104 |
def respond(api_key: str, message: str, history: List[Tuple[str, str]]) -> Generator[Tuple[List[Tuple[str, str]], str], None, None]:
|
105 |
"""
|
@@ -112,14 +122,14 @@ def respond(api_key: str, message: str, history: List[Tuple[str, str]]) -> Gener
|
|
112 |
# Initialize the generator
|
113 |
response_generator = generate_response(api_key, message)
|
114 |
|
115 |
-
for steps, total_time,
|
116 |
conversation = history.copy()
|
117 |
for title, content in steps[len(conversation):]:
|
118 |
if title.startswith("Step") or title == "Final Answer":
|
119 |
conversation.append((title, content))
|
120 |
else:
|
121 |
conversation.append((title, content))
|
122 |
-
yield conversation, f"**Total
|
123 |
|
124 |
def main():
|
125 |
with gr.Blocks() as demo:
|
|
|
1 |
import gradio as gr
|
2 |
+
import time
|
3 |
import json
|
4 |
from cerebras.cloud.sdk import Cerebras
|
5 |
from typing import List, Dict, Tuple, Any, Generator
|
|
|
24 |
|
25 |
content = json.loads(response.choices[0].message.content)
|
26 |
|
27 |
+
# Access time_info attributes directly
|
28 |
+
queue_time = response.time_info.queue_time
|
29 |
+
prompt_time = response.time_info.prompt_time
|
30 |
+
completion_time = response.time_info.completion_time
|
31 |
+
total_time = response.time_info.total_time
|
32 |
+
|
33 |
+
# Use the provided usage information
|
34 |
+
completion_tokens = response.usage.completion_tokens
|
35 |
+
|
36 |
+
# Calculate tokens per second using completion tokens
|
37 |
+
tokens_per_second = completion_tokens / total_time if total_time > 0 else 0
|
38 |
|
39 |
content['token_info'] = {
|
40 |
+
'completion_tokens': completion_tokens,
|
41 |
+
'tokens_per_second': tokens_per_second,
|
42 |
+
'queue_time': queue_time,
|
43 |
+
'prompt_time': prompt_time,
|
44 |
+
'completion_time': completion_time,
|
45 |
+
'total_time': total_time # Use total_time as the 'duration'
|
46 |
}
|
47 |
|
48 |
return content
|
|
|
66 |
]
|
67 |
|
68 |
steps = []
|
69 |
+
step_count = 0
|
70 |
total_thinking_time = 0
|
71 |
+
total_completion_tokens = 0
|
|
|
72 |
|
73 |
while True:
|
|
|
74 |
step_data = make_api_call(api_key, messages, 300)
|
75 |
+
token_info = step_data.pop('token_info', {'completion_tokens': 0, 'tokens_per_second': 0, 'duration': step_data.get('total_time', 0)})
|
|
|
76 |
|
77 |
+
# Use total_time from token_info as the duration
|
78 |
+
total_thinking_time += token_info.get('total_time', 0)
|
79 |
+
total_completion_tokens += token_info['completion_tokens']
|
80 |
|
81 |
+
step_count += 1
|
82 |
step_title = f"Step {step_count}: {step_data['title']}"
|
83 |
+
step_content = f"{step_data['content']}\n\n**API Call Duration: {token_info['total_time']:.2f} seconds**\n**Completion Tokens: {token_info['completion_tokens']}, Tokens/s: {token_info['tokens_per_second']:.2f}**"
|
84 |
steps.append((step_title, step_content))
|
85 |
messages.append({"role": "assistant", "content": json.dumps(step_data)})
|
86 |
|
87 |
+
# Calculate the overall average tokens per second using completion tokens
|
88 |
+
overall_tokens_per_second = total_completion_tokens / total_thinking_time if total_thinking_time > 0 else 0
|
89 |
+
|
90 |
+
# Yield the current conversation, total thinking time, total completion tokens, and overall average tokens per second
|
91 |
+
yield steps, total_thinking_time, total_completion_tokens, overall_tokens_per_second
|
92 |
|
93 |
if step_data.get('next_action') == 'final_answer':
|
94 |
break
|
|
|
|
|
95 |
|
96 |
# Request the final answer
|
97 |
messages.append({"role": "user", "content": "Please provide the final answer based on your reasoning above."})
|
98 |
|
|
|
99 |
final_data = make_api_call(api_key, messages, 200, is_final_answer=True)
|
100 |
+
token_info = final_data.pop('token_info', {'completion_tokens': 0, 'tokens_per_second': 0, 'duration': final_data.get('total_time', 0)})
|
|
|
101 |
|
102 |
+
total_thinking_time += token_info.get('total_time', 0)
|
103 |
+
total_completion_tokens += token_info['completion_tokens']
|
|
|
104 |
|
105 |
+
final_content = f"{final_data.get('content', 'No final answer provided.')}\n\n**Final answer API call duration: {token_info['total_time']:.2f} seconds**\n**Completion Tokens: {token_info['completion_tokens']}, Tokens/s: {token_info['tokens_per_second']:.2f}**"
|
106 |
steps.append(("Final Answer", final_content))
|
107 |
|
108 |
+
# Calculate the final overall average tokens per second using completion tokens
|
109 |
+
overall_tokens_per_second = total_completion_tokens / total_thinking_time if total_thinking_time > 0 else 0
|
110 |
+
|
111 |
+
# Yield the final conversation, total thinking time, total completion tokens, and overall average tokens per second
|
112 |
+
yield steps, total_thinking_time, total_completion_tokens, overall_tokens_per_second
|
113 |
|
114 |
def respond(api_key: str, message: str, history: List[Tuple[str, str]]) -> Generator[Tuple[List[Tuple[str, str]], str], None, None]:
|
115 |
"""
|
|
|
122 |
# Initialize the generator
|
123 |
response_generator = generate_response(api_key, message)
|
124 |
|
125 |
+
for steps, total_time, total_completion_tokens, avg_tokens_per_second in response_generator:
|
126 |
conversation = history.copy()
|
127 |
for title, content in steps[len(conversation):]:
|
128 |
if title.startswith("Step") or title == "Final Answer":
|
129 |
conversation.append((title, content))
|
130 |
else:
|
131 |
conversation.append((title, content))
|
132 |
+
yield conversation, f"**Total API call time:** {total_time:.2f} seconds\n**Completion tokens:** {total_completion_tokens}\n**Overall average tokens/s:** {avg_tokens_per_second:.2f}"
|
133 |
|
134 |
def main():
|
135 |
with gr.Blocks() as demo:
|