Threatthriver committed on
Commit
72139be
·
verified ·
1 Parent(s): b5a485f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +196 -31
app.py CHANGED
@@ -2,61 +2,222 @@ import gradio as gr
2
  import os
3
  import time
4
  from cerebras.cloud.sdk import Cerebras
 
 
 
 
 
 
 
5
 
6
# Set up the Cerebras client.
# The key is read once at import time; a missing key aborts start-up with a
# clear message instead of failing later on the first chat request.
api_key = os.getenv("CEREBRAS_API_KEY")
if not api_key:
    raise ValueError("CEREBRAS_API_KEY environment variable is not set.")

# Shared client used by chat_with_cerebras().
client = Cerebras(api_key=api_key)
 
12
 
13
def chat_with_cerebras(user_input):
    """Send *user_input* to the Cerebras chat model and collect the streamed reply.

    Args:
        user_input: The user's message text.

    Returns:
        A 4-tuple ``(response, chain_of_thought, compute_info, token_info)``:
        the full reply text, whatever follows the first "Chain of Thought:"
        marker in that reply ("" when the marker is absent), a
        "Compute Time: ..." string and a "Tokens used: ..." string.
        On any failure the tuple is
        ``("Error: Unable to process your request.", "", str(error), "")``.
    """
    # Start compute time measurement
    start_time = time.time()

    try:
        # Create a chat stream with Cerebras
        stream = client.chat.completions.create(
            messages=[
                {"role": "system", "content": "You are IntellijMind, an advanced AI designed to assist users with detailed insights, problem-solving, and chain-of-thought reasoning."},
                {"role": "user", "content": user_input}
            ],
            model="llama-3.3-70b",
            stream=True,
            max_completion_tokens=1024,
            temperature=0.2,
            top_p=1
        )

        # Collect response from the stream
        pieces = []
        for chunk in stream:
            if not chunk.choices:
                # Skip chunks that carry no choices instead of raising IndexError.
                continue
            delta = chunk.choices[0].delta
            if delta and delta.content:
                pieces.append(delta.content)
        response = "".join(pieces)

        # Extract the reasoning from the complete reply: the marker can be
        # split across stream chunks, so per-chunk matching would miss it and
        # would also drop everything streamed after the marker's own chunk.
        chain_of_thought = response.partition("Chain of Thought:")[2]

        # End compute time measurement
        compute_time = time.time() - start_time

        # Simulate token usage tracking (placeholder for real implementation):
        # whitespace-separated words, not model tokens.
        token_usage = len(user_input.split()) + len(response.split())

        return response, chain_of_thought, f"Compute Time: {compute_time:.2f} seconds", f"Tokens used: {token_usage}"

    except Exception as e:
        # UI boundary: report the failure in the result tuple rather than raising.
        return "Error: Unable to process your request.", "", str(e), ""
55
 
56
- # Gradio interface
 
57
  def gradio_ui():
58
  with gr.Blocks() as demo:
59
- gr.Markdown("""# 🚀 IntellijMind: The Future of AI Chatbots\nExperience the most advanced chatbot for deep insights, chain-of-thought reasoning, and unmatched clarity!""")
60
 
61
  with gr.Row():
62
  with gr.Column(scale=6):
@@ -68,15 +229,19 @@ def gradio_ui():
68
 
69
  user_input = gr.Textbox(label="Type your message", placeholder="Ask me anything...", lines=2)
70
 
 
71
  with gr.Row():
72
  send_button = gr.Button("Send", variant="primary")
73
  clear_button = gr.Button("Clear Chat")
74
  export_button = gr.Button("Export Chat History")
75
 
76
- def handle_chat(chat_history, user_input):
77
  if not user_input.strip():
78
  return chat_history, "", "", "", "Please enter a valid message."
79
- ai_response, chain_of_thought, compute_info, token_usage = chat_with_cerebras(user_input)
 
 
 
80
  chat_history.append((user_input, ai_response))
81
  return chat_history, chain_of_thought, compute_info, token_usage
82
 
@@ -98,10 +263,10 @@ def gradio_ui():
98
 
99
  user_input.submit(handle_chat, inputs=[chat_history, user_input], outputs=[chat_history, chain_of_thought_display, compute_time, token_usage_display])
100
 
101
- gr.Markdown("""---\n### 🌟 Features:\n- **Advanced Reasoning**: Chain-of-thought explanations for complex queries.\n- **Real-Time Performance Metrics**: Measure response compute time instantly.\n- **Token Usage Tracking**: Monitor token usage per response for transparency.\n- **Export Chat History**: Save your conversation as a text file for future reference.\n- **User-Friendly Design**: Intuitive chatbot interface with powerful features.\n- **Insightful Chain of Thought**: See the reasoning process behind AI decisions.\n- **Submit on Enter**: Seamless interaction with keyboard support.\n""")
102
 
103
  return demo
104
 
105
# Run the Gradio app: build the Blocks UI once at import time and start serving.
demo = gradio_ui()
demo.launch()
 
2
  import os
3
  import time
4
  from cerebras.cloud.sdk import Cerebras
5
+ import requests
6
+ from bs4 import BeautifulSoup
7
+ from urllib.parse import urljoin, urlparse
8
+ from groq import Groq
9
+ import asyncio
10
+ import re
11
+ import json
12
 
13
# --- Constants and API Setup ---
# The Cerebras key is read once at import time; a missing key aborts start-up
# with a clear message instead of failing later on the first request.
CEREBRAS_API_KEY = os.getenv("CEREBRAS_API_KEY")
if not CEREBRAS_API_KEY:
    raise ValueError("CEREBRAS_API_KEY environment variable is not set.")

client_cerebras = Cerebras(api_key=CEREBRAS_API_KEY)
# NOTE(review): no key is passed here; presumably the Groq SDK reads
# GROQ_API_KEY from the environment - confirm against the SDK documentation.
client_groq = Groq()

# --- Rate Limiting ---
CEREBRAS_REQUESTS_PER_MINUTE = 30
CEREBRAS_TOKENS_PER_MINUTE = 6000  # using lowest token limit for versatile model
GROQ_REQUESTS_PER_MINUTE = 30
GROQ_TOKENS_PER_MINUTE = 15000  # using token limit for tool-use-preview model

# Timestamps (time.time() values) of requests recorded by the rate-limit checks.
cerebras_request_queue = asyncio.Queue()
groq_request_queue = asyncio.Queue()

# Time of the most recent request and running token total, per provider.
# Both are updated by check_cerebras_rate_limit() / check_groq_rate_limit().
last_cerebras_request_time = 0
last_groq_request_time = 0
cerebras_token_count = 0
groq_token_count = 0
35
+
36
# --- Model Rate Limit Info ---
# Static reference tables returned verbatim by get_model_info().
# NOTE(review): any column-alignment whitespace in these tables is not
# recoverable from the extracted source; fields are separated by single spaces.
CHAT_COMPLETION_MODELS_INFO = """
Chat Completion
ID Requests per Minute Requests per Day Tokens per Minute Tokens per Day
gemma-7b-it 30 14,400 15,000 500,000
gemma2-9b-it 30 14,400 15,000 500,000
llama-3.1-70b-versatile 30 14,400 6,000 200,000
llama-3.1-8b-instant 30 14,400 20,000 500,000
llama-3.2-11b-text-preview 30 7,000 7,000 500,000
llama-3.2-11b-vision-preview 30 7,000 7,000 500,000
llama-3.2-1b-preview 30 7,000 7,000 500,000
llama-3.2-3b-preview 30 7,000 7,000 500,000
llama-3.2-90b-text-preview 30 7,000 7,000 500,000
llama-3.2-90b-vision-preview 15 3,500 7,000 250,000
llama-3.3-70b-specdec 30 1,000 6,000 100,000
llama-3.3-70b-versatile 30 1,000 6,000 100,000
llama-guard-3-8b 30 14,400 15,000 500,000
llama3-70b-8192 30 14,400 6,000 500,000
llama3-8b-8192 30 14,400 30,000 500,000
llama3-groq-70b-8192-tool-use-preview 30 14,400 15,000 500,000
llama3-groq-8b-8192-tool-use-preview 30 14,400 15,000 500,000
llava-v1.5-7b-4096-preview 30 14,400 30,000 (No limit)
mixtral-8x7b-32768 30 14,400 5,000 500,000
"""

SPEECH_TO_TEXT_MODELS_INFO = """
Speech To Text
ID Requests per Minute Requests per Day Audio Seconds per Hour Audio Seconds per Day
distil-whisper-large-v3-en 20 2,000 7,200 28,800
whisper-large-v3 20 2,000 7,200 28,800
whisper-large-v3-turbo 20 2,000 7,200 28,800
"""
68
+
69
def get_model_info() -> str:
    """Return the chat-completion and speech-to-text rate-limit tables as one string.

    The two module-level tables are joined with a blank line between them and
    wrapped in a leading and trailing newline.
    """
    # NOTE(review): any indentation inside the original literal is not
    # recoverable from the extracted source; none is emitted here.
    return f"\n{CHAT_COMPLETION_MODELS_INFO}\n\n{SPEECH_TO_TEXT_MODELS_INFO}\n"
 
 
75
 
76
+
77
+ # --- Helper Functions ---
78
+
79
def is_valid_url(url) -> bool:
    """Return True when *url* is an absolute ``http``/``https`` URL with a host.

    Args:
        url: Candidate URL. Anything that is not a ``str`` (for example
            ``None`` from a missing JSON field) is rejected rather than raising.

    Returns:
        ``True`` only if the URL parses, its scheme is ``http`` or ``https``
        and it has a network location; ``False`` otherwise.
    """
    if not isinstance(url, str):
        return False
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse raises ValueError for malformed input such as a bad IPv6 literal.
        return False
    # Accepted URLs are passed to requests.get(), which only handles
    # http/https, so other schemes (ftp:, file:, ...) are rejected up front.
    # urlparse lower-cases the scheme, so "HTTP://" is still accepted.
    return parsed.scheme in ("http", "https") and bool(parsed.netloc)
85
+
86
+
87
def fetch_webpage(url):
    """Download *url* and return the response body as text.

    Args:
        url: Address to fetch with an HTTP GET (10 second timeout).

    Returns:
        The response text on success. On any ``requests`` failure, including
        a bad status code surfaced by ``raise_for_status()``, a string of the
        form ``"Error fetching URL: <reason>"`` is returned instead of raising,
        so callers must check for that prefix.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Raise an exception for bad status codes
        return response.text
    except requests.exceptions.RequestException as e:
        return f"Error fetching URL: {e}"
94
+
95
+
96
def extract_text_from_html(html):
    """Return the text content of *html* as a single space-separated string.

    The markup is parsed with BeautifulSoup's built-in ``html.parser`` backend
    and flattened with ``get_text(separator=' ', strip=True)``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    return soup.get_text(separator=' ', strip=True)
100
+
101
+ # --- Asynchronous Rate Limit Logic ---
102
+
103
# Length of the rate-limit window; all limits above are expressed per minute.
_RATE_LIMIT_WINDOW_SECONDS = 60


def _recent_request_times(request_queue, now):
    """Drop timestamps older than the window from *request_queue*.

    Returns the surviving timestamps, oldest first; the queue is left holding
    exactly those values in the same order.
    """
    recent = []
    while not request_queue.empty():
        stamp = request_queue.get_nowait()
        if now - stamp < _RATE_LIMIT_WINDOW_SECONDS:
            recent.append(stamp)
    for stamp in recent:
        request_queue.put_nowait(stamp)
    return recent


async def _wait_for_capacity(request_queue, last_request_time, token_count,
                             num_tokens, max_requests, max_tokens):
    """Sleep until one more request of *num_tokens* fits, then record it.

    Args:
        request_queue: ``asyncio.Queue`` of timestamps of recorded requests.
        last_request_time: ``time.time()`` value of the previous request.
        token_count: Tokens accumulated since the token budget last reset.
        num_tokens: Size of the request about to be sent.
        max_requests: Requests allowed per window.
        max_tokens: Tokens allowed per window.

    Returns:
        ``(new_last_request_time, new_token_count)`` for the caller to store.
    """
    while True:
        now = time.time()
        recent = _recent_request_times(request_queue, now)

        # The token budget starts over once a whole window has passed since
        # the previous request (including after we slept for that long).
        if now - last_request_time >= _RATE_LIMIT_WINDOW_SECONDS:
            token_count = 0

        wait = 0.0
        if len(recent) >= max_requests:
            # Enough of the oldest entries must expire to get below the limit.
            blocking = recent[len(recent) - max_requests]
            wait = blocking + _RATE_LIMIT_WINDOW_SECONDS - now
        if token_count > 0 and token_count + num_tokens > max_tokens:
            # Per-request token sizes are not stored, so wait until the budget
            # resets. A single request larger than the budget is let through
            # (token_count == 0) because waiting could never make it fit.
            wait = max(wait, last_request_time + _RATE_LIMIT_WINDOW_SECONDS - now)

        if wait <= 0:
            break
        await asyncio.sleep(wait)

    # Record the admission time, not the arrival time.
    request_queue.put_nowait(now)
    return now, token_count + num_tokens


async def check_cerebras_rate_limit(num_tokens):
    """Wait, if necessary, until a Cerebras request of *num_tokens* is allowed.

    Enforces CEREBRAS_REQUESTS_PER_MINUTE and CEREBRAS_TOKENS_PER_MINUTE and
    updates ``cerebras_request_queue``, ``last_cerebras_request_time`` and
    ``cerebras_token_count``.

    NOTE: the module-level counters are read before the wait and written
    after it, so overlapping callers can lose updates.
    """
    global last_cerebras_request_time
    global cerebras_token_count
    last_cerebras_request_time, cerebras_token_count = await _wait_for_capacity(
        cerebras_request_queue,
        last_cerebras_request_time,
        cerebras_token_count,
        num_tokens,
        CEREBRAS_REQUESTS_PER_MINUTE,
        CEREBRAS_TOKENS_PER_MINUTE,
    )


async def check_groq_rate_limit(num_tokens):
    """Wait, if necessary, until a Groq request of *num_tokens* is allowed.

    Enforces GROQ_REQUESTS_PER_MINUTE and GROQ_TOKENS_PER_MINUTE and updates
    ``groq_request_queue``, ``last_groq_request_time`` and ``groq_token_count``.

    NOTE: the module-level counters are read before the wait and written
    after it, so overlapping callers can lose updates.
    """
    global last_groq_request_time
    global groq_token_count
    last_groq_request_time, groq_token_count = await _wait_for_capacity(
        groq_request_queue,
        last_groq_request_time,
        groq_token_count,
        num_tokens,
        GROQ_REQUESTS_PER_MINUTE,
        GROQ_TOKENS_PER_MINUTE,
    )
136
+
137
+
138
+ # --- Chat Logic with Groq ---
139
_CHAIN_OF_THOUGHT_MARKER = "Chain of Thought:"
# Prefix returned by fetch_webpage() when the download failed.
_FETCH_ERROR_PREFIX = "Error fetching URL:"
# Matches the "Action: <name>, Parameters: " prefix that the system prompt asks
# the model to emit. The JSON object that follows is decoded separately so
# nested braces or trailing text cannot confuse the match.
_ACTION_PREFIX = re.compile(r"Action: (\w+), Parameters: (?=\{)")


def _expand_scrape_command(user_input):
    """Return *user_input* prefixed with page text (or an error note) for ``scrape <url>``.

    Input whose first word is not exactly "scrape" (case-insensitive), or that
    has no argument after it, is returned unchanged.
    """
    parts = user_input.split(maxsplit=1)
    if len(parts) < 2 or parts[0].lower() != "scrape":
        return user_input
    url = parts[1].strip()
    if not is_valid_url(url):
        return "Invalid URL provided. " + user_input
    html_content = fetch_webpage(url)
    if html_content.startswith(_FETCH_ERROR_PREFIX):
        return f"{html_content}. {user_input}"
    webpage_text = extract_text_from_html(html_content)
    return f"The content from the webpage: {webpage_text}. {user_input}"


def _run_requested_actions(model_text):
    """Carry out every scrape action found in *model_text*; return the text to append."""
    decoder = json.JSONDecoder()
    appended = []
    for match in _ACTION_PREFIX.finditer(model_text):
        if match.group(1) != "take_action":
            continue
        try:
            parameters, _ = decoder.raw_decode(model_text, match.end())
        except ValueError:
            # Malformed JSON from the model: skip this action instead of
            # failing the whole reply.
            continue
        if not isinstance(parameters, dict) or parameters.get("action") != "scrape":
            continue
        url = parameters.get("url")
        # NOTE(review): the URL comes from model output. is_valid_url() does
        # not block internal or private hosts, so this is an SSRF surface.
        if not isinstance(url, str) or not is_valid_url(url):
            appended.append("\nInvalid URL provided.\n")
            continue
        html_content = fetch_webpage(url)
        if html_content.startswith(_FETCH_ERROR_PREFIX):
            appended.append(f"\nError scraping webpage: {html_content}\n")
        else:
            webpage_text = extract_text_from_html(html_content)
            appended.append(f"\nWebpage Content: {webpage_text}\n")
    return "".join(appended)


async def chat_with_groq(user_input, chat_history):
    """Answer *user_input* with the Groq model, handling scrape and model-info requests.

    Args:
        user_input: The user's message. "model info" returns the static
            rate-limit tables without calling the model; "scrape <url>"
            fetches the page and adds its text to the prompt.
        chat_history: Sequence of ``(user_message, ai_message)`` pairs; the
            last five are included in the system prompt.

    Returns:
        A 4-tuple ``(response, chain_of_thought, compute_info, token_info)``.
        ``chain_of_thought`` is whatever follows the first "Chain of Thought:"
        marker in the model's reply ("" when absent). On any failure the tuple
        is ``("Error: Unable to process your request.", "", str(error), "")``.
    """
    start_time = time.time()
    try:
        if user_input.strip().lower() == "model info":
            response = get_model_info()
            return response, "", f"Compute Time: {time.time() - start_time:.2f} seconds", f"Tokens used: {len(user_input.split()) + len(response.split())}"

        # Prepare chat history for the prompt
        formatted_history = "\n".join(
            f"User: {msg[0]}\nAI: {msg[1]}" for msg in (chat_history or [])[-5:]
        )
        # Check for web scraping command
        user_input = _expand_scrape_command(user_input)

        messages = [
            {"role": "system", "content": f"""You are IntellijMind, an advanced AI designed to assist users with detailed insights, problem-solving, and chain-of-thought reasoning. You have access to various tools to help the user, and can initiate actions when needed. Be creative and inject humor when appropriate. You can use tools to browse the web when instructed with a 'scrape' command followed by a URL. If there is a request for model info, use the get_model_info function. Current conversation: {formatted_history} Available actions: take_action: 'scrape', parameters: url. Example action: Action: take_action, Parameters: {{"action":"scrape", "url":"https://example.com"}} """},
            {"role": "user", "content": user_input}
        ]

        # Word count is used as a rough stand-in for the token count.
        num_tokens = len(user_input.split())
        await check_groq_rate_limit(num_tokens)

        # NOTE(review): the Groq client and the page fetches above are
        # synchronous, so they block the event loop while they run.
        completion = client_groq.chat.completions.create(
            model="llama3-groq-70b-8192-tool-use-preview",
            messages=messages,
            temperature=1,
            max_tokens=1024,
            top_p=1,
            stream=True,
            stop=None,
        )

        pieces = []
        for chunk in completion:
            if not chunk.choices:
                # Skip chunks that carry no choices instead of raising IndexError.
                continue
            delta = chunk.choices[0].delta
            if delta and delta.content:
                pieces.append(delta.content)
        model_text = "".join(pieces)

        # Markers and actions are looked up in the complete reply: they can be
        # split across stream chunks, so per-chunk matching would miss them.
        chain_of_thought = model_text.partition(_CHAIN_OF_THOUGHT_MARKER)[2]
        response = model_text + _run_requested_actions(model_text)

        compute_time = time.time() - start_time
        token_usage = len(user_input.split()) + len(response.split())
        return response, chain_of_thought, f"Compute Time: {compute_time:.2f} seconds", f"Tokens used: {token_usage}"

    except Exception as e:
        # UI boundary: report the failure in the result tuple rather than raising.
        return "Error: Unable to process your request.", "", str(e), ""
215
 
216
+
217
+ # --- Gradio Interface ---
218
  def gradio_ui():
219
  with gr.Blocks() as demo:
220
+ gr.Markdown("""# 🚀 IntellijMind: The Crazy Agent Chatbot\nExperience the most advanced chatbot for deep insights, chain-of-thought reasoning, and unmatched clarity! Get ready for some proactive action!""")
221
 
222
  with gr.Row():
223
  with gr.Column(scale=6):
 
229
 
230
  user_input = gr.Textbox(label="Type your message", placeholder="Ask me anything...", lines=2)
231
 
232
+
233
  with gr.Row():
234
  send_button = gr.Button("Send", variant="primary")
235
  clear_button = gr.Button("Clear Chat")
236
  export_button = gr.Button("Export Chat History")
237
 
238
+ async def handle_chat(chat_history, user_input):
239
  if not user_input.strip():
240
  return chat_history, "", "", "", "Please enter a valid message."
241
+
242
+ ai_response, chain_of_thought, compute_info, token_usage = await chat_with_groq(user_input, chat_history)
243
+
244
+
245
  chat_history.append((user_input, ai_response))
246
  return chat_history, chain_of_thought, compute_info, token_usage
247
 
 
263
 
264
  user_input.submit(handle_chat, inputs=[chat_history, user_input], outputs=[chat_history, chain_of_thought_display, compute_time, token_usage_display])
265
 
266
+ gr.Markdown("""---\n### 🌟 Features:\n- **Advanced Reasoning**: Chain-of-thought explanations for complex queries.\n- **Proactive Actions**: The agent will take actions without being explicitly asked.\n- **Web Scraping**: The agent will use the scrape command if needed\n- **Humor and Creativity**: Enjoy a more engaging and creative experience.\n- **Real-Time Performance Metrics**: Measure response compute time instantly.\n- **Token Usage Tracking**: Monitor token usage per response for transparency.\n- **Export Chat History**: Save your conversation as a text file for future reference.\n- **User-Friendly Design**: Intuitive chatbot interface with powerful features.\n- **Insightful Chain of Thought**: See the reasoning process behind AI decisions.\n- **Submit on Enter**: Seamless interaction with keyboard support.\n""")
267
 
268
  return demo
269
 
270
# Run the Gradio app: build the Blocks UI once at import time and start serving.
demo = gradio_ui()
demo.launch()