Nymbo committed on
Commit e45083a · verified · 1 Parent(s): f99b2be

Update app.py

Files changed (1)
  1. app.py +668 -346
app.py CHANGED
@@ -5,91 +5,15 @@ import json
5
  import base64
6
  from PIL import Image
7
  import io
8
- import atexit
9
-
10
- # Ensure smolagents and mcp are installed: pip install "smolagents[mcp]" mcp
11
- from smolagents import ToolCollection, CodeAgent
12
- from smolagents.mcp_client import MCPClient as SmolMCPClient # For connecting to MCP SSE servers
13
 
14
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
15
  print("Access token loaded.")
16
 
17
- # --- MCP Client Integration ---
18
- mcp_tools_collection = ToolCollection(tools=[]) # Global store for loaded MCP tools
19
- mcp_client_instances = [] # To keep track of client instances for proper closing
20
-
21
- DEFAULT_MCP_SERVERS = [
22
- {"name": "KokoroTTS (Example)", "type": "sse", "url": "https://fdaudens-kokoro-mcp.hf.space/gradio_api/mcp/sse"}
23
- ]
24
-
25
- def load_mcp_tools(server_configs_list):
26
- global mcp_tools_collection, mcp_client_instances
27
-
28
- # Close any existing client instances before loading new ones
29
- for client_instance in mcp_client_instances:
30
- try:
31
- client_instance.close()
32
- print(f"Closed existing MCP client: {client_instance}")
33
- except Exception as e:
34
- print(f"Error closing existing MCP client {client_instance}: {e}")
35
- mcp_client_instances = []
36
-
37
- all_discovered_tools = []
38
- if not server_configs_list:
39
- print("No MCP server configurations provided. Clearing MCP tools.")
40
- mcp_tools_collection = ToolCollection(tools=all_discovered_tools)
41
- return
42
-
43
- print(f"Loading MCP tools from {len(server_configs_list)} server configurations...")
44
- for config in server_configs_list:
45
- server_name = config.get('name', config.get('url', 'Unknown Server'))
46
- try:
47
- if config.get("type") == "sse":
48
- sse_url = config["url"]
49
- print(f"Attempting to connect to MCP SSE server: {server_name} at {sse_url}")
50
-
51
- # Using SmolMCPClient for SSE servers as shown in documentation
52
- # The constructor expects server_parameters={"url": sse_url}
53
- smol_mcp_client = SmolMCPClient(server_parameters={"url": sse_url})
54
- mcp_client_instances.append(smol_mcp_client) # Keep track to close later
55
-
56
- discovered_tools_from_server = smol_mcp_client.get_tools() # Returns a list of Tool objects
57
-
58
- if discovered_tools_from_server:
59
- all_discovered_tools.extend(list(discovered_tools_from_server))
60
- print(f"Discovered {len(discovered_tools_from_server)} tools from {server_name}.")
61
- else:
62
- print(f"No tools discovered from {server_name}.")
63
- # Add elif for "stdio" type if needed in the future, though it's more complex for Gradio apps
64
- else:
65
- print(f"Unsupported MCP server type '{config.get('type')}' for {server_name}. Skipping.")
66
- except Exception as e:
67
- print(f"Error loading MCP tools from {server_name}: {e}")
68
-
69
- mcp_tools_collection = ToolCollection(tools=all_discovered_tools)
70
- if mcp_tools_collection and len(mcp_tools_collection.tools) > 0:
71
- print(f"Successfully loaded a total of {len(mcp_tools_collection.tools)} MCP tools:")
72
- for tool in mcp_tools_collection.tools:
73
- print(f" - {tool.name}: {tool.description[:100]}...") # Print short description
74
- else:
75
- print("No MCP tools were loaded, or an error occurred.")
76
-
77
- def cleanup_mcp_client_instances_on_exit():
78
- global mcp_client_instances
79
- print("Attempting to clean up MCP client instances on application exit...")
80
- for client_instance in mcp_client_instances:
81
- try:
82
- client_instance.close()
83
- print(f"Closed MCP client: {client_instance}")
84
- except Exception as e:
85
- print(f"Error closing MCP client {client_instance} on exit: {e}")
86
- mcp_client_instances = []
87
- print("MCP client cleanup finished.")
88
-
89
- atexit.register(cleanup_mcp_client_instances_on_exit)
90
- # --- End MCP Client Integration ---
91
-
92
- # Function to encode image to base64 (remains the same)
93
  def encode_image(image_path):
94
  if not image_path:
95
  print("No image path provided")
@@ -97,14 +21,19 @@ def encode_image(image_path):
97
 
98
  try:
99
  print(f"Encoding image from path: {image_path}")
 
 
100
  if isinstance(image_path, Image.Image):
101
  image = image_path
102
  else:
 
103
  image = Image.open(image_path)
104
 
 
105
  if image.mode == 'RGBA':
106
  image = image.convert('RGB')
107
 
 
108
  buffered = io.BytesIO()
109
  image.save(buffered, format="JPEG")
110
  img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
@@ -114,10 +43,111 @@ def encode_image(image_path):
114
  print(f"Error encoding image: {e}")
115
  return None
116
 
117
- # Modified respond function
118
  def respond(
119
- message_input_text, # From multimodal textbox's text part
120
- image_files_list, # From multimodal textbox's files part
121
  history: list[tuple[str, str]],
122
  system_message,
123
  max_tokens,
@@ -128,314 +158,606 @@ def respond(
128
  provider,
129
  custom_api_key,
130
  custom_model,
131
- model_search_term, # Not directly used in this function but passed by UI
132
- selected_model # From radio
133
  ):
134
- global mcp_tools_collection # Access the loaded MCP tools
135
-
136
- print(f"Received message text: {message_input_text}")
137
- print(f"Received {len(image_files_list) if image_files_list else 0} images")
138
- # ... (keep other prints for debugging)
139
-
140
  token_to_use = custom_api_key if custom_api_key.strip() != "" else ACCESS_TOKEN
141
- hf_inference_client = InferenceClient(token=token_to_use, provider=provider)
142
  print(f"Hugging Face Inference Client initialized with {provider} provider.")
143
 
144
- if seed == -1: seed = None
 
 
145
 
146
- # --- Prepare current user message (potentially multimodal) ---
147
- current_user_content_parts = []
148
- if message_input_text and message_input_text.strip():
149
- current_user_content_parts.append({"type": "text", "text": message_input_text.strip()})
150
-
151
- if image_files_list:
152
- for img_path in image_files_list:
153
- if img_path: # img_path is the path to the uploaded file
154
- encoded_img = encode_image(img_path)
155
- if encoded_img:
156
- current_user_content_parts.append({
157
- "type": "image_url",
158
- "image_url": {"url": f"data:image/jpeg;base64,{encoded_img}"}
159
  })
160
-
161
- if not current_user_content_parts: # If message is truly empty
162
- print("Skipping empty message.")
163
- for item in history: yield item # hack to make gradio update with history
164
- return
165
 
166
- # --- Construct messages for LLM ---
167
- llm_messages = [{"role": "system", "content": system_message}]
168
- for hist_user, hist_assistant in history:
169
- # Assuming history user part is already formatted (string or list of dicts)
170
- if hist_user:
171
- # Handle complex history items (tuples of text, list_of_image_paths)
172
- if isinstance(hist_user, tuple) and len(hist_user) == 2:
173
- hist_user_text, hist_user_images = hist_user
174
- hist_user_parts = []
175
- if hist_user_text: hist_user_parts.append({"type": "text", "text": hist_user_text})
176
- for img_p in hist_user_images:
177
- enc_img = encode_image(img_p)
178
- if enc_img: hist_user_parts.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{enc_img}"}})
179
- if hist_user_parts: llm_messages.append({"role": "user", "content": hist_user_parts})
180
- elif isinstance(hist_user, str): # Simple text history
181
- llm_messages.append({"role": "user", "content": hist_user})
182
- # else: could be already formatted list of dicts from previous multimodal turn
183
 
184
- if hist_assistant:
185
- llm_messages.append({"role": "assistant", "content": hist_assistant})
186
-
187
- llm_messages.append({"role": "user", "content": current_user_content_parts if len(current_user_content_parts) > 1 else current_user_content_parts[0] if current_user_content_parts else ""})
188
-
189
- model_to_use = custom_model.strip() if custom_model.strip() else selected_model
190
  print(f"Model selected for inference: {model_to_use}")
191
-
192
- # --- Agent Logic or Direct LLM Call ---
193
- active_mcp_tools = list(mcp_tools_collection.tools) if mcp_tools_collection else []
194
-
195
- if active_mcp_tools:
196
- print(f"MCP tools are active ({len(active_mcp_tools)} tools). Using CodeAgent.")
197
-
198
- # Wrapper for smolagents.CodeAgent to use our configured HF InferenceClient
199
- class HFClientWrapperForAgent:
200
- def __init__(self, hf_client, model_id, outer_scope_params):
201
- self.client = hf_client
202
- self.model_id = model_id
203
- self.params = outer_scope_params
204
-
205
- def generate(self, agent_llm_messages, tools=None, tool_choice=None, **kwargs):
206
- # agent_llm_messages is from the agent. tools/tool_choice also from agent.
207
- api_params = {
208
- "model": self.model_id,
209
- "messages": agent_llm_messages,
210
- "stream": False, # CodeAgent's .run() expects a full response object
211
- "max_tokens": self.params['max_tokens'],
212
- "temperature": self.params['temperature'],
213
- "top_p": self.params['top_p'],
214
- "frequency_penalty": self.params['frequency_penalty'],
215
- }
216
- if self.params['seed'] is not None: api_params["seed"] = self.params['seed']
217
- if tools: api_params["tools"] = tools
218
- if tool_choice: api_params["tool_choice"] = tool_choice
219
-
220
- print(f"Agent's HFClientWrapper calling LLM: {self.model_id}")
221
- completion = self.client.chat_completion(**api_params)
222
- return completion
223
-
224
- outer_scope_llm_params = {
225
- "max_tokens": max_tokens, "temperature": temperature, "top_p": top_p,
226
- "frequency_penalty": frequency_penalty, "seed": seed
227
- }
228
- agent_model_adapter = HFClientWrapperForAgent(hf_inference_client, model_to_use, outer_scope_llm_params)
229
-
230
- agent = CodeAgent(tools=active_mcp_tools, model=agent_model_adapter)
231
-
232
- # Prime agent with history (all messages except the current user query)
233
- agent.messages = llm_messages[:-1]
234
-
235
- # CodeAgent.run expects a string query. Extract text from current user message.
236
- current_query_for_agent = message_input_text.strip() if message_input_text else "User provided image(s)."
237
- if not current_query_for_agent and image_files_list: # If only image, provide a generic text
238
- current_query_for_agent = "Describe the image(s) or follow instructions related to them."
239
- elif not current_query_for_agent and not image_files_list: # Should not happen due to earlier check
240
- current_query_for_agent = "..."
241

242
 
243
- print(f"Query for CodeAgent.run: '{current_query_for_agent}' with {len(agent.messages)} history messages.")
244
- try:
245
- agent_final_text_response = agent.run(current_query_for_agent)
246
- # Note: agent.run() is blocking and returns the final string.
247
- # It won't stream token by token if tools are used.
248
- yield agent_final_text_response
249
- print("Completed response generation via CodeAgent.")
250
- except Exception as e:
251
- print(f"Error during CodeAgent execution: {e}")
252
- yield f"Error using tools: {str(e)}"
253
- return
254
 
255
- else: # No MCP tools, use original streaming logic
256
- print("No MCP tools active. Proceeding with direct LLM call (streaming).")
257
- response_stream_content = ""
 
258
  try:
259
- stream = hf_inference_client.chat_completion(
260
- model=model_to_use,
261
- messages=llm_messages,
262
- stream=True,
263
- max_tokens=max_tokens, temperature=temperature, top_p=top_p,
264
- frequency_penalty=frequency_penalty, seed=seed
265
- )
266
- for chunk in stream:
267
- if hasattr(chunk, 'choices') and len(chunk.choices) > 0:
268
- delta = chunk.choices[0].delta
269
- if hasattr(delta, 'content') and delta.content:
270
- token_text = delta.content
271
- response_stream_content += token_text
272
- yield response_stream_content
273
- print("\nCompleted streaming response generation.")
274
  except Exception as e:
275
- print(f"Error during direct LLM inference: {e}")
276
- yield response_stream_content + f"\nError: {str(e)}"
277
 
278
- # Function to validate provider (remains the same)
279
  def validate_provider(api_key, provider):
280
  if not api_key.strip() and provider != "hf-inference":
281
  return gr.update(value="hf-inference")
282
  return gr.update(value=provider)
283

284
  # GRADIO UI
285
  with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
 
286
  chatbot = gr.Chatbot(
287
- label="Serverless TextGen Hub",
288
- height=600, show_copy_button=True,
289
- placeholder="Select a model, (optionally) load MCP Tools, and begin chatting.",
290
- layout="panel",
291
- bubble_full_width=False
292
  )
 
293
 
294
- msg_input_box = gr.MultimodalTextbox(
 
295
  placeholder="Type a message or upload images...",
296
- show_label=False, container=False, scale=12,
297
- file_types=["image"], file_count="multiple", sources=["upload"]
298
  )
299
 
 
300
  with gr.Accordion("Settings", open=False):
301
- system_message_box = gr.Textbox(value="You are a helpful AI assistant.", label="System Prompt")
302
- with gr.Row():
303
- # ... (max_tokens, temperature, top_p sliders remain the same)
304
- max_tokens_slider = gr.Slider(1, 4096, value=512, step=1, label="Max tokens")
305
- temperature_slider = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
306
- top_p_slider = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-P")
 
 
307
  with gr.Row():
308
- # ... (frequency_penalty, seed sliders remain the same)
309
- frequency_penalty_slider = gr.Slider(-2.0, 2.0, value=0.0, step=0.1, label="Frequency Penalty")
310
- seed_slider = gr.Slider(-1, 65535, value=-1, step=1, label="Seed (-1 for random)")
311
-
312
- providers_list = ["hf-inference", "cerebras", "together", "sambanova", "novita", "cohere", "fireworks-ai", "hyperbolic", "nebius"]
313
- provider_radio = gr.Radio(choices=providers_list, value="hf-inference", label="Inference Provider")
314
- byok_textbox = gr.Textbox(label="BYOK (Hugging Face API Key)", type="password", placeholder="Enter token if not using 'hf-inference'")
315
- custom_model_box = gr.Textbox(label="Custom Model ID", placeholder="org/model-name (overrides selection below)")
316
- model_search_box = gr.Textbox(label="Filter Featured Models", placeholder="Search...")
317
-
318
- models_list = [ # Keep your extensive model list
319
- "meta-llama/Llama-3.2-11B-Vision-Instruct", "meta-llama/Llama-3.3-70B-Instruct",
320
- # ... (include all your models) ...
321
  "microsoft/Phi-3-mini-4k-instruct",
322
  ]
323
- featured_model_radio = gr.Radio(label="Select a Featured Model", choices=models_list, value="meta-llama/Llama-3.2-11B-Vision-Instruct", interactive=True)
324
- gr.Markdown("[All Text models](https://huggingface.co/models?pipeline_tag=text-generation) | [All Multimodal models](https://huggingface.co/models?pipeline_tag=image-text-to-text)")
325
 
326
- # --- MCP Client Settings UI ---
327
- with gr.Accordion("MCP Client Settings (Connect to External Tools)", open=False):
328
- gr.Markdown("Configure connections to MCP Servers to allow the LLM to use external tools. The LLM will decide when to use these tools based on your prompts.")
329
- mcp_server_config_input = gr.Textbox(
330
- label="MCP Server Configurations (JSON Array)",
331
- info='Example: [{"name": "MyToolServer", "type": "sse", "url": "http://server_url/gradio_api/mcp/sse"}]',
332
- lines=3,
333
- placeholder='Enter a JSON list of server configurations here.',
334
- value=json.dumps(DEFAULT_MCP_SERVERS, indent=2) # Pre-fill with defaults
335
  )
336
- mcp_load_status_display = gr.Textbox(label="MCP Load Status", interactive=False)
337
- load_mcp_tools_btn = gr.Button("Load/Reload MCP Tools")
338
 
339
- def handle_load_mcp_tools_click(config_str_from_ui):
340
- if not config_str_from_ui:
341
- load_mcp_tools([]) # Clear tools if config is empty
342
- return "MCP tool loading attempted with empty config. Tools cleared."
343
- try:
344
- parsed_configs = json.loads(config_str_from_ui)
345
- if not isinstance(parsed_configs, list):
346
- return "Error: MCP configuration must be a valid JSON list."
347
- load_mcp_tools(parsed_configs) # Call the main loading function
348
-
349
- if mcp_tools_collection and len(mcp_tools_collection.tools) > 0:
350
- loaded_tool_names = [t.name for t in mcp_tools_collection.tools]
351
- return f"Successfully loaded {len(loaded_tool_names)} MCP tools: {', '.join(loaded_tool_names)}"
352
- else:
353
- return "No MCP tools loaded, or an error occurred during loading. Check console for details."
354
- except json.JSONDecodeError:
355
- return "Error: Invalid JSON format in MCP server configurations."
356
- except Exception as e:
357
- print(f"Unhandled error in handle_load_mcp_tools_click: {e}")
358
- return f"Error loading MCP tools: {str(e)}. Check console."
359
-
360
- load_mcp_tools_btn.click(
361
- handle_load_mcp_tools_click,
362
- inputs=[mcp_server_config_input],
363
- outputs=mcp_load_status_display
364
  )
365
- # --- End MCP Client Settings UI ---
366
-
367
- # Chat history state (remains the same)
368
- # chat_history = gr.State([]) # Not explicitly used if chatbot manages history directly
369
 
370
- # Function to filter models (remains the same)
371
  def filter_models(search_term):
372
- return gr.update(choices=[m for m in models_list if search_term.lower() in m.lower()])
 
 
 
373
 
374
- # Function to set custom model from radio (remains the same)
375
  def set_custom_model_from_radio(selected):
376
- return selected # Updates custom_model_box with the selected featured model
 
377
 
378
- # Gradio's MultimodalTextbox submit action
379
- # The `user` function is simplified as msg_input_box directly gives text and files
380
- # The `bot` function is where the main logic of `respond` is called.
381
 
382
- def handle_submit(msg_content_dict, current_chat_history):
383
- # msg_content_dict = {"text": "...", "files": ["path1", "path2"]}
384
- text = msg_content_dict.get("text", "")
385
- files = msg_content_dict.get("files", [])
386
-
387
- # Add user message to history for display
388
- # For multimodal, we might want to display text and images separately or combined
389
- user_display_entry = []
390
- if text:
391
- user_display_entry.append(text)
392
- if files:
393
- # For display, Gradio chatbot can render markdown images
394
- for f_path in files:
395
- user_display_entry.append(f"![{os.path.basename(f_path)}]({f_path})")
396
-
397
- # Construct a representation for history that `respond` can unpack
398
- # For simplicity, let's pass text and files separately to `respond`
399
- # and the history will store the user input as (text, files_list_for_display)
400
-
401
- history_entry_user_part = (text, files) # Store as tuple for `respond` to process easily later
402
- current_chat_history.append([history_entry_user_part, None]) # Add user part, assistant is None for now
403
-
404
- # Prepare for streaming response
405
- # The `respond` function is a generator
406
- assistant_response_accumulator = ""
407
- for streamed_chunk in respond(
408
- text, files,
409
- current_chat_history[:-1], # Pass history *before* current turn
410
- system_message_box.value, max_tokens_slider.value, temperature_slider.value,
411
- top_p_slider.value, frequency_penalty_slider.value, seed_slider.value,
412
- provider_radio.value, byok_textbox.value, custom_model_box.value,
413
- model_search_box.value, featured_model_radio.value
414
- ):
415
- assistant_response_accumulator = streamed_chunk
416
- current_chat_history[-1][1] = assistant_response_accumulator # Update last assistant message
417
- yield current_chat_history, {"text": "", "files": []} # Update chatbot, clear input
418
-
419
- # Final update after stream (already done by last yield)
420
- # yield current_chat_history, {"text": "", "files": []}
421
-
422
 
423
- msg_input_box.submit(
424
- handle_submit,
425
- [msg_input_box, chatbot],
426
- [chatbot, msg_input_box] # Output to chatbot and clear msg_input_box
 
427
  )
 
428
 
429
- model_search_box.change(filter_models, model_search_box, featured_model_radio)
430
- featured_model_radio.change(set_custom_model_from_radio, featured_model_radio, custom_model_box)
431
- byok_textbox.change(validate_provider, [byok_textbox, provider_radio], provider_radio)
432
- provider_radio.change(validate_provider, [byok_textbox, provider_radio], provider_radio)
 
 
 
433
 
434
- # Load default MCP tools on startup
435
- load_mcp_tools(DEFAULT_MCP_SERVERS)
436
- print(f"Initial MCP tools loaded: {len(mcp_tools_collection.tools) if mcp_tools_collection else 0} tools.")
437
 
438
  print("Gradio interface initialized.")
 
439
  if __name__ == "__main__":
440
- print("Launching the Serverless TextGen Hub demo application.")
441
- demo.launch(show_api=False) # show_api can be True if needed for other purposes
 
5
  import base64
6
  from PIL import Image
7
  import io
8
+ import requests
9
+ from mcp.client.sse import SSEServerParameters
10
+ from mcp.jsonrpc.client import JsonRpcClient
11
+ from mcp.client.base import ServerCapabilities
 
12
 
13
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
14
  print("Access token loaded.")
15
 
16
+ # Function to encode image to base64
17
  def encode_image(image_path):
18
  if not image_path:
19
  print("No image path provided")
 
21
 
22
  try:
23
  print(f"Encoding image from path: {image_path}")
24
+
25
+ # If it's already a PIL Image
26
  if isinstance(image_path, Image.Image):
27
  image = image_path
28
  else:
29
+ # Try to open the image file
30
  image = Image.open(image_path)
31
 
32
+ # Convert to RGB if image has an alpha channel (RGBA)
33
  if image.mode == 'RGBA':
34
  image = image.convert('RGB')
35
 
36
+ # Encode to base64
37
  buffered = io.BytesIO()
38
  image.save(buffered, format="JPEG")
39
  img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
 
43
  print(f"Error encoding image: {e}")
44
  return None
45
 
46
+ # MCP Client class for handling MCP server connections
47
+ class MCPClient:
48
+ def __init__(self, url):
49
+ self.url = url
50
+ self.client = None
51
+ self.capabilities = None
52
+ self.tools = None
53
+
54
+ def connect(self):
55
+ try:
56
+ # Connect to the MCP server using SSE
57
+ server_params = SSEServerParameters(url=self.url)
58
+ self.client = JsonRpcClient(server_params)
59
+ self.client.connect()
60
+
61
+ # Get server capabilities
62
+ self.capabilities = ServerCapabilities(self.client)
63
+
64
+ # List available tools
65
+ self.tools = self.capabilities.list_tools()
66
+ print(f"Connected to MCP Server. Available tools: {[tool.name for tool in self.tools]}")
67
+ return True
68
+ except Exception as e:
69
+ print(f"Error connecting to MCP server: {e}")
70
+ return False
71
+
72
+ def call_tool(self, tool_name, **kwargs):
73
+ if not self.client or not self.tools:
74
+ print("MCP client not initialized or no tools available")
75
+ return None
76
+
77
+ # Find the tool with the given name
78
+ tool = next((t for t in self.tools if t.name == tool_name), None)
79
+ if not tool:
80
+ print(f"Tool '{tool_name}' not found")
81
+ return None
82
+
83
+ try:
84
+ # Call the tool with the given arguments
85
+ result = self.client.call_method("tools/call", {"name": tool_name, "arguments": kwargs})
86
+ return result
87
+ except Exception as e:
88
+ print(f"Error calling tool '{tool_name}': {e}")
89
+ return None
90
+
91
+ def close(self):
92
+ if self.client:
93
+ try:
94
+ self.client.close()
95
+ print("MCP client connection closed")
96
+ except Exception as e:
97
+ print(f"Error closing MCP client connection: {e}")
98
+
99
+ # Function to convert text to audio using Kokoro MCP server
100
+ def text_to_audio(text, speed=1.0, mcp_url=None):
101
+ """Convert text to audio using Kokoro MCP server if available.
102
+
103
+ Args:
104
+ text (str): Text to convert to speech
105
+ speed (float): Speed multiplier for speech
106
+ mcp_url (str): URL of the Kokoro MCP server
107
+
108
+ Returns:
109
+ tuple: (sample_rate, audio_array) or None if conversion fails
110
+ """
111
+ if not text or not mcp_url:
112
+ return None
113
+
114
+ try:
115
+ # Connect to MCP server
116
+ mcp_client = MCPClient(mcp_url)
117
+ if not mcp_client.connect():
118
+ return None
119
+
120
+ # Call the text_to_audio tool
121
+ result = mcp_client.call_tool("text_to_audio", text=text, speed=speed)
122
+ mcp_client.close()
123
+
124
+ if not result:
125
+ return None
126
+
127
+ # Process the result - convert base64 audio to numpy array
128
+ import numpy as np
129
+ import base64
130
+
131
+ # Assuming the result contains base64-encoded WAV data
132
+ audio_b64 = result
133
+ audio_data = base64.b64decode(audio_b64)
134
+
135
+ # Convert to numpy array - this is simplified and may need adjustment
136
+ # based on the actual output format from the Kokoro MCP server
137
+ import io
138
+ import soundfile as sf
139
+
140
+ audio_io = io.BytesIO(audio_data)
141
+ audio_array, sample_rate = sf.read(audio_io)
142
+
143
+ return (sample_rate, audio_array)
144
+ except Exception as e:
145
+ print(f"Error converting text to audio: {e}")
146
+ return None
147
+
148
  def respond(
149
+ message,
150
+ image_files,
151
  history: list[tuple[str, str]],
152
  system_message,
153
  max_tokens,
 
158
  provider,
159
  custom_api_key,
160
  custom_model,
161
+ model_search_term,
162
+ selected_model,
163
+ mcp_server_url=None,
164
+ tts_enabled=False,
165
+ tts_speed=1.0
166
  ):
167
+ print(f"Received message: {message}")
168
+ print(f"Received {len(image_files) if image_files else 0} images")
169
+ print(f"History: {history}")
170
+ print(f"System message: {system_message}")
171
+ print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
172
+ print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
173
+ print(f"Selected provider: {provider}")
174
+ print(f"Custom API Key provided: {bool(custom_api_key.strip())}")
175
+ print(f"Selected model (custom_model): {custom_model}")
176
+ print(f"Model search term: {model_search_term}")
177
+ print(f"Selected model from radio: {selected_model}")
178
+ print(f"MCP Server URL: {mcp_server_url}")
179
+ print(f"TTS Enabled: {tts_enabled}")
180
+
181
+ # Determine which token to use
182
  token_to_use = custom_api_key if custom_api_key.strip() != "" else ACCESS_TOKEN
183
+
184
+ if custom_api_key.strip() != "":
185
+ print("USING CUSTOM API KEY: BYOK token provided by user is being used for authentication")
186
+ else:
187
+ print("USING DEFAULT API KEY: Environment variable HF_TOKEN is being used for authentication")
188
+
189
+ # Initialize the Inference Client with the provider and appropriate token
190
+ client = InferenceClient(token=token_to_use, provider=provider)
191
  print(f"Hugging Face Inference Client initialized with {provider} provider.")
192
 
193
+ # Convert seed to None if -1 (meaning random)
194
+ if seed == -1:
195
+ seed = None
196
 
197
+ # Create multimodal content if images are present
198
+ if image_files and len(image_files) > 0:
199
+ # Process the user message to include images
200
+ user_content = []
201
+
202
+ # Add text part if there is any
203
+ if message and message.strip():
204
+ user_content.append({
205
+ "type": "text",
206
+ "text": message
207
+ })
208
+
209
+ # Add image parts
210
+ for img in image_files:
211
+ if img is not None:
212
+ # Get raw image data from path
213
+ try:
214
+ encoded_image = encode_image(img)
215
+ if encoded_image:
216
+ user_content.append({
217
+ "type": "image_url",
218
+ "image_url": {
219
+ "url": f"data:image/jpeg;base64,{encoded_image}"
220
+ }
221
+ })
222
+ except Exception as e:
223
+ print(f"Error encoding image: {e}")
224
+ else:
225
+ # Text-only message
226
+ user_content = message
227
+
228
+ # Prepare messages in the format expected by the API
229
+ messages = [{"role": "system", "content": system_message}]
230
+ print("Initial messages array constructed.")
231
+
232
+ # Add conversation history to the context
233
+ for val in history:
234
+ user_part = val[0]
235
+ assistant_part = val[1]
236
+ if user_part:
237
+ # Handle both text-only and multimodal messages in history
238
+ if isinstance(user_part, tuple) and len(user_part) == 2:
239
+ # This is a multimodal message with text and images
240
+ history_content = []
241
+ if user_part[0]: # Text
242
+ history_content.append({
243
+ "type": "text",
244
+ "text": user_part[0]
245
  })
246
+
247
+ for img in user_part[1]: # Images
248
+ if img:
249
+ try:
250
+ encoded_img = encode_image(img)
251
+ if encoded_img:
252
+ history_content.append({
253
+ "type": "image_url",
254
+ "image_url": {
255
+ "url": f"data:image/jpeg;base64,{encoded_img}"
256
+ }
257
+ })
258
+ except Exception as e:
259
+ print(f"Error encoding history image: {e}")
260
+
261
+ messages.append({"role": "user", "content": history_content})
262
+ else:
263
+ # Regular text message
264
+ messages.append({"role": "user", "content": user_part})
265
+ print(f"Added user message to context (type: {type(user_part)})")
266
+
267
+ if assistant_part:
268
+ messages.append({"role": "assistant", "content": assistant_part})
269
+ print(f"Added assistant message to context: {assistant_part}")
270
 
271
+ # Append the latest user message
272
+ messages.append({"role": "user", "content": user_content})
273
+ print(f"Latest user message appended (content type: {type(user_content)})")
274
 
275
+ # Determine which model to use, prioritizing custom_model if provided
276
+ model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
277
  print(f"Model selected for inference: {model_to_use}")
278
 
279
+ # Start with an empty string to build the response as tokens stream in
280
+ response = ""
281
+ print(f"Sending request to {provider} provider.")
282
+
283
+ # Prepare parameters for the chat completion request
284
+ parameters = {
285
+ "max_tokens": max_tokens,
286
+ "temperature": temperature,
287
+ "top_p": top_p,
288
+ "frequency_penalty": frequency_penalty,
289
+ }
290
+
291
+ if seed is not None:
292
+ parameters["seed"] = seed
293
 
294
+ # Use the InferenceClient for making the request
295
+ try:
296
+ # Create a generator for the streaming response
297
+ stream = client.chat_completion(
298
+ model=model_to_use,
299
+ messages=messages,
300
+ stream=True,
301
+ **parameters
302
+ )
303
+
304
+ print("Received tokens: ", end="", flush=True)
305
+
306
+ # Process the streaming response
307
+ for chunk in stream:
308
+ if hasattr(chunk, 'choices') and len(chunk.choices) > 0:
309
+ # Extract the content from the response
310
+ if hasattr(chunk.choices[0], 'delta') and hasattr(chunk.choices[0].delta, 'content'):
311
+ token_text = chunk.choices[0].delta.content
312
+ if token_text:
313
+ print(token_text, end="", flush=True)
314
+ response += token_text
315
+ yield response
316
+
317
+ print()
318
+ except Exception as e:
319
+ print(f"Error during inference: {e}")
320
+ response += f"\nError: {str(e)}"
321
+ yield response
322
 
323
+ print("Completed response generation.")
324
+
325
+ # If TTS is enabled and we have a valid MCP server URL, convert response to audio
326
+ if tts_enabled and mcp_server_url and response:
327
  try:
328
+ print(f"Converting response to audio using MCP server: {mcp_server_url}")
329
+ audio_data = text_to_audio(response, tts_speed, mcp_server_url)
330
+ if audio_data:
331
+ # Here we would need to handle returning both text and audio
332
+ # This would require modifying the Gradio interface to support this
333
+ print("Successfully converted text to audio")
334
+ # For now, we'll just return the text response
335
  except Exception as e:
336
+ print(f"Error converting text to audio: {e}")
 
337
 
338
+ # Function to validate provider selection based on BYOK
339
  def validate_provider(api_key, provider):
340
  if not api_key.strip() and provider != "hf-inference":
341
  return gr.update(value="hf-inference")
342
  return gr.update(value=provider)
343
 
344
+ # Function to test MCP server connection
345
+ def test_mcp_connection(mcp_url):
346
+ if not mcp_url or not mcp_url.strip():
347
+ return "Please enter an MCP server URL"
348
+
349
+ try:
350
+ mcp_client = MCPClient(mcp_url)
351
+ if mcp_client.connect():
352
+ tools = [tool.name for tool in mcp_client.tools]
353
+ mcp_client.close()
354
+ return f"Successfully connected to MCP server. Available tools: {', '.join(tools)}"
355
+ else:
356
+ return "Failed to connect to MCP server"
357
+ except Exception as e:
358
+ return f"Error connecting to MCP server: {str(e)}"
359
+
360
  # GRADIO UI
361
  with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
362
+ # Create the chatbot component
363
  chatbot = gr.Chatbot(
364
+ height=600,
365
+ show_copy_button=True,
366
+ placeholder="Select a model and begin chatting. Now supports multiple inference providers and multimodal inputs",
367
+ layout="panel"
 
368
  )
369
+ print("Chatbot interface created.")
370
 
371
+ # Multimodal textbox for messages (combines text and file uploads)
372
+ msg = gr.MultimodalTextbox(
373
  placeholder="Type a message or upload images...",
374
+ show_label=False,
375
+ container=False,
376
+ scale=12,
377
+ file_types=["image"],
378
+ file_count="multiple",
379
+ sources=["upload"]
380
  )
381
 
382
+ # Create accordion for settings
383
  with gr.Accordion("Settings", open=False):
384
+ # System message
385
+ system_message_box = gr.Textbox(
386
+ value="You are a helpful AI assistant that can understand images and text.",
387
+ placeholder="You are a helpful assistant.",
388
+ label="System Prompt"
389
+ )
390
+
391
+ # Generation parameters
392
  with gr.Row():
393
+ with gr.Column():
394
+ max_tokens_slider = gr.Slider(
395
+ minimum=1,
396
+ maximum=4096,
397
+ value=512,
398
+ step=1,
399
+ label="Max tokens"
400
+ )
401
+
402
+ temperature_slider = gr.Slider(
403
+ minimum=0.1,
404
+ maximum=4.0,
405
+ value=0.7,
406
+ step=0.1,
407
+ label="Temperature"
408
+ )
409
+
410
+ top_p_slider = gr.Slider(
411
+ minimum=0.1,
412
+ maximum=1.0,
413
+ value=0.95,
414
+ step=0.05,
415
+ label="Top-P"
416
+ )
417
+
418
+ with gr.Column():
419
+ frequency_penalty_slider = gr.Slider(
420
+ minimum=-2.0,
421
+ maximum=2.0,
422
+ value=0.0,
423
+ step=0.1,
424
+ label="Frequency Penalty"
425
+ )
426
+
427
+ seed_slider = gr.Slider(
428
+ minimum=-1,
429
+ maximum=65535,
430
+ value=-1,
431
+ step=1,
432
+ label="Seed (-1 for random)"
433
+ )
434
+
435
+ # Provider selection
436
+ providers_list = [
437
+ "hf-inference", # Default Hugging Face Inference
438
+ "cerebras", # Cerebras provider
439
+ "together", # Together AI
440
+ "sambanova", # SambaNova
441
+ "novita", # Novita AI
442
+ "cohere", # Cohere
443
+ "fireworks-ai", # Fireworks AI
444
+ "hyperbolic", # Hyperbolic
445
+ "nebius", # Nebius
446
+ ]
447
+
448
+ provider_radio = gr.Radio(
449
+ choices=providers_list,
450
+ value="hf-inference",
451
+ label="Inference Provider",
452
+ )
453
+
454
+ # New BYOK textbox
455
+ byok_textbox = gr.Textbox(
456
+ value="",
457
+ label="BYOK (Bring Your Own Key)",
458
+ info="Enter a custom Hugging Face API key here. When empty, only 'hf-inference' provider can be used.",
459
+ placeholder="Enter your Hugging Face API token",
460
+ type="password" # Hide the API key for security
461
+ )
462
+
463
+ # Custom model box
464
+ custom_model_box = gr.Textbox(
465
+ value="",
466
+ label="Custom Model",
467
+ info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
468
+ placeholder="meta-llama/Llama-3.3-70B-Instruct"
469
+ )
470
+
471
+ # Model search
472
+ model_search_box = gr.Textbox(
473
+ label="Filter Models",
474
+ placeholder="Search for a featured model...",
475
+ lines=1
476
+ )
477
+
478
+ # Featured models list
479
+ # Updated to include multimodal models
480
+ models_list = [
481
+ "meta-llama/Llama-3.2-11B-Vision-Instruct",
482
+ "meta-llama/Llama-3.3-70B-Instruct",
483
+ "meta-llama/Llama-3.1-70B-Instruct",
484
+ "meta-llama/Llama-3.0-70B-Instruct",
485
+ "meta-llama/Llama-3.2-3B-Instruct",
486
+ "meta-llama/Llama-3.2-1B-Instruct",
487
+ "meta-llama/Llama-3.1-8B-Instruct",
488
+ "NousResearch/Hermes-3-Llama-3.1-8B",
489
+ "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
490
+ "mistralai/Mistral-Nemo-Instruct-2407",
491
+ "mistralai/Mixtral-8x7B-Instruct-v0.1",
492
+ "mistralai/Mistral-7B-Instruct-v0.3",
493
+ "mistralai/Mistral-7B-Instruct-v0.2",
494
+ "Qwen/Qwen3-235B-A22B",
495
+ "Qwen/Qwen3-32B",
496
+ "Qwen/Qwen2.5-72B-Instruct",
497
+ "Qwen/Qwen2.5-3B-Instruct",
498
+ "Qwen/Qwen2.5-0.5B-Instruct",
499
+ "Qwen/QwQ-32B",
500
+ "Qwen/Qwen2.5-Coder-32B-Instruct",
501
+ "microsoft/Phi-3.5-mini-instruct",
502
+ "microsoft/Phi-3-mini-128k-instruct",
503
  "microsoft/Phi-3-mini-4k-instruct",
504
  ]
 
 
505
 
506
+ featured_model_radio = gr.Radio(
507
+ label="Select a model below",
508
+ choices=models_list,
509
+ value="meta-llama/Llama-3.2-11B-Vision-Instruct", # Default to a multimodal model
510
+ interactive=True
511
  )
 
 
512
 
513
+ gr.Markdown("[View all Text-to-Text models](https://huggingface.co/models?inference_provider=all&pipeline_tag=text-generation&sort=trending) | [View all multimodal models](https://huggingface.co/models?inference_provider=all&pipeline_tag=image-text-to-text&sort=trending)")
514
+
515
+ # New Accordion for MCP Settings
516
+ with gr.Accordion("MCP Server Settings", open=False):
517
+ mcp_server_url = gr.Textbox(
518
+ value="",
519
+ label="MCP Server URL",
520
+ info="Enter the URL of an MCP server to connect to (e.g., https://example-kokoro-mcp.hf.space/gradio_api/mcp/sse)",
521
+ placeholder="https://fdaudens-kokoro-mcp.hf.space/gradio_api/mcp/sse"
522
  )
523
+
524
+ test_connection_btn = gr.Button("Test Connection")
525
+ connection_status = gr.Textbox(
526
+ label="Connection Status",
527
+ interactive=False
528
+ )
529
+
530
+ tts_enabled = gr.Checkbox(
531
+ label="Enable Text-to-Speech",
532
+ value=False,
533
+ info="Convert AI responses to speech using the Kokoro TTS service"
534
+ )
535
+
536
+ tts_speed = gr.Slider(
537
+ minimum=0.5,
538
+ maximum=2.0,
539
+ value=1.0,
540
+ step=0.1,
541
+ label="Speech Speed"
542
+ )
543
+
544
+ gr.Markdown("""
545
+ ### About MCP Support
546
+
547
+ This app can connect to Model Context Protocol (MCP) servers to extend its capabilities.
548
+
549
+ For example, connecting to a Kokoro MCP server allows for text-to-speech conversion.
550
+
551
+ To use this feature:
552
+ 1. Enter the MCP server URL
553
+ 2. Test the connection
554
+ 3. Enable the desired features (e.g., TTS)
555
+ 4. Chat normally with the AI
556
+
557
+ Note: TTS functionality requires an active connection to a Kokoro MCP server.
558
+ """)
559
 
560
+ # Chat history state
561
+ chat_history = gr.State([])
562
+
563
+ # Connect the test connection button
564
+ test_connection_btn.click(
565
+ fn=test_mcp_connection,
566
+ inputs=[mcp_server_url],
567
+ outputs=[connection_status]
568
+ )
569
+
570
+ # Function to filter models
571
  def filter_models(search_term):
572
+ print(f"Filtering models with search term: {search_term}")
573
+ filtered = [m for m in models_list if search_term.lower() in m.lower()]
574
+ print(f"Filtered models: {filtered}")
575
+ return gr.update(choices=filtered)
576
 
577
+ # Function to set custom model from radio
578
  def set_custom_model_from_radio(selected):
579
+ print(f"Featured model selected: {selected}")
580
+ return selected
581
 
582
+ # Function for the chat interface
583
+ def user(user_message, history):
584
+ # Debug logging for troubleshooting
585
+ print(f"User message received: {user_message}")
586
+
587
+ # Skip if message is empty (no text and no files)
588
+ if not user_message or (not user_message.get("text") and not user_message.get("files")):
589
+ print("Empty message, skipping")
590
+ return history
591
+
592
+ # Prepare multimodal message format
593
+ text_content = user_message.get("text", "").strip()
594
+ files = user_message.get("files", [])
595
+
596
+ print(f"Text content: {text_content}")
597
+ print(f"Files: {files}")
598
+
599
+ # If both text and files are empty, skip
600
+ if not text_content and not files:
601
+ print("No content to display")
602
+ return history
603
+
604
+ # Add message with images to history
605
+ if files and len(files) > 0:
606
+ # Add text message first if it exists
607
+ if text_content:
608
+ # Add a separate text message
609
+ print(f"Adding text message: {text_content}")
610
+ history.append([text_content, None])
611
+
612
+ # Then add each image file separately
613
+ for file_path in files:
614
+ if file_path and isinstance(file_path, str):
615
+ print(f"Adding image: {file_path}")
616
+ # Add image as a separate message with no text
617
+ history.append([f"![Image]({file_path})", None])
618
+
619
+ return history
620
+ else:
621
+ # For text-only messages
622
+ print(f"Adding text-only message: {text_content}")
623
+ history.append([text_content, None])
624
+ return history
625
 
626
+ # Define bot response function
627
+ def bot(history, system_msg, max_tokens, temperature, top_p, freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model, mcp_url, tts_on, tts_spd):
628
+ # Check if history is valid
629
+ if not history or len(history) == 0:
630
+ print("No history to process")
631
+ return history
632
+
633
+ # Get the most recent message and detect if it's an image
634
+ user_message = history[-1][0]
635
+ print(f"Processing user message: {user_message}")
636
+
637
+ is_image = False
638
+ image_path = None
639
+ text_content = user_message
640
+
641
+ # Check if this is an image message (marked with ![Image])
642
+ if isinstance(user_message, str) and user_message.startswith("![Image]("):
643
+ is_image = True
644
+ # Extract image path from markdown format ![Image](path)
645
+ image_path = user_message.replace("![Image](", "").replace(")", "")
646
+ print(f"Image detected: {image_path}")
647
+ text_content = "" # No text for image-only messages
648
+
649
+ # Look back for text context if this is an image
650
+ text_context = ""
651
+ if is_image and len(history) > 1:
652
+ # Use the previous message as context if it's text
653
+ prev_message = history[-2][0]
654
+ if isinstance(prev_message, str) and not prev_message.startswith("![Image]("):
655
+ text_context = prev_message
656
+ print(f"Using text context from previous message: {text_context}")
657
+
658
+ # Process message through respond function
659
+ history[-1][1] = ""
660
+
661
+ # Use either the image or text for the API
662
+ if is_image:
663
+ # For image messages
664
+ for response in respond(
665
+ text_context, # Text context from previous message if any
666
+ [image_path], # Current image
667
+ history[:-1], # Previous history
668
+ system_msg,
669
+ max_tokens,
670
+ temperature,
671
+ top_p,
672
+ freq_penalty,
673
+ seed,
674
+ provider,
675
+ api_key,
676
+ custom_model,
677
+ search_term,
678
+ selected_model,
679
+ mcp_url,
680
+ tts_on,
681
+ tts_spd
682
+ ):
683
+ history[-1][1] = response
684
+ yield history
685
+ else:
686
+ # For text-only messages
687
+ for response in respond(
688
+ text_content, # Text message
689
+ None, # No image
690
+ history[:-1], # Previous history
691
+ system_msg,
692
+ max_tokens,
693
+ temperature,
694
+ top_p,
695
+ freq_penalty,
696
+ seed,
697
+ provider,
698
+ api_key,
699
+ custom_model,
700
+ search_term,
701
+ selected_model,
702
+ mcp_url,
703
+ tts_on,
704
+ tts_spd
705
+ ):
706
+ history[-1][1] = response
707
+ yield history
708
+
709
+ # Event handlers - only using the MultimodalTextbox's built-in submit functionality
710
+ msg.submit(
711
+ user,
712
+ [msg, chatbot],
713
+ [chatbot],
714
+ queue=False
715
+ ).then(
716
+ bot,
717
+ [chatbot, system_message_box, max_tokens_slider, temperature_slider, top_p_slider,
718
+ frequency_penalty_slider, seed_slider, provider_radio, byok_textbox, custom_model_box,
719
+ model_search_box, featured_model_radio, mcp_server_url, tts_enabled, tts_speed],
720
+ [chatbot]
721
+ ).then(
722
+ lambda: {"text": "", "files": []}, # Clear inputs after submission
723
+ None,
724
+ [msg]
725
+ )
726
+
727
+ # Connect the model filter to update the radio choices
728
+ model_search_box.change(
729
+ fn=filter_models,
730
+ inputs=model_search_box,
731
+ outputs=featured_model_radio
732
+ )
733
+ print("Model search box change event linked.")
734
 
735
+ # Connect the featured model radio to update the custom model box
736
+ featured_model_radio.change(
737
+ fn=set_custom_model_from_radio,
738
+ inputs=featured_model_radio,
739
+ outputs=custom_model_box
740
  )
741
+ print("Featured model radio button change event linked.")
742
 
743
+ # Connect the BYOK textbox to validate provider selection
744
+ byok_textbox.change(
745
+ fn=validate_provider,
746
+ inputs=[byok_textbox, provider_radio],
747
+ outputs=provider_radio
748
+ )
749
+ print("BYOK textbox change event linked.")
750
 
751
+ # Also validate provider when the radio changes to ensure consistency
752
+ provider_radio.change(
753
+ fn=validate_provider,
754
+ inputs=[byok_textbox, provider_radio],
755
+ outputs=provider_radio
756
+ )
757
+ print("Provider radio button change event linked.")
758
 
759
  print("Gradio interface initialized.")
760
+
761
  if __name__ == "__main__":
762
+ print("Launching the demo application.")
763
+ demo.launch(show_api=True)
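
Below is a minimal usage sketch (not part of the commit) of the MCP helpers introduced above: the MCPClient class and the text_to_audio function, exercised the same way test_mcp_connection and the TTS branch of respond use them. It assumes the file is importable as `app`, that the example Kokoro Space URL from the placeholder is reachable, and that the server returns base64-encoded WAV audio as the code above expects.

# Illustrative sketch only -- exercises the MCPClient and text_to_audio helpers added in this commit.
# Assumes app.py is importable as `app` and the example Kokoro MCP Space is online.
from app import MCPClient, text_to_audio

KOKORO_URL = "https://fdaudens-kokoro-mcp.hf.space/gradio_api/mcp/sse"

# Connect and list the tools the server advertises (the same flow as test_mcp_connection in the UI).
client = MCPClient(KOKORO_URL)
if client.connect():
    print("Available tools:", [tool.name for tool in client.tools])
    client.close()

# Convert a short reply to speech, mirroring the TTS branch at the end of respond().
result = text_to_audio("Hello from the TextGen Hub.", speed=1.0, mcp_url=KOKORO_URL)
if result is not None:
    sample_rate, audio_array = result
    print(f"Received {len(audio_array)} samples at {sample_rate} Hz")
else:
    print("Text-to-audio conversion failed; check the console logs.")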