Nymbo committed on
Commit 81286e1 · verified · 1 Parent(s): cb919f0

Update app.py

Files changed (1)
  1. app.py +810 -292
app.py CHANGED
@@ -5,89 +5,191 @@ import json
  import base64
  from PIL import Image
  import io
- import atexit
-
- from smolagents import ToolCollection, CodeAgent
- from smolagents.mcp_client import MCPClient as SmolMCPClient

  ACCESS_TOKEN = os.getenv("HF_TOKEN")
  print("Access token loaded.")

- mcp_tools_collection = ToolCollection(tools=[])
- mcp_client_instances = []

- DEFAULT_MCP_SERVERS = [
-     {"name": "KokoroTTS (Example)", "type": "sse", "url": "https://fdaudens-kokoro-mcp.hf.space/gradio_api/mcp/sse"}
- ]

- def load_mcp_tools(server_configs_list):
-     global mcp_tools_collection, mcp_client_instances
-
-     # No explicit close for SmolMCPClient instances as it's not available directly
-     # Rely on script termination or GC for now.
-     # If you were using ToolCollection per server: tc.close() would be the way.
-     print(f"Clearing {len(mcp_client_instances)} previous MCP client instance references.")
-     mcp_client_instances = [] # Clear references; old objects will be GC'd if not referenced elsewhere
-
-     all_discovered_tools = []
-     if not server_configs_list:
-         print("No MCP server configurations provided. Clearing MCP tools.")
-         mcp_tools_collection = ToolCollection(tools=all_discovered_tools)
-         return
-
-     print(f"Loading MCP tools from {len(server_configs_list)} server configurations...")
-     for config in server_configs_list:
-         server_name = config.get('name', config.get('url', 'Unknown Server'))
-         try:
-             if config.get("type") == "sse":
-                 sse_url = config["url"]
-                 print(f"Attempting to connect to MCP SSE server: {server_name} at {sse_url}")
-                 smol_mcp_client = SmolMCPClient(server_parameters={"url": sse_url})
-                 mcp_client_instances.append(smol_mcp_client)
-                 discovered_tools_from_server = smol_mcp_client.get_tools()
-                 if discovered_tools_from_server:
-                     all_discovered_tools.extend(list(discovered_tools_from_server))
-                     print(f"Discovered {len(discovered_tools_from_server)} tools from {server_name}.")
-                 else:
-                     print(f"No tools discovered from {server_name}.")
-             else:
-                 print(f"Unsupported MCP server type '{config.get('type')}' for {server_name}. Skipping.")
-         except Exception as e:
-             print(f"Error loading MCP tools from {server_name}: {e}")
-
-     mcp_tools_collection = ToolCollection(tools=all_discovered_tools)
-     if mcp_tools_collection and len(mcp_tools_collection.tools) > 0:
-         print(f"Successfully loaded a total of {len(mcp_tools_collection.tools)} MCP tools:")
-         for tool in mcp_tools_collection.tools:
-             print(f" - {tool.name}: {tool.description[:100]}...")
-     else:
-         print("No MCP tools were loaded, or an error occurred.")

- def cleanup_mcp_client_instances_on_exit():
-     global mcp_client_instances
-     print("Attempting to clear MCP client instance references on application exit...")
-     # No explicit close called here as per previous fix
-     mcp_client_instances = []
-     print("MCP client instance reference cleanup finished.")

- atexit.register(cleanup_mcp_client_instances_on_exit)

- def encode_image(image_path):
-     if not image_path: return None
      try:
-         image = Image.open(image_path) if not isinstance(image_path, Image.Image) else image_path
-         if image.mode == 'RGBA': image = image.convert('RGB')
-         buffered = io.BytesIO()
-         image.save(buffered, format="JPEG")
-         return base64.b64encode(buffered.getvalue()).decode("utf-8")
      except Exception as e:
-         print(f"Error encoding image {image_path}: {e}")
-         return None

  def respond(
-     message_input_text,
-     image_files_list,
-     history: list[tuple[str, str]], # history will be list of (user_str_display, assistant_str_display)
      system_message,
      max_tokens,
      temperature,
@@ -98,256 +200,672 @@ def respond(
      custom_api_key,
      custom_model,
      model_search_term,
-     selected_model
  ):
-     global mcp_tools_collection
-     print(f"Respond: Text='{message_input_text}', Images={len(image_files_list) if image_files_list else 0}")
-
-     token_to_use = custom_api_key if custom_api_key.strip() else ACCESS_TOKEN
-     hf_inference_client = InferenceClient(token=token_to_use, provider=provider)
-     if seed == -1: seed = None
-
-     current_user_content_parts = []
-     if message_input_text and message_input_text.strip():
-         current_user_content_parts.append({"type": "text", "text": message_input_text.strip()})
-     if image_files_list:
-         for img_path in image_files_list:
-             encoded_img = encode_image(img_path)
-             if encoded_img:
-                 current_user_content_parts.append({
-                     "type": "image_url",
-                     "image_url": {"url": f"data:image/jpeg;base64,{encoded_img}"}
-                 })
-     if not current_user_content_parts:
-         for item in history: yield item # Should not happen if handle_submit filters empty
-         return
-
-     llm_messages = [{"role": "system", "content": system_message}]
-     for hist_user_str, hist_assistant in history: # hist_user_str is display string
-         # For LLM context, we only care about the text part of history if it was multimodal.
-         # Current image handling is only for the *current* turn.
-         # If you need to re-process history for multimodal context for LLM, this part needs more logic.
-         # For now, assuming hist_user_str is sufficient as text context from past turns.
-         if hist_user_str:
-             llm_messages.append({"role": "user", "content": hist_user_str})
-         if hist_assistant:
-             llm_messages.append({"role": "assistant", "content": hist_assistant})
-
-     llm_messages.append({"role": "user", "content": current_user_content_parts if len(current_user_content_parts) > 1 else (current_user_content_parts[0] if current_user_content_parts else "")})

-     # FIX for Issue 1: 'NoneType' object has no attribute 'strip'
-     model_to_use = (custom_model.strip() if custom_model else "") or selected_model
      print(f"Model selected for inference: {model_to_use}")

-     active_mcp_tools = list(mcp_tools_collection.tools) if mcp_tools_collection else []
-
-     if active_mcp_tools:
-         print(f"MCP tools are active ({len(active_mcp_tools)} tools). Using CodeAgent.")
-         class HFClientWrapperForAgent:
-             def __init__(self, hf_client, model_id, outer_scope_params):
-                 self.client = hf_client
-                 self.model_id = model_id
-                 self.params = outer_scope_params
-             def generate(self, agent_llm_messages, tools=None, tool_choice=None, **kwargs):
-                 api_params = {
-                     "model": self.model_id, "messages": agent_llm_messages, "stream": False,
-                     "max_tokens": self.params['max_tokens'], "temperature": self.params['temperature'],
-                     "top_p": self.params['top_p'], "frequency_penalty": self.params['frequency_penalty'],
-                 }
-                 if self.params['seed'] is not None: api_params["seed"] = self.params['seed']
-                 if tools: api_params["tools"] = tools
-                 if tool_choice: api_params["tool_choice"] = tool_choice

-                 print(f"Agent's HFClientWrapper calling LLM: {self.model_id} with params: {api_params}")
-                 completion = self.client.chat_completion(**api_params)

-                 # FIX for Issue 2 (Potential): Ensure content is not None for text responses
-                 if completion.choices and completion.choices[0].message and \
-                    completion.choices[0].message.content is None and \
-                    (not completion.choices[0].message.tool_calls or not completion.choices[0].message.tool_calls):
-                     print("Warning (HFClientWrapperForAgent): Model returned None content. Setting to empty string.")
-                     completion.choices[0].message.content = ""
-                 return completion
-
-         outer_scope_llm_params = {
-             "max_tokens": max_tokens, "temperature": temperature, "top_p": top_p,
-             "frequency_penalty": frequency_penalty, "seed": seed
-         }
-         agent_model_adapter = HFClientWrapperForAgent(hf_inference_client, model_to_use, outer_scope_llm_params)
-         agent = CodeAgent(tools=active_mcp_tools, model=agent_model_adapter, messages_constructor=lambda: llm_messages[:-1].copy()) # Prime with history
-
-         current_query_for_agent = message_input_text.strip() if message_input_text else "User provided image(s)."
-         if not current_query_for_agent and image_files_list:
-             current_query_for_agent = "Process the provided image(s) or follow related instructions."
-         elif not current_query_for_agent and not image_files_list:
-             current_query_for_agent = "..." # Should be caught by earlier check
-
-         print(f"Query for CodeAgent.run: '{current_query_for_agent}' with {len(llm_messages)-1} history messages for priming.")
-         try:
-             agent_final_text_response = agent.run(current_query_for_agent)
-             yield agent_final_text_response
-             print("Completed response generation via CodeAgent.")
-         except Exception as e:
-             print(f"Error during CodeAgent execution: {e}") # This will now print the actual underlying error
-             yield f"Error using tools: {str(e)}" # The str(e) might be the user-facing error
-         return
-     else:
-         print("No MCP tools active. Proceeding with direct LLM call (streaming).")
-         response_stream_content = ""
-         try:
-             stream = hf_inference_client.chat_completion(
-                 model=model_to_use, messages=llm_messages, stream=True,
-                 max_tokens=max_tokens, temperature=temperature, top_p=top_p,
-                 frequency_penalty=frequency_penalty, seed=seed
-             )
-             for chunk in stream:
-                 if hasattr(chunk, 'choices') and len(chunk.choices) > 0:
-                     delta = chunk.choices[0].delta
-                     if hasattr(delta, 'content') and delta.content:
-                         token_text = delta.content
-                         response_stream_content += token_text
-                         yield response_stream_content
-             print("\nCompleted streaming response generation.")
-         except Exception as e:
-             print(f"Error during direct LLM inference: {e}")
-             yield response_stream_content + f"\nError: {str(e)}"
-
- def validate_provider(api_key, provider):
-     if not api_key.strip() and provider != "hf-inference":
-         return gr.update(value="hf-inference")
-     return gr.update(value=provider)

  with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
-     # UserWarning for type='tuples' is known. Consider changing to type='messages' later for robustness.
      chatbot = gr.Chatbot(
-         label="Serverless TextGen Hub", height=600, show_copy_button=True,
-         placeholder="Select a model, (optionally) load MCP Tools, and begin chatting.",
-         layout="panel", bubble_full_width=False
      )
-     msg_input_box = gr.MultimodalTextbox(
-         placeholder="Type a message or upload images...", show_label=False,
-         container=False, scale=12, file_types=["image"],
-         file_count="multiple", sources=["upload"]
      )

      with gr.Accordion("Settings", open=False):
-         system_message_box = gr.Textbox(value="You are a helpful AI assistant.", label="System Prompt")
-         with gr.Row():
-             max_tokens_slider = gr.Slider(1, 4096, value=512, step=1, label="Max tokens")
-             temperature_slider = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
-             top_p_slider = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-P")
          with gr.Row():
-             frequency_penalty_slider = gr.Slider(-2.0, 2.0, value=0.0, step=0.1, label="Frequency Penalty")
-             seed_slider = gr.Slider(-1, 65535, value=-1, step=1, label="Seed (-1 for random)")
-         providers_list = ["hf-inference", "cerebras", "together", "sambanova", "novita", "cohere", "fireworks-ai", "hyperbolic", "nebius"]
-         provider_radio = gr.Radio(choices=providers_list, value="hf-inference", label="Inference Provider")
-         byok_textbox = gr.Textbox(label="BYOK (Hugging Face API Key)", type="password", placeholder="Enter token if not using 'hf-inference'")
-         custom_model_box = gr.Textbox(label="Custom Model ID", placeholder="org/model-name (overrides selection below)")
-         model_search_box = gr.Textbox(label="Filter Featured Models", placeholder="Search...")
          models_list = [
-             "meta-llama/Llama-3.2-11B-Vision-Instruct", "meta-llama/Llama-3.3-70B-Instruct",
-             "meta-llama/Llama-3.1-70B-Instruct", "meta-llama/Llama-3.0-70B-Instruct",
-             "meta-llama/Llama-3.2-3B-Instruct", "meta-llama/Llama-3.2-1B-Instruct",
-             "meta-llama/Llama-3.1-8B-Instruct", "NousResearch/Hermes-3-Llama-3.1-8B",
-             "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", "mistralai/Mistral-Nemo-Instruct-2407",
-             "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.3",
-             "mistralai/Mistral-7B-Instruct-v0.2", "Qwen/Qwen3-235B-A22B", "Qwen/Qwen3-32B",
-             "Qwen/Qwen2.5-72B-Instruct", "Qwen/Qwen2.5-3B-Instruct", "Qwen/Qwen2.5-0.5B-Instruct",
-             "Qwen/QwQ-32B", "Qwen/Qwen2.5-Coder-32B-Instruct", "microsoft/Phi-3.5-mini-instruct",
-             "microsoft/Phi-3-mini-128k-instruct", "microsoft/Phi-3-mini-4k-instruct",
          ]
-         featured_model_radio = gr.Radio(label="Select a Featured Model", choices=models_list, value="meta-llama/Llama-3.2-11B-Vision-Instruct", interactive=True)
-         gr.Markdown("[All Text models](https://huggingface.co/models?pipeline_tag=text-generation) | [All Multimodal models](https://huggingface.co/models?pipeline_tag=image-text-to-text)")
-
-     with gr.Accordion("MCP Client Settings (Connect to External Tools)", open=False):
-         gr.Markdown("Configure connections to MCP Servers to allow the LLM to use external tools. The LLM will decide when to use these tools based on your prompts.")
-         mcp_server_config_input = gr.Textbox(
-             label="MCP Server Configurations (JSON Array)",
-             info='Example: [{"name": "MyToolServer", "type": "sse", "url": "http://server_url/gradio_api/mcp/sse"}]',
-             lines=3, placeholder='Enter a JSON list of server configurations here.',
-             value=json.dumps(DEFAULT_MCP_SERVERS, indent=2)
          )
-         mcp_load_status_display = gr.Textbox(label="MCP Load Status", interactive=False)
-         load_mcp_tools_btn = gr.Button("Load/Reload MCP Tools")

-         def handle_load_mcp_tools_click(config_str_from_ui):
-             if not config_str_from_ui:
-                 load_mcp_tools([])
-                 return "MCP tool loading attempted with empty config. Tools cleared."
-             try:
-                 parsed_configs = json.loads(config_str_from_ui)
-                 if not isinstance(parsed_configs, list): return "Error: MCP configuration must be a valid JSON list."
-                 load_mcp_tools(parsed_configs)
-                 if mcp_tools_collection and len(mcp_tools_collection.tools) > 0:
-                     loaded_tool_names = [t.name for t in mcp_tools_collection.tools]
-                     return f"Successfully loaded {len(loaded_tool_names)} MCP tools: {', '.join(loaded_tool_names)}"
-                 else: return "No MCP tools loaded, or an error occurred. Check console for details."
-             except json.JSONDecodeError: return "Error: Invalid JSON format in MCP server configurations."
-             except Exception as e:
-                 print(f"Unhandled error in handle_load_mcp_tools_click: {e}")
-                 return f"Error loading MCP tools: {str(e)}. Check console."
-         load_mcp_tools_btn.click(handle_load_mcp_tools_click, inputs=[mcp_server_config_input], outputs=mcp_load_status_display)

      def filter_models(search_term):
-         return gr.update(choices=[m for m in models_list if search_term.lower() in m.lower()])
      def set_custom_model_from_radio(selected):
          return selected

-     def handle_submit(msg_content_dict, current_chat_history):
-         text = msg_content_dict.get("text", "").strip()
-         files = msg_content_dict.get("files", []) # list of file paths
-
-         if not text and not files: # Skip if both are empty
-             print("Skipping empty submission from multimodal textbox.")
-             # Yield current history to prevent Gradio from complaining about no output
-             yield current_chat_history, {"text": "", "files": []} # Clear input
-             return
-
-         # FIX for Issue 4: Pydantic FileMessage error by ensuring user part of history is a string
-         user_display_parts = []
-         if text:
-             user_display_parts.append(text)
-         if files:
-             for f_path in files:
-                 base_name = os.path.basename(f_path) if f_path else "file"
-                 f_path_str = f_path if f_path else ""
-                 user_display_parts.append(f"\n![{base_name}]({f_path_str})")
-         user_display_message_for_chatbot = " ".join(user_display_parts).strip()
-
-         current_chat_history.append([user_display_message_for_chatbot, None])
-
-         # Prepare history for respond function (ensure user part is string)
-         history_for_respond = []
-         for user_h, assistant_h in current_chat_history[:-1]: # History before current turn
-             history_for_respond.append((str(user_h) if user_h is not None else "", assistant_h))
-
-         assistant_response_accumulator = ""
-         for streamed_chunk in respond(
-             text, files,
-             history_for_respond,
-             system_message_box.value, max_tokens_slider.value, temperature_slider.value,
-             top_p_slider.value, frequency_penalty_slider.value, seed_slider.value,
-             provider_radio.value, byok_textbox.value, custom_model_box.value,
-             model_search_box.value, featured_model_radio.value
-         ):
-             assistant_response_accumulator = streamed_chunk
-             current_chat_history[-1][1] = assistant_response_accumulator
-             yield current_chat_history, {"text": "", "files": []}

-     msg_input_box.submit(
-         handle_submit,
-         [msg_input_box, chatbot],
-         [chatbot, msg_input_box]
      )
-     model_search_box.change(filter_models, model_search_box, featured_model_radio)
-     featured_model_radio.change(set_custom_model_from_radio, featured_model_radio, custom_model_box)
-     byok_textbox.change(validate_provider, [byok_textbox, provider_radio], provider_radio)
-     provider_radio.change(validate_provider, [byok_textbox, provider_radio], provider_radio)

-     load_mcp_tools(DEFAULT_MCP_SERVERS) # Load defaults on startup
-     print(f"Initial MCP tools loaded: {len(mcp_tools_collection.tools) if mcp_tools_collection else 0} tools.")
      print("Gradio interface initialized.")

  if __name__ == "__main__":
-     print("Launching the Serverless TextGen Hub demo application.")
-     demo.launch(show_api=False)

  import base64
  from PIL import Image
  import io
+ import requests
+ from smolagents.mcp_client import MCPClient

  ACCESS_TOKEN = os.getenv("HF_TOKEN")
  print("Access token loaded.")

+ # Function to encode image to base64
+ def encode_image(image_path):
+     if not image_path:
+         print("No image path provided")
+         return None
+
+     try:
+         print(f"Encoding image from path: {image_path}")
+
+         # If it's already a PIL Image
+         if isinstance(image_path, Image.Image):
+             image = image_path
+         else:
+             # Try to open the image file
+             image = Image.open(image_path)
+
+         # Convert to RGB if image has an alpha channel (RGBA)
+         if image.mode == 'RGBA':
+             image = image.convert('RGB')
+
+         # Encode to base64
+         buffered = io.BytesIO()
+         image.save(buffered, format="JPEG")
+         img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+         print("Image encoded successfully")
+         return img_str
+     except Exception as e:
+         print(f"Error encoding image: {e}")
+         return None
+
+ # Dictionary to store active MCP connections
+ mcp_connections = {}

+ def connect_to_mcp_server(server_url, server_name=None):
+     """Connect to an MCP server and return available tools"""
+     if not server_url:
+         return None, "No server URL provided"
+
+     try:
+         # Create an MCP client and connect to the server
+         client = MCPClient({"url": server_url})
+         # Get available tools
+         tools = client.get_tools()
+
+         # Store the connection for later use
+         name = server_name or f"Server_{len(mcp_connections)}"
+         mcp_connections[name] = {"client": client, "tools": tools, "url": server_url}
+
+         return name, f"Successfully connected to {name} with {len(tools)} available tools"
+     except Exception as e:
+         print(f"Error connecting to MCP server: {e}")
+         return None, f"Error connecting to MCP server: {str(e)}"

+ def list_mcp_tools(server_name):
+     """List available tools for a connected MCP server"""
+     if server_name not in mcp_connections:
+         return "Server not connected"
+
+     tools = mcp_connections[server_name]["tools"]
+     tool_info = []
+     for tool in tools:
+         tool_info.append(f"- {tool.name}: {tool.description}")
+
+     if not tool_info:
+         return "No tools available for this server"
+
+     return "\n".join(tool_info)

+ def call_mcp_tool(server_name, tool_name, **kwargs):
+     """Call a specific tool from an MCP server"""
+     if server_name not in mcp_connections:
+         return f"Server '{server_name}' not connected"
+
+     client = mcp_connections[server_name]["client"]
+     tools = mcp_connections[server_name]["tools"]
+
+     # Find the requested tool
+     tool = next((t for t in tools if t.name == tool_name), None)
+     if not tool:
+         return f"Tool '{tool_name}' not found on server '{server_name}'"
+
+     try:
+         # Call the tool with provided arguments
+         result = client.call_tool(tool_name, kwargs)
+         return result
+     except Exception as e:
+         print(f"Error calling MCP tool: {e}")
+         return f"Error calling MCP tool: {str(e)}"
+
+ def analyze_message_for_tool_call(message, active_mcp_servers, client, model_to_use, system_message):
+     """Analyze a message to determine if an MCP tool should be called"""
+     # Skip analysis if message is empty
+     if not message or not message.strip():
+         return None, None
+
+     # Get information about available tools
+     tool_info = []
+     for server_name in active_mcp_servers:
+         if server_name in mcp_connections:
+             server_tools = mcp_connections[server_name]["tools"]
+             for tool in server_tools:
+                 tool_info.append({
+                     "server_name": server_name,
+                     "tool_name": tool.name,
+                     "description": tool.description
+                 })
+
+     if not tool_info:
+         return None, None
+
+     # Create a structured query for the LLM to analyze if a tool call is needed
+     tools_desc = []
+     for info in tool_info:
+         tools_desc.append(f"{info['server_name']}.{info['tool_name']}: {info['description']}")
+
+     tools_string = "\n".join(tools_desc)
+
+     analysis_system_prompt = f"""You are an assistant that helps determine if a user message requires using an external tool.
+ Available tools:
+ {tools_string}
+
+ Your job is to:
+ 1. Analyze the user's message
+ 2. Determine if they're asking to use one of the tools
+ 3. If yes, respond with a JSON object with the server_name, tool_name, and parameters
+ 4. If no, respond with "NO_TOOL_NEEDED"
+
+ Example 1:
+ User: "Please turn this text into speech: Hello world"
+ Response: {{"server_name": "kokoroTTS", "tool_name": "text_to_audio", "parameters": {{"text": "Hello world", "speed": 1.0}}}}
+
+ Example 2:
+ User: "What is the capital of France?"
+ Response: NO_TOOL_NEEDED"""
+
      try:
+         # Call the LLM to analyze the message
+         response = client.chat_completion(
+             model=model_to_use,
+             messages=[
+                 {"role": "system", "content": analysis_system_prompt},
+                 {"role": "user", "content": message}
+             ],
+             temperature=0.2, # Low temperature for more deterministic responses
+             max_tokens=300
+         )
+
+         analysis = response.choices[0].message.content
+         print(f"Tool analysis: {analysis}")
+
+         if "NO_TOOL_NEEDED" in analysis:
+             return None, None
+
+         # Try to extract JSON from the response
+         json_start = analysis.find("{")
+         json_end = analysis.rfind("}") + 1
+
+         if json_start < 0 or json_end <= 0:
+             return None, None
+
+         json_str = analysis[json_start:json_end]
+         try:
+             tool_call = json.loads(json_str)
+             return tool_call.get("server_name"), {
+                 "tool_name": tool_call.get("tool_name"),
+                 "parameters": tool_call.get("parameters", {})
+             }
+         except json.JSONDecodeError:
+             print(f"Failed to parse tool call JSON: {json_str}")
+             return None, None
+
      except Exception as e:
+         print(f"Error analyzing message for tool calls: {str(e)}")
+         return None, None

  def respond(
+     message,
+     image_files,
+     history: list[tuple[str, str]],
      system_message,
      max_tokens,
      temperature,
      custom_api_key,
      custom_model,
      model_search_term,
+     selected_model,
+     mcp_enabled=False,
+     active_mcp_servers=None,
+     mcp_interaction_mode="Natural Language"
  ):
+     print(f"Received message: {message}")
+     print(f"Received {len(image_files) if image_files else 0} images")
+     print(f"History: {history}")
+     print(f"System message: {system_message}")
+     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
+     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
+     print(f"Selected provider: {provider}")
+     print(f"Custom API Key provided: {bool(custom_api_key.strip())}")
+     print(f"Selected model (custom_model): {custom_model}")
+     print(f"Model search term: {model_search_term}")
+     print(f"Selected model from radio: {selected_model}")
+     print(f"MCP enabled: {mcp_enabled}")
+     print(f"Active MCP servers: {active_mcp_servers}")
+     print(f"MCP interaction mode: {mcp_interaction_mode}")
+
+     # Determine which token to use
+     token_to_use = custom_api_key if custom_api_key.strip() != "" else ACCESS_TOKEN
+
+     if custom_api_key.strip() != "":
+         print("USING CUSTOM API KEY: BYOK token provided by user is being used for authentication")
+     else:
+         print("USING DEFAULT API KEY: Environment variable HF_TOKEN is being used for authentication")

+     # Initialize the Inference Client with the provider and appropriate token
+     client = InferenceClient(token=token_to_use, provider=provider)
+     print(f"Hugging Face Inference Client initialized with {provider} provider.")
+
+     # Convert seed to None if -1 (meaning random)
+     if seed == -1:
+         seed = None
+
+     # Determine which model to use
+     model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
      print(f"Model selected for inference: {model_to_use}")
+
+     # Process MCP commands in command mode
+     if mcp_enabled and message:
+         if message.startswith("/mcp"): # Always handle explicit commands
+             # Handle MCP command
+             command_parts = message.split(" ", 3)
+             if len(command_parts) < 3:
+                 return "Invalid MCP command. Format: /mcp <server_name> <tool_name> [arguments]"
+
+             _, server_name, tool_name = command_parts[:3]
+             args_json = "{}" if len(command_parts) < 4 else command_parts[3]
+
+             try:
+                 args_dict = json.loads(args_json)
+                 result = call_mcp_tool(server_name, tool_name, **args_dict)
+                 if isinstance(result, dict):
+                     return json.dumps(result, indent=2)
+                 return str(result)
+             except json.JSONDecodeError:
+                 return f"Invalid JSON arguments: {args_json}"
+             except Exception as e:
+                 return f"Error executing MCP command: {str(e)}"
+         elif mcp_interaction_mode == "Natural Language" and active_mcp_servers:
+             # Use natural language processing to detect tool calls
+             server_name, tool_info = analyze_message_for_tool_call(
+                 message,
+                 active_mcp_servers,
+                 client,
+                 model_to_use,
+                 system_message
+             )
+
+             if server_name and tool_info:
+                 try:
+                     # Call the detected tool
+                     print(f"Calling tool via natural language: {server_name}.{tool_info['tool_name']} with parameters: {tool_info['parameters']}")
+                     result = call_mcp_tool(server_name, tool_info['tool_name'], **tool_info['parameters'])
+
+                     # Format the response to include what was done
+                     if isinstance(result, dict):
+                         result_str = json.dumps(result, indent=2)
+                     else:
+                         result_str = str(result)
+
+                     return f"I used the {tool_info['tool_name']} tool from {server_name} with your request.\n\nResult:\n{result_str}"
+                 except Exception as e:
+                     print(f"Error executing MCP tool via natural language: {str(e)}")
+                     # Continue with normal response if tool call fails
+
+     # Create multimodal content if images are present
+     if image_files and len(image_files) > 0:
+         # Process the user message to include images
+         user_content = []
+
+         # Add text part if there is any
+         if message and message.strip():
+             user_content.append({
+                 "type": "text",
+                 "text": message
+             })
+
+         # Add image parts
+         for img in image_files:
+             if img is not None:
+                 # Get raw image data from path
+                 try:
+                     encoded_image = encode_image(img)
+                     if encoded_image:
+                         user_content.append({
+                             "type": "image_url",
+                             "image_url": {
+                                 "url": f"data:image/jpeg;base64,{encoded_image}"
+                             }
+                         })
+                 except Exception as e:
+                     print(f"Error encoding image: {e}")
+     else:
+         # Text-only message
+         user_content = message
+
+     # Add information about available MCP tools to the system message if MCP is enabled
+     augmented_system_message = system_message
+     if mcp_enabled and active_mcp_servers:
+         tool_info = []
+         for server_name in active_mcp_servers:
+             if server_name in mcp_connections:
+                 server_tools = list_mcp_tools(server_name).split("\n")
+                 tool_info.extend([f"{server_name}: {tool}" for tool in server_tools])
+
+         if tool_info:
+             mcp_tools_description = "\n".join(tool_info)
+
+             if mcp_interaction_mode == "Command Mode":
+                 augmented_system_message += f"\n\nYou have access to the following MCP tools:\n{mcp_tools_description}\n\nTo use these tools, the user can type a command in the format: /mcp <server_name> <tool_name> <arguments_json>"
+             else:
+                 augmented_system_message += f"\n\nYou have access to the following MCP tools:\n{mcp_tools_description}\n\nThe user can use these tools by describing what they want in natural language, and the system will automatically detect when to use a tool based on their request."

+     # Prepare messages in the format expected by the API
+     messages = [{"role": "system", "content": augmented_system_message}]
+     print("Initial messages array constructed.")
+
+     # Add conversation history to the context
+     for val in history:
+         user_part = val[0]
+         assistant_part = val[1]
+         if user_part:
+             # Handle both text-only and multimodal messages in history
+             if isinstance(user_part, tuple) and len(user_part) == 2:
+                 # This is a multimodal message with text and images
+                 history_content = []
+                 if user_part[0]: # Text
+                     history_content.append({
+                         "type": "text",
+                         "text": user_part[0]
+                     })

+                 for img in user_part[1]: # Images
+                     if img:
+                         try:
+                             encoded_img = encode_image(img)
+                             if encoded_img:
+                                 history_content.append({
+                                     "type": "image_url",
+                                     "image_url": {
+                                         "url": f"data:image/jpeg;base64,{encoded_img}"
+                                     }
+                                 })
+                         except Exception as e:
+                             print(f"Error encoding history image: {e}")

+                 messages.append({"role": "user", "content": history_content})
+             else:
+                 # Regular text message
+                 messages.append({"role": "user", "content": user_part})
+                 print(f"Added user message to context (type: {type(user_part)})")
+
+         if assistant_part:
+             messages.append({"role": "assistant", "content": assistant_part})
+             print(f"Added assistant message to context: {assistant_part}")

+     # Append the latest user message
+     messages.append({"role": "user", "content": user_content})
+     print(f"Latest user message appended (content type: {type(user_content)})")
+
+     # Determine which model to use, prioritizing custom_model if provided
+     model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
+     print(f"Model selected for inference: {model_to_use}")
+
+     # Start with an empty string to build the response as tokens stream in
+     response = ""
+     print(f"Sending request to {provider} provider.")
+
+     # Prepare parameters for the chat completion request
+     parameters = {
+         "max_tokens": max_tokens,
+         "temperature": temperature,
+         "top_p": top_p,
+         "frequency_penalty": frequency_penalty,
+     }
+
+     if seed is not None:
+         parameters["seed"] = seed
+
+     # Use the InferenceClient for making the request
+     try:
+         # Create a generator for the streaming response
+         stream = client.chat_completion(
+             model=model_to_use,
+             messages=messages,
+             stream=True,
+             **parameters
+         )
+
+         print("Received tokens: ", end="", flush=True)
+
+         # Process the streaming response
+         for chunk in stream:
+             if hasattr(chunk, 'choices') and len(chunk.choices) > 0:
+                 # Extract the content from the response
+                 if hasattr(chunk.choices[0], 'delta') and hasattr(chunk.choices[0].delta, 'content'):
+                     token_text = chunk.choices[0].delta.content
+                     if token_text:
+                         print(token_text, end="", flush=True)
+                         response += token_text
+                         yield response
+
+         print()
+     except Exception as e:
+         print(f"Error during inference: {e}")
+         response += f"\nError: {str(e)}"
+         yield response
+
+     print("Completed response generation.")
+
+ # GRADIO UI
  with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
+     # Create the chatbot component
      chatbot = gr.Chatbot(
+         height=600,
+         show_copy_button=True,
+         placeholder="Select a model and begin chatting. Now supports multiple inference providers, multimodal inputs, and MCP tools",
+         layout="panel"
      )
+     print("Chatbot interface created.")
+
+     # Multimodal textbox for messages (combines text and file uploads)
+     msg = gr.MultimodalTextbox(
+         placeholder="Type a message or upload images...",
+         show_label=False,
+         container=False,
+         scale=12,
+         file_types=["image"],
+         file_count="multiple",
+         sources=["upload"]
      )
+
+     # Create accordion for settings
      with gr.Accordion("Settings", open=False):
+         # System message
+         system_message_box = gr.Textbox(
+             value="You are a helpful AI assistant that can understand images and text.",
+             placeholder="You are a helpful assistant.",
+             label="System Prompt"
+         )
+
+         # Generation parameters
          with gr.Row():
+             with gr.Column():
+                 max_tokens_slider = gr.Slider(
+                     minimum=1,
+                     maximum=4096,
+                     value=512,
+                     step=1,
+                     label="Max tokens"
+                 )
+
+                 temperature_slider = gr.Slider(
+                     minimum=0.1,
+                     maximum=4.0,
+                     value=0.7,
+                     step=0.1,
+                     label="Temperature"
+                 )
+
+                 top_p_slider = gr.Slider(
+                     minimum=0.1,
+                     maximum=1.0,
+                     value=0.95,
+                     step=0.05,
+                     label="Top-P"
+                 )
+
+             with gr.Column():
+                 frequency_penalty_slider = gr.Slider(
+                     minimum=-2.0,
+                     maximum=2.0,
+                     value=0.0,
+                     step=0.1,
+                     label="Frequency Penalty"
+                 )
+
+                 seed_slider = gr.Slider(
+                     minimum=-1,
+                     maximum=65535,
+                     value=-1,
+                     step=1,
+                     label="Seed (-1 for random)"
+                 )
+
+         # Provider selection
+         providers_list = [
+             "hf-inference", # Default Hugging Face Inference
+             "cerebras", # Cerebras provider
+             "together", # Together AI
+             "sambanova", # SambaNova
+             "novita", # Novita AI
+             "cohere", # Cohere
+             "fireworks-ai", # Fireworks AI
+             "hyperbolic", # Hyperbolic
+             "nebius", # Nebius
+         ]
+
+         provider_radio = gr.Radio(
+             choices=providers_list,
+             value="hf-inference",
+             label="Inference Provider",
+         )
+
+         # New BYOK textbox
+         byok_textbox = gr.Textbox(
+             value="",
+             label="BYOK (Bring Your Own Key)",
+             info="Enter a custom Hugging Face API key here. When empty, only 'hf-inference' provider can be used.",
+             placeholder="Enter your Hugging Face API token",
+             type="password" # Hide the API key for security
+         )
+
+         # Custom model box
+         custom_model_box = gr.Textbox(
+             value="",
+             label="Custom Model",
+             info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
+             placeholder="meta-llama/Llama-3.3-70B-Instruct"
+         )
+
+         # Model search
+         model_search_box = gr.Textbox(
+             label="Filter Models",
+             placeholder="Search for a featured model...",
+             lines=1
+         )
+
+         # Featured models list
+         # Updated to include multimodal models
          models_list = [
+             "meta-llama/Llama-3.2-11B-Vision-Instruct",
+             "meta-llama/Llama-3.3-70B-Instruct",
+             "meta-llama/Llama-3.1-70B-Instruct",
+             "meta-llama/Llama-3.0-70B-Instruct",
+             "meta-llama/Llama-3.2-3B-Instruct",
+             "meta-llama/Llama-3.2-1B-Instruct",
+             "meta-llama/Llama-3.1-8B-Instruct",
+             "NousResearch/Hermes-3-Llama-3.1-8B",
+             "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
+             "mistralai/Mistral-Nemo-Instruct-2407",
+             "mistralai/Mixtral-8x7B-Instruct-v0.1",
+             "mistralai/Mistral-7B-Instruct-v0.3",
+             "mistralai/Mistral-7B-Instruct-v0.2",
+             "Qwen/Qwen3-235B-A22B",
+             "Qwen/Qwen3-32B",
+             "Qwen/Qwen2.5-72B-Instruct",
+             "Qwen/Qwen2.5-3B-Instruct",
+             "Qwen/Qwen2.5-0.5B-Instruct",
+             "Qwen/QwQ-32B",
+             "Qwen/Qwen2.5-Coder-32B-Instruct",
+             "microsoft/Phi-3.5-mini-instruct",
+             "microsoft/Phi-3-mini-128k-instruct",
+             "microsoft/Phi-3-mini-4k-instruct",
          ]
+
+         featured_model_radio = gr.Radio(
+             label="Select a model below",
+             choices=models_list,
+             value="meta-llama/Llama-3.2-11B-Vision-Instruct", # Default to a multimodal model
+             interactive=True
          )

+         gr.Markdown("[View all Text-to-Text models](https://huggingface.co/models?inference_provider=all&pipeline_tag=text-generation&sort=trending) | [View all multimodal models](https://huggingface.co/models?inference_provider=all&pipeline_tag=image-text-to-text&sort=trending)")
+
+     # Create accordion for MCP settings
+     with gr.Accordion("MCP Settings", open=False):
+         mcp_enabled_checkbox = gr.Checkbox(
+             label="Enable MCP Support",
+             value=False,
+             info="Enable Model Context Protocol support to connect to external tools and services"
+         )
+
+         with gr.Row():
+             mcp_server_url = gr.Textbox(
+                 label="MCP Server URL",
+                 placeholder="https://example-mcp-server.hf.space/gradio_api/mcp/sse",
+                 info="URL of the MCP server to connect to"
+             )
+
+             mcp_server_name = gr.Textbox(
+                 label="Server Name",
+                 placeholder="Optional name for this server",
+                 info="A friendly name to identify this server"
+             )
+
+         mcp_connect_button = gr.Button("Connect to MCP Server")
+
+         mcp_status = gr.Textbox(
+             label="MCP Connection Status",
+             placeholder="No MCP servers connected",
+             interactive=False
+         )
+
+         active_mcp_servers = gr.Dropdown(
+             label="Active MCP Servers",
+             choices=[],
+             multiselect=True,
+             info="Select which MCP servers to use in chat"
+         )
+
+         mcp_mode = gr.Radio(
+             label="MCP Interaction Mode",
+             choices=["Natural Language", "Command Mode"],
+             value="Natural Language",
+             info="Choose how to interact with MCP tools"
+         )
+
+         gr.Markdown("""
+         ### MCP Interaction Modes
+
+         **Natural Language Mode**: Simply describe what you want in plain English. Examples:
+         ```
+         Please convert the text "Hello world" to speech
+         Can you read this text aloud: "Welcome to MCP integration"
+         ```
+
+         **Command Mode**: Use structured commands (for advanced users)
+         ```
+         /mcp <server_name> <tool_name> {"param1": "value1", "param2": "value2"}
+         ```
+
+         Example:
+         ```
+         /mcp kokoroTTS text_to_audio {"text": "Hello world", "speed": 1.0}
+         ```
+         """)

+     # Chat history state
+     chat_history = gr.State([])
+
+     # Function to filter models
      def filter_models(search_term):
+         print(f"Filtering models with search term: {search_term}")
+         filtered = [m for m in models_list if search_term.lower() in m.lower()]
+         print(f"Filtered models: {filtered}")
+         return gr.update(choices=filtered)
+
+     # Function to set custom model from radio
      def set_custom_model_from_radio(selected):
+         print(f"Featured model selected: {selected}")
          return selected

+     # Function to connect to MCP server
+     def connect_mcp_server(url, name):
+         server_name, status = connect_to_mcp_server(url, name)
+
+         # Update the active servers dropdown
+         servers = list(mcp_connections.keys())
+
+         # Return the status message and updated server list
+         return status, gr.update(choices=servers)
+
+     # Function for the chat interface
+     def user(user_message, history):
+         # Debug logging for troubleshooting
+         print(f"User message received: {user_message}")
+
+         # Skip if message is empty (no text and no files)
+         if not user_message or (not user_message.get("text") and not user_message.get("files")):
+             print("Empty message, skipping")
+             return history
+
+         # Prepare multimodal message format
+         text_content = user_message.get("text", "").strip()
+         files = user_message.get("files", [])
+
+         print(f"Text content: {text_content}")
+         print(f"Files: {files}")
+
+         # If both text and files are empty, skip
+         if not text_content and not files:
+             print("No content to display")
+             return history
+
+         # Add message with images to history
+         if files and len(files) > 0:
+             # Add text message first if it exists
+             if text_content:
+                 # Add a separate text message
+                 print(f"Adding text message: {text_content}")
+                 history.append([text_content, None])
+
+             # Then add each image file separately
+             for file_path in files:
+                 if file_path and isinstance(file_path, str):
+                     print(f"Adding image: {file_path}")
+                     # Add image as a separate message with no text
+                     history.append([f"![Image]({file_path})", None])
+
+             return history
+         else:
+             # For text-only messages
+             print(f"Adding text-only message: {text_content}")
+             history.append([text_content, None])
+             return history
+
+     # Define bot response function
+     def bot(history, system_msg, max_tokens, temperature, top_p, freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model, mcp_enabled, selected_servers):
+         # Check if history is valid
+         if not history or len(history) == 0:
+             print("No history to process")
+             return history
+
+         # Get the most recent message and detect if it's an image
+         user_message = history[-1][0]
+         print(f"Processing user message: {user_message}")
+
+         is_image = False
+         image_path = None
+         text_content = user_message
+
+         # Check if this is an image message (marked with ![Image])
+         if isinstance(user_message, str) and user_message.startswith("![Image]("):
+             is_image = True
+             # Extract image path from markdown format ![Image](path)
+             image_path = user_message.replace("![Image](", "").replace(")", "")
+             print(f"Image detected: {image_path}")
+             text_content = "" # No text for image-only messages
+
+         # Look back for text context if this is an image
+         text_context = ""
+         if is_image and len(history) > 1:
+             # Use the previous message as context if it's text
+             prev_message = history[-2][0]
+             if isinstance(prev_message, str) and not prev_message.startswith("![Image]("):
+                 text_context = prev_message
+                 print(f"Using text context from previous message: {text_context}")
+
+         # Process message through respond function
+         history[-1][1] = ""
+
+         # Use either the image or text for the API
+         if is_image:
+             # For image messages
+             for response in respond(
+                 text_context, # Text context from previous message if any
+                 [image_path], # Current image
+                 history[:-1], # Previous history
+                 system_msg,
+                 max_tokens,
+                 temperature,
+                 top_p,
+                 freq_penalty,
+                 seed,
+                 provider,
+                 api_key,
+                 custom_model,
+                 search_term,
+                 selected_model,
+                 mcp_enabled,
+                 selected_servers
+             ):
+                 history[-1][1] = response
+                 yield history
+         else:
+             # For text-only messages
+             for response in respond(
+                 text_content, # Text message
+                 None, # No image
+                 history[:-1], # Previous history
+                 system_msg,
+                 max_tokens,
+                 temperature,
+                 top_p,
+                 freq_penalty,
+                 seed,
+                 provider,
+                 api_key,
+                 custom_model,
+                 search_term,
+                 selected_model,
+                 mcp_enabled,
+                 selected_servers
+             ):
+                 history[-1][1] = response
+                 yield history
+
+     # Update function for provider validation based on BYOK
+     def validate_provider(api_key, provider):
+         if not api_key.strip() and provider != "hf-inference":
+             return gr.update(value="hf-inference")
+         return gr.update(value=provider)
+
+     # Event handlers
+     msg.submit(
+         user,
+         [msg, chatbot],
+         [chatbot],
+         queue=False
+     ).then(
+         bot,
+         [chatbot, system_message_box, max_tokens_slider, temperature_slider, top_p_slider,
+          frequency_penalty_slider, seed_slider, provider_radio, byok_textbox, custom_model_box,
+          model_search_box, featured_model_radio, mcp_enabled_checkbox, active_mcp_servers, mcp_mode],
+         [chatbot]
+     ).then(
+         lambda: {"text": "", "files": []}, # Clear inputs after submission
+         None,
+         [msg]
+     )
+
+     # Connect MCP connect button
+     mcp_connect_button.click(
+         connect_mcp_server,
+         [mcp_server_url, mcp_server_name],
+         [mcp_status, active_mcp_servers]
+     )
+
+     # Connect the model filter to update the radio choices
+     model_search_box.change(
+         fn=filter_models,
+         inputs=model_search_box,
+         outputs=featured_model_radio
+     )
+     print("Model search box change event linked.")
+
+     # Connect the featured model radio to update the custom model box
+     featured_model_radio.change(
+         fn=set_custom_model_from_radio,
+         inputs=featured_model_radio,
+         outputs=custom_model_box
+     )
+     print("Featured model radio button change event linked.")

+     # Connect the BYOK textbox to validate provider selection
+     byok_textbox.change(
+         fn=validate_provider,
+         inputs=[byok_textbox, provider_radio],
+         outputs=provider_radio
+     )
+     print("BYOK textbox change event linked.")
+
+     # Also validate provider when the radio changes to ensure consistency
+     provider_radio.change(
+         fn=validate_provider,
+         inputs=[byok_textbox, provider_radio],
+         outputs=provider_radio
      )
+     print("Provider radio button change event linked.")

      print("Gradio interface initialized.")

  if __name__ == "__main__":
+     print("Launching the demo application.")
+     demo.launch(show_api=True, mcp_server=False) # Not launching as MCP server as we're the client
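
For reference, the new client-side flow introduced by this commit (connect to an MCP server, list its tools, call a tool by name) can be exercised outside the Gradio UI roughly as follows. This is a minimal sketch, not part of the commit: it assumes `app.py` is importable as a module, and the server URL, the `kokoroTTS` name, and the `text_to_audio` parameters are only illustrative, taken from the example config and prompt text shown in the diff above.

```python
# Hypothetical usage sketch of the helpers added in this commit (not part of app.py).
from app import connect_to_mcp_server, list_mcp_tools, call_mcp_tool

# Connect to an MCP server over SSE (URL reused from the old DEFAULT_MCP_SERVERS example).
name, status = connect_to_mcp_server(
    "https://fdaudens-kokoro-mcp.hf.space/gradio_api/mcp/sse", "kokoroTTS"
)
print(status)

if name:
    # Show the tools the server advertises.
    print(list_mcp_tools(name))

    # Call a tool by name; "text_to_audio" and its parameters mirror the example
    # given in the natural-language analysis prompt in the new code.
    result = call_mcp_tool(name, "text_to_audio", text="Hello world", speed=1.0)
    print(result)
```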