Nymbo committed
Commit 6a6b98f · verified · 1 Parent(s): 7c1212e

Update app.py

Files changed (1)
  1. app.py +252 -237
app.py CHANGED
@@ -15,7 +15,7 @@ def encode_image(image_path):
         print("No image path provided")
         return None
 
-    try
+    try:
         print(f"Encoding image from path: {image_path}")
 
         # If it's already a PIL Image
@@ -31,7 +31,7 @@ def encode_image(image_path):
 
         # Encode to base64
         buffered = io.BytesIO()
-        image.save(buffered, format="JPEG")
+        image.save(buffered, format="JPEG") # Keep JPEG for consistency with image_url
         img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
         print("Image encoded successfully")
         return img_str
@@ -52,11 +52,33 @@ def respond(
     provider,
     custom_api_key,
     custom_model,
-    model_search_term,
-    selected_model
+    model_search_term, # Retained for function signature consistency if called elsewhere
+    selected_model # Retained for function signature consistency
 ):
+    """
+    Core function to stream responses from a language model.
+
+    Args:
+        message (str | list): The user's message, can be text or multimodal content.
+        image_files (list[str]): List of paths to image files for the current turn.
+        history (list[tuple[str, str]]): Conversation history.
+        system_message (str): System prompt for the model.
+        max_tokens (int): Maximum tokens for the response.
+        temperature (float): Sampling temperature.
+        top_p (float): Top-p (nucleus) sampling.
+        frequency_penalty (float): Frequency penalty.
+        seed (int): Random seed (-1 for random).
+        provider (str): Inference provider.
+        custom_api_key (str): Custom API key.
+        custom_model (str): Custom model ID.
+        model_search_term (str): Term for searching models (UI related).
+        selected_model (str): Model selected from UI list.
+
+    Yields:
+        str: The cumulative response from the model.
+    """
     print(f"Received message: {message}")
-    print(f"Received {len(image_files) if image_files else 0} images")
+    print(f"Received {len(image_files) if image_files else 0} images for current turn")
     print(f"History: {history}")
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
@@ -83,90 +105,80 @@ def respond(
     if seed == -1:
         seed = None
 
-    # Create multimodal content if images are present
+    # Create multimodal content if images are present for the current message
+    # The 'message' parameter to 'respond' is now the text part of the current turn
+    # 'image_files' parameter to 'respond' now holds image paths for the current turn
+    current_turn_content = []
+    if message and isinstance(message, str) and message.strip():
+        current_turn_content.append({
+            "type": "text",
+            "text": message
+        })
+
     if image_files and len(image_files) > 0:
-        # Process the user message to include images
-        user_content = []
-
-        # Add text part if there is any
-        if message and message.strip():
-            user_content.append({
-                "type": "text",
-                "text": message
-            })
-
-        # Add image parts
-        for img in image_files:
-            if img is not None:
-                # Get raw image data from path
+        for img_path in image_files: # Iterate through paths in image_files
+            if img_path is not None:
                 try:
-                    encoded_image = encode_image(img)
+                    encoded_image = encode_image(img_path) # img_path is already a path
                     if encoded_image:
-                        user_content.append({
+                        current_turn_content.append({
                             "type": "image_url",
                             "image_url": {
                                 "url": f"data:image/jpeg;base64,{encoded_image}"
                             }
                         })
                 except Exception as e:
-                    print(f"Error encoding image: {e}")
+                    print(f"Error encoding image for current turn: {e}")
+
+    # If current_turn_content is empty (e.g. only empty text message), use the raw message
+    if not current_turn_content and isinstance(message, str):
+        final_user_content_for_api = message
+    elif not current_turn_content and not isinstance(message, str): # case where message might be complex type but empty
+        final_user_content_for_api = "" # or handle as error
     else:
-        # Text-only message
-        user_content = message
+        final_user_content_for_api = current_turn_content
+
 
     # Prepare messages in the format expected by the API
-    messages = [{"role": "system", "content": system_message}]
+    messages_for_api = [{"role": "system", "content": system_message}]
     print("Initial messages array constructed.")
 
     # Add conversation history to the context
-    for val in history:
-        user_part = val[0]
-        assistant_part = val[1]
-        if user_part:
-            # Handle both text-only and multimodal messages in history
-            if isinstance(user_part, tuple) and len(user_part) == 2:
-                # This is a multimodal message with text and images
-                history_content = []
-                if user_part[0]: # Text
-                    history_content.append({
-                        "type": "text",
-                        "text": user_part[0]
-                    })
-
-                for img in user_part[1]: # Images
-                    if img:
-                        try:
-                            encoded_img = encode_image(img)
-                            if encoded_img:
-                                history_content.append({
-                                    "type": "image_url",
-                                    "image_url": {
-                                        "url": f"data:image/jpeg;base64,{encoded_img}"
-                                    }
-                                })
-                        except Exception as e:
-                            print(f"Error encoding history image: {e}")
-
-                messages.append({"role": "user", "content": history_content})
-            else:
-                # Regular text message
-                messages.append({"role": "user", "content": user_part})
-                print(f"Added user message to context (type: {type(user_part)})")
-
-        if assistant_part:
-            messages.append({"role": "assistant", "content": assistant_part})
-            print(f"Added assistant message to context: {assistant_part}")
+    for val in history: # history is list[tuple[str, str]]
+        user_hist_msg_content = val[0] # This is what user typed or image markdown
+        assistant_hist_msg = val[1]
+
+        # Process user history message (could be text or markdown image path)
+        if user_hist_msg_content:
+            # Check if it's an image markdown from history
+            if isinstance(user_hist_msg_content, str) and user_hist_msg_content.startswith("![Image]("):
+                hist_img_path = user_hist_msg_content.replace("![Image](", "").replace(")", "")
+                encoded_hist_image = encode_image(hist_img_path)
+                if encoded_hist_image:
+                    messages_for_api.append({"role": "user", "content": [
+                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_hist_image}"}}
+                    ]})
+                else: # if image encoding fails, maybe send a placeholder or skip
+                    messages_for_api.append({"role": "user", "content": "[Image could not be loaded]"})
+            else: # It's a text message from history
+                messages_for_api.append({"role": "user", "content": user_hist_msg_content})
+            print(f"Added user message to API context from history (type: {type(user_hist_msg_content)})")
+
+        if assistant_hist_msg:
+            messages_for_api.append({"role": "assistant", "content": assistant_hist_msg})
+            print(f"Added assistant message to API context from history: {assistant_hist_msg}")
+
+    # Append the latest user message (which now includes images if any for this turn)
+    messages_for_api.append({"role": "user", "content": final_user_content_for_api})
+    print(f"Latest user message appended to API context (content type: {type(final_user_content_for_api)})")
 
-    # Append the latest user message
-    messages.append({"role": "user", "content": user_content})
-    print(f"Latest user message appended (content type: {type(user_content)})")
 
     # Determine which model to use, prioritizing custom_model if provided
     model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
     print(f"Model selected for inference: {model_to_use}")
 
     # Start with an empty string to build the response as tokens stream in
-    response = ""
+    response_text = ""
     print(f"Sending request to {provider} provider.")
 
     # Prepare parameters for the chat completion request
@@ -185,7 +197,7 @@ def respond(
         # Create a generator for the streaming response
         stream = client.chat_completion(
             model=model_to_use,
-            messages=messages,
+            messages=messages_for_api, # Use the correctly formatted messages
             stream=True,
             **parameters
         )
@@ -197,17 +209,17 @@ def respond(
             if hasattr(chunk, 'choices') and len(chunk.choices) > 0:
                 # Extract the content from the response
                 if hasattr(chunk.choices[0], 'delta') and hasattr(chunk.choices[0].delta, 'content'):
-                    token_text = chunk.choices[0].delta.content
-                    if token_text:
-                        print(token_text, end="", flush=True)
-                        response += token_text
-                        yield response
+                    token_text_chunk = chunk.choices[0].delta.content
+                    if token_text_chunk:
+                        print(token_text_chunk, end="", flush=True)
+                        response_text += token_text_chunk
+                        yield response_text
 
         print()
     except Exception as e:
         print(f"Error during inference: {e}")
-        response += f"\nError: {str(e)}"
-        yield response
+        response_text += f"\nError: {str(e)}"
+        yield response_text
 
     print("Completed response generation.")
 
@@ -294,212 +306,221 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
 
     # Provider selection
     providers_list = [
-        "hf-inference", # Default Hugging Face Inference
-        "cerebras", # Cerebras provider
-        "together", # Together AI
-        "sambanova", # SambaNova
-        "novita", # Novita AI
-        "cohere", # Cohere
-        "fireworks-ai", # Fireworks AI
-        "hyperbolic", # Hyperbolic
-        "nebius", # Nebius
+        "hf-inference", "cerebras", "together", "sambanova",
+        "novita", "cohere", "fireworks-ai", "hyperbolic", "nebius",
     ]
 
     provider_radio = gr.Radio(
-        choices=providers_list,
-        value="hf-inference",
-        label="Inference Provider",
+        choices=providers_list, value="hf-inference", label="Inference Provider",
     )
 
-    # New BYOK textbox
     byok_textbox = gr.Textbox(
-        value="",
-        label="BYOK (Bring Your Own Key)",
+        value="", label="BYOK (Bring Your Own Key)",
         info="Enter a custom Hugging Face API key here. When empty, only 'hf-inference' provider can be used.",
-        placeholder="Enter your Hugging Face API token",
-        type="password" # Hide the API key for security
+        placeholder="Enter your Hugging Face API token", type="password"
    )
 
-    # Custom model box
     custom_model_box = gr.Textbox(
-        value="",
-        label="Custom Model",
+        value="", label="Custom Model",
        info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
        placeholder="meta-llama/Llama-3.3-70B-Instruct"
    )
 
-    # Model search
     model_search_box = gr.Textbox(
-        label="Filter Models",
-        placeholder="Search for a featured model...",
-        lines=1
+        label="Filter Models", placeholder="Search for a featured model...", lines=1
    )
 
-    # Featured models list
     models_list = [
-        "meta-llama/Llama-3.2-11B-Vision-Instruct",
-        "meta-llama/Llama-3.3-70B-Instruct",
-        "meta-llama/Llama-3.1-70B-Instruct",
-        "meta-llama/Llama-3.0-70B-Instruct",
-        "meta-llama/Llama-3.2-3B-Instruct",
-        "meta-llama/Llama-3.2-1B-Instruct",
-        "meta-llama/Llama-3.1-8B-Instruct",
-        "NousResearch/Hermes-3-Llama-3.1-8B",
-        "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
-        "mistralai/Mistral-Nemo-Instruct-2407",
-        "mistralai/Mixtral-8x7B-Instruct-v0.1",
-        "mistralai/Mistral-7B-Instruct-v0.3",
-        "mistralai/Mistral-7B-Instruct-v0.2",
-        "Qwen/Qwen3-235B-A22B",
-        "Qwen/Qwen3-32B",
-        "Qwen/Qwen2.5-72B-Instruct",
-        "Qwen/Qwen2.5-3B-Instruct",
-        "Qwen/Qwen2.5-0.5B-Instruct",
-        "Qwen/QwQ-32B",
-        "Qwen/Qwen2.5-Coder-32B-Instruct",
-        "microsoft/Phi-3.5-mini-instruct",
-        "microsoft/Phi-3-mini-128k-instruct",
-        "microsoft/Phi-3-mini-4k-instruct",
+        "meta-llama/Llama-3.2-11B-Vision-Instruct", "meta-llama/Llama-3.3-70B-Instruct",
+        "meta-llama/Llama-3.1-70B-Instruct", "meta-llama/Llama-3.0-70B-Instruct",
+        "meta-llama/Llama-3.2-3B-Instruct", "meta-llama/Llama-3.2-1B-Instruct",
+        "meta-llama/Llama-3.1-8B-Instruct", "NousResearch/Hermes-3-Llama-3.1-8B",
+        "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", "mistralai/Mistral-Nemo-Instruct-2407",
+        "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.3",
+        "mistralai/Mistral-7B-Instruct-v0.2", "Qwen/Qwen3-235B-A22B", "Qwen/Qwen3-32B",
+        "Qwen/Qwen2.5-72B-Instruct", "Qwen/Qwen2.5-3B-Instruct", "Qwen/Qwen2.5-0.5B-Instruct",
+        "Qwen/QwQ-32B", "Qwen/Qwen2.5-Coder-32B-Instruct", "microsoft/Phi-3.5-mini-instruct",
+        "microsoft/Phi-3-mini-128k-instruct", "microsoft/Phi-3-mini-4k-instruct",
    ]
 
     featured_model_radio = gr.Radio(
-        label="Select a model below",
-        choices=models_list,
-        value="meta-llama/Llama-3.2-11B-Vision-Instruct", # Default to a multimodal model
-        interactive=True
+        label="Select a model below", choices=models_list,
+        value="meta-llama/Llama-3.2-11B-Vision-Instruct", interactive=True
    )
 
     gr.Markdown("[View all Text-to-Text models](https://huggingface.co/models?inference_provider=all&pipeline_tag=text-generation&sort=trending) | [View all multimodal models](https://huggingface.co/models?inference_provider=all&pipeline_tag=image-text-to-text&sort=trending)")
 
-    # MCP Support Information Accordion
-    with gr.Accordion("MCP Support (for LLMs)", open=False):
+    # MCP Support Information
+    with gr.Accordion("MCP Support (for AI Tool Use)", open=False):
         gr.Markdown("""
-        ### Model Context Protocol (MCP) Support
-
-        This application can function as an MCP Server, allowing compatible AI models and agents (like Claude Desktop or custom MCP clients) to use its text and image generation capabilities as a tool.
-
-        When MCP is enabled, Gradio automatically exposes the relevant functions (likely based on the `bot` function in this app) as MCP tools.
+        ### MCP (Model Context Protocol) Enabled
 
-        **To connect an MCP client to this server:**
+        This application's text and image generation capability can be used as a tool by MCP-compatible AI models
+        (e.g., certain versions of Claude, Cursor, or custom MCP clients like Tiny Agents).
 
-        1. Ensure this Gradio application is running.
-        2. Use the following URL for the MCP server in your client configuration:
-           - If running locally: `http://127.0.0.1:7860/gradio_api/mcp/sse`
-           - If deployed on Hugging Face Spaces: `https://YOUR_USERNAME-YOUR_SPACENAME.hf.space/gradio_api/mcp/sse` (replace with your actual Space URL)
+        The primary interaction function (`bot`) is exposed as an MCP tool.
+        Provide the conversation history and other parameters as arguments to the tool.
+        For multimodal input, ensure the history correctly references image data that the server can access
+        (Gradio's MCP layer may handle base64 to file conversion if the tool schema indicates file inputs).
+
+        **MCP Server URL:**
+        `https://YOUR_SPACE_NAME-serverless-textgen-hub.hf.space/gradio_api/mcp/sse`
+        *(Replace `YOUR_SPACE_NAME` with your Hugging Face username or organization if this is a user space,
+        or the full space name if different. You can find this URL in your browser once the Space is running.)*
 
-        **Example MCP Client Configuration (`mcp.json` or similar):**
+        **Example MCP Client Configuration (`mcp.json` style):**
         ```json
         {
-          "mcpServers": {
-            "serverlessTextgenHub": {
-              "url": "http://127.0.0.1:7860/gradio_api/mcp/sse"
+          "servers": [
+            {
+              "name": "ServerlessTextGenHubTool",
+              "transport": {
+                "type": "sse",
+                "url": "https://YOUR_SPACE_NAME-serverless-textgen-hub.hf.space/gradio_api/mcp/sse"
+              }
             }
-          }
+          ]
         }
         ```
-
-        **Tool Parameters:**
-        The exposed MCP tool will likely have parameters corresponding to the inputs of the `bot` function (e.g., `history`, `system_msg`, `max_tokens`, `temperature`, `model`, etc.).
-
-        * **Important for `history` parameter:** For image inputs, the MCP client might need to format the `history` to include image references in a way the `bot` function can parse (e.g., markdown links `![Image](URL_or_base64_data_uri)` within the history's message part).
-        * It's highly recommended to inspect the MCP schema for this server to understand the exact tool names, descriptions, and input/output schemas. You can usually find this at: `http://127.0.0.1:7860/gradio_api/mcp/schema` (or the equivalent URL for your deployed Space).
-
-        This allows for powerful integrations where an AI agent can programmatically request text or multimodal generations from this Serverless-TextGen-Hub.
+        **Note on Tool Schema:** The exact schema of the MCP tool will be determined by Gradio based on the `bot` function's
+        signature (including type hints) and the Gradio components it interacts with.
+        Refer to the `/gradio_api/mcp/schema` endpoint of your running application for the precise tool definition.
+        For image inputs via MCP, clients should ideally send image URLs or base64 encoded data if the tool's schema supports file types.
+        Gradio's MCP layer attempts to handle file data conversions.
         """)
 
     # Chat history state
-    chat_history = gr.State([])
+    chat_history = gr.State([]) # Not directly used, chatbot component handles its state internally
 
     # Function to filter models
-    def filter_models(search_term):
+    def filter_models(search_term: str):
         print(f"Filtering models with search term: {search_term}")
         filtered = [m for m in models_list if search_term.lower() in m.lower()]
         print(f"Filtered models: {filtered}")
-        return gr.update(choices=filtered)
+        return gr.update(choices=filtered if filtered else models_list, value=featured_model_radio.value if filtered and featured_model_radio.value in filtered else (filtered[0] if filtered else models_list[0]))
 
     # Function to set custom model from radio
-    def set_custom_model_from_radio(selected):
+    def set_custom_model_from_radio(selected: str):
         print(f"Featured model selected: {selected}")
+        # This function now directly returns the selected model to update custom_model_box
+        # If custom_model_box is meant to override, this keeps them in sync until user types in custom_model_box
         return selected
 
-    # Function for the chat interface
-    def user(user_message, history):
-        print(f"User message received: {user_message}")
-
-        if not user_message or (not user_message.get("text") and not user_message.get("files")):
-            print("Empty message, skipping")
-            return history # Return immediately if message is empty
+
+    # Function for the chat interface (user's turn)
+    def user(user_message_input: dict, history: list[list[str | None]]):
+        print(f"User input (raw from MultimodalTextbox): {user_message_input}")
 
-        text_content = user_message.get("text", "").strip()
-        files = user_message.get("files", [])
+        text_content = user_message_input.get("text", "").strip()
+        files = user_message_input.get("files", []) # List of temp file paths
 
-        print(f"Text content: {text_content}")
-        print(f"Files: {files}")
+        print(f"Parsed text content: '{text_content}'")
+        print(f"Parsed files: {files}")
 
-        if not text_content and not files: # Check again after stripping text
-            print("No content to display")
-            return history
-
-        # Append text message first if it exists and is not empty
+        # Append text message to history if present
         if text_content:
-            print(f"Adding text message: {text_content}")
             history.append([text_content, None])
-
-        # Then append each image file as a separate message
+            print(f"Appended text to history: {text_content}")
+
+        # Append image messages to history
         if files:
             for file_path in files:
-                if file_path and isinstance(file_path, str): # Ensure file_path is valid
-                    print(f"Adding image: {file_path}")
-                    history.append([f"![Image]({file_path})", None]) # Image as a new message
-
+                if file_path and isinstance(file_path, str): # file_path is a temp path from Gradio
+                    # Embed image as markdown link in history for display
+                    # The actual file path is used by `respond` via `bot`
+                    history.append([f"![Image]({file_path})", None])
+                    print(f"Appended image to history: {file_path}")
+
+        # If neither text nor files, don't add an empty turn
+        if not text_content and not files:
+            print("Empty input, no change to history.")
+            return history # Return current history as is
+
         return history
 
     # Define bot response function
-    def bot(history, system_msg, max_tokens, temperature, top_p, freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model):
-        if not history or not history[-1][0]: # Check if history or last message is empty
-            print("No history or empty last message to process for bot")
-            # Yield an empty update or the history itself to avoid errors
-            # depending on how Gradio handles empty yields.
-            # For safety, just return the history if it's in a bad state.
-            yield history
+    def bot(
+        history: list[list[str | None]], # Type hint for history
+        system_msg: str,
+        max_tokens: int,
+        temperature: float,
+        top_p: float,
+        freq_penalty: float,
+        seed: int,
+        provider: str,
+        api_key: str,
+        custom_model: str,
+        # model_search_term: str, # This argument comes from model_search_box
+        selected_model: str # This argument comes from featured_model_radio
+    ):
+        """
+        Processes user input from the chat history, calls the language model via the 'respond'
+        function, and streams the bot's response back to update the chat history.
+        This function is intended to be exposed as an MCP tool.
+
+        Args:
+            history (list[list[str | None]]): The conversation history.
+                Each item is [user_message, bot_message].
+                User messages can be text or markdown image paths like "![Image](/tmp/path.jpg)".
+            system_msg (str): The system prompt.
+            max_tokens (int): Maximum number of tokens to generate.
+            temperature (float): Sampling temperature for generation.
+            top_p (float): Top-P (nucleus) sampling probability.
+            freq_penalty (float): Frequency penalty for generation.
+            seed (int): Random seed for generation (-1 for random).
+            provider (str): The inference provider to use.
+            api_key (str): Custom API key, if provided by the user.
+            custom_model (str): Custom model path/ID. If empty, selected_model is used.
+            selected_model (str): The model selected from the featured list.
+
+        Yields:
+            list[list[str | None]]: The updated chat history with the bot's streaming response.
+        """
+        print(f"Bot function called. History: {history}")
+        if not history or history[-1][0] is None: # Check if last user message is None
+            print("No user message in the last history turn to process.")
+            # yield history # removed to avoid issues with Gradio expecting a specific sequence
+            return # Or raise an error, or handle appropriately
+
+        # The last user message is history[-1][0]
+        # The bot's response will go into history[-1][1]
+
+        user_turn_content = history[-1][0]
+        current_turn_text_message = ""
+        current_turn_image_paths = []
+
+        # Check if the last user message in history is an image markdown
+        if isinstance(user_turn_content, str) and user_turn_content.startswith("![Image]("):
+            # This is an image message
+            img_path = user_turn_content.replace("![Image](", "").replace(")", "")
+            current_turn_image_paths.append(img_path)
+            # Check if there was a text message immediately preceding this image in the same "turn"
+            # This requires looking at how `user` function structures history.
+            # `user` adds text and images as separate entries in history.
+            # So, if history[-1][0] is an image, history[-2][0] might be related text IF it was part of the same multimodal input.
+            # This logic becomes complex. Simpler: assume each history entry is distinct.
+            # For MCP, it's better if the client structures the call to `bot` clearly.
+            print(f"Processing image from history: {img_path}")
+        elif isinstance(user_turn_content, str):
+            # This is a text message
+            current_turn_text_message = user_turn_content
+            print(f"Processing text from history: {current_turn_text_message}")
+        else:
+            print(f"Unexpected content in history user turn: {user_turn_content}")
+            # yield history # removed
             return
 
-        user_message_content = history[-1][0] # This is the user's latest message (text or image markdown)
-        print(f"Bot processing user message content: {user_message_content}")
-
-        # Determine if the current turn is primarily about an image or text
-        # This logic assumes images are added as separate history entries like "![Image](path)"
-        # and text prompts might precede them or be separate.
 
-        current_message_text_for_api = ""
-        current_image_files_for_api = []
-
-        # Check if the last entry is an image
-        if isinstance(user_message_content, str) and user_message_content.startswith("![Image]("):
-            image_path = user_message_content.replace("![Image](", "").replace(")", "")
-            current_image_files_for_api.append(image_path)
-            print(f"Bot identified image in last history entry: {image_path}")
-            # If it's an image, check the second to last entry for a text prompt
-            if len(history) > 1:
-                prev_content = history[-2][0]
-                if isinstance(prev_content, str) and not prev_content.startswith("![Image]("):
-                    current_message_text_for_api = prev_content
-                    print(f"Bot identified preceding text for image: {current_message_text_for_api}")
-        else: # Last entry is text
-            current_message_text_for_api = user_message_content
-            print(f"Bot identified text in last history entry: {current_message_text_for_api}")
-
-        # The history sent to `respond` should not include the current turn's input,
-        # as `respond` will add `message` (current_message_text_for_api) to its internal `messages` list.
-        # If an image is present, it's passed via `image_files`.
-        history_for_respond_func = history[:-1] # Pass history *before* the current turn
-
-        history[-1][1] = "" # Initialize assistant's response for the current turn
+        history[-1][1] = "" # Initialize bot response field for the current turn
 
+        # Call the 'respond' function.
+        # History for 'respond' should be prior turns, not including the current user message being processed.
+        history_for_respond = history[:-1]
+
         for response_chunk in respond(
-            message=current_message_text_for_api,
-            image_files=current_image_files_for_api,
-            history=history_for_respond_func, # Pass prior history
+            message=current_turn_text_message, # Text part of current turn
+            image_files=current_turn_image_paths, # Image paths of current turn
+            history=history_for_respond, # History up to the previous turn
             system_message=system_msg,
             max_tokens=max_tokens,
             temperature=temperature,
@@ -509,14 +530,14 @@ def respond(
             provider=provider,
             custom_api_key=api_key,
             custom_model=custom_model,
-            model_search_term=search_term, # Though these two might not be directly used by respond if model is fixed
+            model_search_term="", # Not directly used by respond's core logic here
            selected_model=selected_model
        ):
-            history[-1][1] = response_chunk
+            history[-1][1] = response_chunk # Update bot response in the current turn
            yield history
 
-
     # Event handlers
+    # The parameters to `bot` must match the order of inputs list
     msg.submit(
         user,
         [msg, chatbot],
@@ -526,39 +547,32 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
         bot,
         [chatbot, system_message_box, max_tokens_slider, temperature_slider, top_p_slider,
          frequency_penalty_slider, seed_slider, provider_radio, byok_textbox, custom_model_box,
-         model_search_box, featured_model_radio],
+         # model_search_box, # Removed from bot inputs as it's UI only
+         featured_model_radio],
         [chatbot]
     ).then(
-        lambda: {"text": "", "files": []}, # Clear inputs after submission
+        lambda: {"text": "", "files": []},
        None,
        [msg]
    )
 
     model_search_box.change(
-        fn=filter_models,
-        inputs=model_search_box,
-        outputs=featured_model_radio
+        fn=filter_models, inputs=model_search_box, outputs=featured_model_radio
    )
    print("Model search box change event linked.")
 
     featured_model_radio.change(
-        fn=set_custom_model_from_radio,
-        inputs=featured_model_radio,
-        outputs=custom_model_box
+        fn=set_custom_model_from_radio, inputs=featured_model_radio, outputs=custom_model_box
    )
    print("Featured model radio button change event linked.")
 
     byok_textbox.change(
-        fn=validate_provider,
-        inputs=[byok_textbox, provider_radio],
-        outputs=provider_radio
+        fn=validate_provider, inputs=[byok_textbox, provider_radio], outputs=provider_radio
    )
    print("BYOK textbox change event linked.")
 
     provider_radio.change(
-        fn=validate_provider,
-        inputs=[byok_textbox, provider_radio],
-        outputs=provider_radio
+        fn=validate_provider, inputs=[byok_textbox, provider_radio], outputs=provider_radio
    )
    print("Provider radio button change event linked.")
 
@@ -566,4 +580,5 @@ print("Gradio interface initialized.")
 
 if __name__ == "__main__":
     print("Launching the demo application.")
-    demo.launch(show_api=True, mcp_server=True) # MCP SERVER ENABLED HERE
+    # Added mcp_server=True
+    demo.launch(show_api=True, mcp_server=True)
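For reference, the message payload that the reworked `respond()` assembles for a multimodal turn looks roughly like the sketch below. The `make_user_turn` helper and the image path are illustrative only (they are not part of app.py); the dictionary shape mirrors the `"type": "text"` / `"type": "image_url"` parts built in the diff above.

```python
import base64

def make_user_turn(text: str, image_paths: list[str]) -> dict:
    # Hypothetical helper, shown only to illustrate the payload shape.
    content = []
    if text.strip():
        content.append({"type": "text", "text": text})
    for path in image_paths:
        with open(path, "rb") as f:
            encoded = base64.b64encode(f.read()).decode("utf-8")
        content.append({
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
        })
    return {"role": "user", "content": content}

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    make_user_turn("Describe this picture.", ["/tmp/example.jpg"]),  # placeholder path
]
# respond() then passes such a list to client.chat_completion(model=..., messages=..., stream=True, ...)
```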
 
 
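Because `respond()` yields the cumulative text rather than per-token deltas, a caller only needs the most recent value. A minimal usage sketch, with placeholder argument values and keyword names taken from the new docstring:

```python
final_text = ""
for final_text in respond(
    message="Hello!",
    image_files=[],
    history=[],
    system_message="You are a helpful assistant.",
    max_tokens=256,
    temperature=0.7,
    top_p=0.95,
    frequency_penalty=0.0,
    seed=-1,
    provider="hf-inference",
    custom_api_key="",
    custom_model="",
    model_search_term="",
    selected_model="meta-llama/Llama-3.2-11B-Vision-Instruct",
):
    pass  # each yielded value replaces the previous partial answer
print(final_text)  # the complete reply once the stream ends
```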
 
 
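The accordion text points readers at the `/gradio_api/mcp/schema` endpoint for the exact tool definition. A small sketch for fetching it with the standard library (the URL is the local default mentioned in the text; substitute your Space URL when deployed):

```python
import json
import urllib.request

SCHEMA_URL = "http://127.0.0.1:7860/gradio_api/mcp/schema"  # placeholder: local dev server

with urllib.request.urlopen(SCHEMA_URL) as resp:
    schema = json.load(resp)

print(json.dumps(schema, indent=2))  # inspect tool names, descriptions, and input/output schemas
```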
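The `user()`/`bot()` pair relies on a simple convention: images are stored in the chat history as `![Image](path)` markdown entries, and `bot()` recovers the path by stripping that wrapper. A standalone sketch of that convention (the helper name is hypothetical):

```python
def split_history_entry(entry: str) -> tuple[str, list[str]]:
    # Hypothetical helper mirroring the string handling in bot(); not part of app.py.
    if entry.startswith("![Image]("):
        # "![Image](/tmp/x.jpg)" -> path "/tmp/x.jpg"
        return "", [entry.replace("![Image](", "").replace(")", "")]
    return entry, []

print(split_history_entry("![Image](/tmp/gradio/abc123/cat.jpg)"))  # ('', ['/tmp/gradio/abc123/cat.jpg'])
print(split_history_entry("What is in this photo?"))                # ('What is in this photo?', [])
```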
 
 
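Finally, the comment added next to the event handlers matters in practice: Gradio passes component values to `bot()` positionally, so the `inputs` list and the function signature must stay in the same order. A wiring sketch under that assumption (the outputs of the initial `submit` step are not visible in this hunk, so `[chatbot]` there is an assumption):

```python
msg.submit(
    user,
    [msg, chatbot],
    [chatbot],  # assumed: user() returns the updated history for the Chatbot
).then(
    bot,
    [chatbot, system_message_box, max_tokens_slider, temperature_slider, top_p_slider,
     frequency_penalty_slider, seed_slider, provider_radio, byok_textbox, custom_model_box,
     featured_model_radio],  # order must match bot()'s parameters
    [chatbot],
).then(
    lambda: {"text": "", "files": []},  # clear the multimodal textbox after submission
    None,
    [msg],
)
```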