Nymbo committed
Commit 7c1212e · verified · 1 Parent(s): 02ec239

Update app.py

Files changed (1): app.py (+116 -249)
app.py CHANGED
@@ -3,7 +3,6 @@ from huggingface_hub import InferenceClient
 import os
 import json
 import base64
-import requests
 from PIL import Image
 import io
 
@@ -16,7 +15,7 @@ def encode_image(image_path):
         print("No image path provided")
         return None
 
-    try:
+    try:
         print(f"Encoding image from path: {image_path}")
 
         # If it's already a PIL Image
@@ -40,61 +39,9 @@ def encode_image(image_path):
         print(f"Error encoding image: {e}")
         return None
 
-# Function to convert text to speech using Kokoro MCP server
-def text_to_speech(text, mcp_server_url, speed=1.0):
-    """
-    Convert text to speech using an MCP server with TTS capabilities.
-
-    Args:
-        text (str): The text to convert to speech
-        mcp_server_url (str): URL of the MCP server
-        speed (float): Speech speed multiplier
-
-    Returns:
-        str: Base64 encoded audio data
-    """
-    if not text or not mcp_server_url:
-        return None
-
-    try:
-        # Create JSON-RPC request for the TTS tool
-        payload = {
-            "jsonrpc": "2.0",
-            "id": 1,
-            "method": "tools/call",
-            "params": {
-                "name": "text_to_audio_b64",
-                "arguments": {
-                    "text": text,
-                    "speed": speed
-                }
-            }
-        }
-
-        # Send request to MCP server
-        response = requests.post(
-            mcp_server_url,
-            json=payload,
-            headers={"Content-Type": "application/json"}
-        )
-
-        if response.status_code == 200:
-            result = response.json()
-            if "result" in result:
-                return result["result"]
-            else:
-                print(f"Error in MCP server response: {result.get('error', 'Unknown error')}")
-        else:
-            print(f"Error calling MCP server: {response.status_code}")
-
-    except Exception as e:
-        print(f"Error in text_to_speech: {e}")
-
-    return None
-
 def respond(
     message,
-    image_files,
+    image_files,  # Changed parameter name and structure
     history: list[tuple[str, str]],
     system_message,
     max_tokens,
@@ -106,9 +53,7 @@ def respond(
     custom_api_key,
     custom_model,
     model_search_term,
-    selected_model,
-    mcp_server_url,
-    enable_tts
+    selected_model
 ):
     print(f"Received message: {message}")
     print(f"Received {len(image_files) if image_files else 0} images")
@@ -121,8 +66,6 @@ def respond(
     print(f"Selected model (custom_model): {custom_model}")
     print(f"Model search term: {model_search_term}")
     print(f"Selected model from radio: {selected_model}")
-    print(f"MCP Server URL: {mcp_server_url}")
-    print(f"TTS Enabled: {enable_tts}")
 
     # Determine which token to use
     token_to_use = custom_api_key if custom_api_key.strip() != "" else ACCESS_TOKEN
@@ -266,19 +209,6 @@ def respond(
             response += f"\nError: {str(e)}"
             yield response
 
-    # If TTS is enabled and MCP server URL is provided, generate speech from the response
-    if enable_tts and mcp_server_url and response:
-        try:
-            print("Generating speech from response using MCP server...")
-            audio_b64 = text_to_speech(response, mcp_server_url)
-            if audio_b64:
-                # Add a hidden audio tag with the audio data
-                audio_html = f'<audio id="tts-audio" autoplay style="display:none"><source src="data:audio/wav;base64,{audio_b64}" type="audio/wav"></audio>'
-                response += f"\n\n{audio_html}"
-                yield response
-        except Exception as e:
-            print(f"Error generating speech: {e}")
-
     print("Completed response generation.")
 
 # Function to validate provider selection based on BYOK
@@ -287,30 +217,13 @@ def validate_provider(api_key, provider):
         return gr.update(value="hf-inference")
     return gr.update(value=provider)
 
-# Function to validate MCP Server URL
-def validate_mcp_url(url):
-    if not url:
-        return gr.update(value="")
-
-    if not url.startswith(("http://", "https://")):
-        url = "https://" + url
-
-    # If gradio_api/mcp/sse is not in the URL, add it
-    if not url.endswith("/gradio_api/mcp/sse"):
-        if url.endswith("/"):
-            url = url + "gradio_api/mcp/sse"
-        else:
-            url = url + "/gradio_api/mcp/sse"
-
-    return gr.update(value=url)
-
 # GRADIO UI
 with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
     # Create the chatbot component
     chatbot = gr.Chatbot(
         height=600,
         show_copy_button=True,
-        placeholder="Select a model and begin chatting. Now supports multiple inference providers, multimodal inputs, and MCP servers",
+        placeholder="Select a model and begin chatting. Now supports multiple inference providers and multimodal inputs",
         layout="panel"
     )
     print("Chatbot interface created.")
@@ -423,7 +336,6 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
     )
 
     # Featured models list
-    # Updated to include multimodal models
     models_list = [
         "meta-llama/Llama-3.2-11B-Vision-Instruct",
         "meta-llama/Llama-3.3-70B-Instruct",
@@ -458,40 +370,42 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
     )
 
     gr.Markdown("[View all Text-to-Text models](https://huggingface.co/models?inference_provider=all&pipeline_tag=text-generation&sort=trending) | [View all multimodal models](https://huggingface.co/models?inference_provider=all&pipeline_tag=image-text-to-text&sort=trending)")
 
-    # Add MCP Server configuration
-    with gr.Accordion("MCP Server Settings", open=False):
-        gr.Markdown("""
-        # Model Context Protocol (MCP) Integration
-
-        Connect to MCP servers to add advanced capabilities like Text-to-Speech to your chat.
-
-        [Learn more about MCP](https://modelcontextprotocol.io/)
-        """)
-
-        mcp_server_url = gr.Textbox(
-            label="MCP Server URL",
-            placeholder="https://your-tts-server.hf.space/gradio_api/mcp/sse",
-            info="URL to an MCP-compatible server (e.g., Kokoro TTS)"
-        )
-
-        enable_tts = gr.Checkbox(
-            label="Enable Text-to-Speech",
-            value=False,
-            info="When enabled, AI responses will be read aloud using the MCP server"
-        )
-
-        # Example servers
-        gr.Markdown("""
-        ### Example MCP Servers
-
-        Try these MCP servers for additional capabilities:
-
-        - **Kokoro TTS**: `https://fdaudens-kokoro-mcp.hf.space/gradio_api/mcp/sse`
-        - **More examples coming soon**
-
-        To use, copy the URL above and paste it into the MCP Server URL field.
-        """)
+    # MCP Support Information Accordion
+    with gr.Accordion("MCP Support (for LLMs)", open=False):
+        gr.Markdown("""
+        ### Model Context Protocol (MCP) Support
+
+        This application can function as an MCP Server, allowing compatible AI models and agents (like Claude Desktop or custom MCP clients) to use its text and image generation capabilities as a tool.
+
+        When MCP is enabled, Gradio automatically exposes the relevant functions (likely based on the `bot` function in this app) as MCP tools.
+
+        **To connect an MCP client to this server:**
+
+        1. Ensure this Gradio application is running.
+        2. Use the following URL for the MCP server in your client configuration:
+           - If running locally: `http://127.0.0.1:7860/gradio_api/mcp/sse`
+           - If deployed on Hugging Face Spaces: `https://YOUR_USERNAME-YOUR_SPACENAME.hf.space/gradio_api/mcp/sse` (replace with your actual Space URL)
+
+        **Example MCP Client Configuration (`mcp.json` or similar):**
+        ```json
+        {
+          "mcpServers": {
+            "serverlessTextgenHub": {
+              "url": "http://127.0.0.1:7860/gradio_api/mcp/sse"
+            }
+          }
+        }
+        ```
+
+        **Tool Parameters:**
+        The exposed MCP tool will likely have parameters corresponding to the inputs of the `bot` function (e.g., `history`, `system_msg`, `max_tokens`, `temperature`, `model`, etc.).
+
+        * **Important for `history` parameter:** For image inputs, the MCP client might need to format the `history` to include image references in a way the `bot` function can parse (e.g., markdown links `![Image](URL_or_base64_data_uri)` within the history's message part).
+        * It's highly recommended to inspect the MCP schema for this server to understand the exact tool names, descriptions, and input/output schemas. You can usually find this at: `http://127.0.0.1:7860/gradio_api/mcp/schema` (or the equivalent URL for your deployed Space).
+
+        This allows for powerful integrations where an AI agent can programmatically request text or multimodal generations from this Serverless-TextGen-Hub.
+        """)
 
     # Chat history state
     chat_history = gr.State([])
@@ -510,134 +424,99 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
 
     # Function for the chat interface
     def user(user_message, history):
-        # Debug logging for troubleshooting
         print(f"User message received: {user_message}")
 
-        # Skip if message is empty (no text and no files)
         if not user_message or (not user_message.get("text") and not user_message.get("files")):
             print("Empty message, skipping")
-            return history
+            return history  # Return immediately if message is empty
 
-        # Prepare multimodal message format
         text_content = user_message.get("text", "").strip()
         files = user_message.get("files", [])
 
         print(f"Text content: {text_content}")
         print(f"Files: {files}")
 
-        # If both text and files are empty, skip
-        if not text_content and not files:
+        if not text_content and not files:  # Check again after stripping text
             print("No content to display")
             return history
 
-        # Add message with images to history
-        if files and len(files) > 0:
-            # Add text message first if it exists
-            if text_content:
-                # Add a separate text message
-                print(f"Adding text message: {text_content}")
-                history.append([text_content, None])
-
-            # Then add each image file separately
-            for file_path in files:
-                if file_path and isinstance(file_path, str):
-                    print(f"Adding image: {file_path}")
-                    # Add image as a separate message with no text
-                    history.append([f"![Image]({file_path})", None])
-
-            return history
-        else:
-            # For text-only messages
-            print(f"Adding text-only message: {text_content}")
-            history.append([text_content, None])
-            return history
+        # Append text message first if it exists and is not empty
+        if text_content:
+            print(f"Adding text message: {text_content}")
+            history.append([text_content, None])
+
+        # Then append each image file as a separate message
+        if files:
+            for file_path in files:
+                if file_path and isinstance(file_path, str):  # Ensure file_path is valid
+                    print(f"Adding image: {file_path}")
+                    history.append([f"![Image]({file_path})", None])  # Image as a new message
+
+        return history
 
     # Define bot response function
-    def bot(history, system_msg, max_tokens, temperature, top_p, freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model, mcp_url, tts_enabled):
-        # Check if history is valid
-        if not history or len(history) == 0:
-            print("No history to process")
-            return history
-
-        # Get the most recent message and detect if it's an image
-        user_message = history[-1][0]
-        print(f"Processing user message: {user_message}")
-
-        is_image = False
-        image_path = None
-        text_content = user_message
-
-        # Check if this is an image message (marked with ![Image])
-        if isinstance(user_message, str) and user_message.startswith("![Image]("):
-            is_image = True
-            # Extract image path from markdown format ![Image](path)
-            image_path = user_message.replace("![Image](", "").replace(")", "")
-            print(f"Image detected: {image_path}")
-            text_content = ""  # No text for image-only messages
-
-        # Look back for text context if this is an image
-        text_context = ""
-        if is_image and len(history) > 1:
-            # Use the previous message as context if it's text
-            prev_message = history[-2][0]
-            if isinstance(prev_message, str) and not prev_message.startswith("![Image]("):
-                text_context = prev_message
-                print(f"Using text context from previous message: {text_context}")
-
-        # Process message through respond function
-        history[-1][1] = ""
-
-        # Validate and format MCP server URL
-        if mcp_url:
-            mcp_url = validate_mcp_url(mcp_url)["value"]
-
-        # Use either the image or text for the API
-        if is_image:
-            # For image messages
-            for response in respond(
-                text_context,  # Text context from previous message if any
-                [image_path],  # Current image
-                history[:-1],  # Previous history
-                system_msg,
-                max_tokens,
-                temperature,
-                top_p,
-                freq_penalty,
-                seed,
-                provider,
-                api_key,
-                custom_model,
-                search_term,
-                selected_model,
-                mcp_url,
-                tts_enabled
-            ):
-                history[-1][1] = response
-                yield history
-        else:
-            # For text-only messages
-            for response in respond(
-                text_content,  # Text message
-                None,  # No image
-                history[:-1],  # Previous history
-                system_msg,
-                max_tokens,
-                temperature,
-                top_p,
-                freq_penalty,
-                seed,
-                provider,
-                api_key,
-                custom_model,
-                search_term,
-                selected_model,
-                mcp_url,
-                tts_enabled
-            ):
-                history[-1][1] = response
-                yield history
-
-    # Event handlers - only using the MultimodalTextbox's built-in submit functionality
+    def bot(history, system_msg, max_tokens, temperature, top_p, freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model):
+        if not history or not history[-1][0]:  # Check if history or last message is empty
+            print("No history or empty last message to process for bot")
+            # Yield an empty update or the history itself to avoid errors
+            # depending on how Gradio handles empty yields.
+            # For safety, just return the history if it's in a bad state.
+            yield history
+            return
+
+        user_message_content = history[-1][0]  # This is the user's latest message (text or image markdown)
+        print(f"Bot processing user message content: {user_message_content}")
+
+        # Determine if the current turn is primarily about an image or text.
+        # This logic assumes images are added as separate history entries like "![Image](path)"
+        # and text prompts might precede them or be separate.
+
+        current_message_text_for_api = ""
+        current_image_files_for_api = []
+
+        # Check if the last entry is an image
+        if isinstance(user_message_content, str) and user_message_content.startswith("![Image]("):
+            image_path = user_message_content.replace("![Image](", "").replace(")", "")
+            current_image_files_for_api.append(image_path)
+            print(f"Bot identified image in last history entry: {image_path}")
+            # If it's an image, check the second to last entry for a text prompt
+            if len(history) > 1:
+                prev_content = history[-2][0]
+                if isinstance(prev_content, str) and not prev_content.startswith("![Image]("):
+                    current_message_text_for_api = prev_content
+                    print(f"Bot identified preceding text for image: {current_message_text_for_api}")
+        else:  # Last entry is text
+            current_message_text_for_api = user_message_content
+            print(f"Bot identified text in last history entry: {current_message_text_for_api}")
+
+        # The history sent to `respond` should not include the current turn's input,
+        # as `respond` will add `message` (current_message_text_for_api) to its internal `messages` list.
+        # If an image is present, it's passed via `image_files`.
+        history_for_respond_func = history[:-1]  # Pass history *before* the current turn
+
+        history[-1][1] = ""  # Initialize assistant's response for the current turn
+
+        for response_chunk in respond(
+            message=current_message_text_for_api,
+            image_files=current_image_files_for_api,
+            history=history_for_respond_func,  # Pass prior history
+            system_message=system_msg,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            frequency_penalty=freq_penalty,
+            seed=seed,
+            provider=provider,
+            custom_api_key=api_key,
+            custom_model=custom_model,
+            model_search_term=search_term,  # These two might not be directly used by respond if the model is fixed
+            selected_model=selected_model
+        ):
+            history[-1][1] = response_chunk
+            yield history
+
+    # Event handlers
     msg.submit(
         user,
         [msg, chatbot],
@@ -647,7 +526,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
         bot,
         [chatbot, system_message_box, max_tokens_slider, temperature_slider, top_p_slider,
          frequency_penalty_slider, seed_slider, provider_radio, byok_textbox, custom_model_box,
-         model_search_box, featured_model_radio, mcp_server_url, enable_tts],
+         model_search_box, featured_model_radio],
        [chatbot]
     ).then(
        lambda: {"text": "", "files": []},  # Clear inputs after submission
@@ -655,7 +534,6 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
        [msg]
    )
 
-    # Connect the model filter to update the radio choices
    model_search_box.change(
        fn=filter_models,
        inputs=model_search_box,
@@ -663,7 +541,6 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
    )
    print("Model search box change event linked.")
 
-    # Connect the featured model radio to update the custom model box
    featured_model_radio.change(
        fn=set_custom_model_from_radio,
        inputs=featured_model_radio,
@@ -671,7 +548,6 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
    )
    print("Featured model radio button change event linked.")
 
-    # Connect the BYOK textbox to validate provider selection
    byok_textbox.change(
        fn=validate_provider,
        inputs=[byok_textbox, provider_radio],
@@ -679,24 +555,15 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
    )
    print("BYOK textbox change event linked.")
 
-    # Also validate provider when the radio changes to ensure consistency
    provider_radio.change(
        fn=validate_provider,
        inputs=[byok_textbox, provider_radio],
        outputs=provider_radio
    )
    print("Provider radio button change event linked.")
-
-    # Connect the MCP server URL field to validate URL
-    mcp_server_url.change(
-        fn=validate_mcp_url,
-        inputs=mcp_server_url,
-        outputs=mcp_server_url
-    )
-    print("MCP server URL change event linked.")
 
 print("Gradio interface initialized.")
 
 if __name__ == "__main__":
    print("Launching the demo application.")
-    demo.launch(show_api=True, mcp_server=True)  # Enable MCP server for this app too
+    demo.launch(show_api=True, mcp_server=True)  # MCP SERVER ENABLED HERE
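
The streaming core of `respond` (which this commit rewires rather than replaces) is a chat-completion loop over `huggingface_hub.InferenceClient`. A minimal sketch of that pattern for reference; the model name, the `HF_TOKEN` environment variable, and the fixed `provider` value are illustrative assumptions, since the committed code takes all of these from the UI state:

```python
import os
from huggingface_hub import InferenceClient

# Assumed token source for this sketch; the app uses ACCESS_TOKEN or a BYOK key.
client = InferenceClient(token=os.getenv("HF_TOKEN"), provider="hf-inference")

response = ""
for chunk in client.chat_completion(
    model="meta-llama/Llama-3.3-70B-Instruct",  # any featured model
    messages=[{"role": "user", "content": "Hello!"}],
    max_tokens=128,
    stream=True,
):
    token = chunk.choices[0].delta.content or ""
    response += token  # accumulate, as respond() does before each yield
    print(token, end="", flush=True)
```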
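The rewritten `user`/`bot` pair communicates through a markdown convention: each uploaded image becomes its own history entry of the form `![Image](path)`, and `bot` looks one entry back for the text prompt. A self-contained sketch of that parsing step (the helper name is hypothetical, not part of the commit):

```python
def split_last_turn(history):
    """Return (text, image_paths) for the most recent user turn,
    mirroring the parsing done in the committed bot() function."""
    last = history[-1][0]
    if isinstance(last, str) and last.startswith("![Image]("):
        image = last[len("![Image]("):-1]  # strip the markdown wrapper
        text = ""
        # Use the preceding entry as the text prompt if it is plain text
        if len(history) > 1 and isinstance(history[-2][0], str) \
                and not history[-2][0].startswith("![Image]("):
            text = history[-2][0]
        return text, [image]
    return last, []  # text-only turn

history = [["Describe this picture", None], ["![Image](/tmp/cat.png)", None]]
print(split_last_turn(history))  # ('Describe this picture', ['/tmp/cat.png'])
```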
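Beyond the static `mcp.json` shown in the new accordion, the endpoint can also be exercised programmatically. A sketch using the official `mcp` Python SDK, assuming the Space runs locally on port 7860; as the accordion notes, the real tool names and schemas should be confirmed via `list_tools()` or the `/gradio_api/mcp/schema` URL:

```python
import asyncio

from mcp import ClientSession
from mcp.client.sse import sse_client

SERVER_URL = "http://127.0.0.1:7860/gradio_api/mcp/sse"  # assumed local URL

async def main():
    # Open the SSE transport, then an MCP session on top of it
    async with sse_client(SERVER_URL) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            tools = await session.list_tools()
            for tool in tools.tools:  # discover the exposed tool names/schemas
                print(tool.name, "-", tool.description)

asyncio.run(main())
```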