Nymbo committed
Commit 7c1212e · verified · 1 Parent(s): 02ec239

Update app.py

Files changed (1): app.py (+116 -249)
app.py CHANGED
@@ -3,7 +3,6 @@ from huggingface_hub import InferenceClient
 import os
 import json
 import base64
-import requests
 from PIL import Image
 import io
 
@@ -16,7 +15,7 @@ def encode_image(image_path):
         print("No image path provided")
         return None
 
-    try:
+    try:
         print(f"Encoding image from path: {image_path}")
 
         # If it's already a PIL Image
@@ -40,61 +39,9 @@ def encode_image(image_path):
         print(f"Error encoding image: {e}")
         return None
 
-# Function to convert text to speech using Kokoro MCP server
-def text_to_speech(text, mcp_server_url, speed=1.0):
-    """
-    Convert text to speech using an MCP server with TTS capabilities.
-
-    Args:
-        text (str): The text to convert to speech
-        mcp_server_url (str): URL of the MCP server
-        speed (float): Speech speed multiplier
-
-    Returns:
-        str: Base64 encoded audio data
-    """
-    if not text or not mcp_server_url:
-        return None
-
-    try:
-        # Create JSON-RPC request for the TTS tool
-        payload = {
-            "jsonrpc": "2.0",
-            "id": 1,
-            "method": "tools/call",
-            "params": {
-                "name": "text_to_audio_b64",
-                "arguments": {
-                    "text": text,
-                    "speed": speed
-                }
-            }
-        }
-
-        # Send request to MCP server
-        response = requests.post(
-            mcp_server_url,
-            json=payload,
-            headers={"Content-Type": "application/json"}
-        )
-
-        if response.status_code == 200:
-            result = response.json()
-            if "result" in result:
-                return result["result"]
-            else:
-                print(f"Error in MCP server response: {result.get('error', 'Unknown error')}")
-        else:
-            print(f"Error calling MCP server: {response.status_code}")
-
-    except Exception as e:
-        print(f"Error in text_to_speech: {e}")
-
-    return None
-
 def respond(
     message,
-    image_files,
+    image_files,  # Changed parameter name and structure
     history: list[tuple[str, str]],
     system_message,
     max_tokens,
@@ -106,9 +53,7 @@ def respond(
     custom_api_key,
     custom_model,
     model_search_term,
-    selected_model,
-    mcp_server_url,
-    enable_tts
+    selected_model
 ):
     print(f"Received message: {message}")
     print(f"Received {len(image_files) if image_files else 0} images")
@@ -121,8 +66,6 @@ def respond(
     print(f"Selected model (custom_model): {custom_model}")
     print(f"Model search term: {model_search_term}")
     print(f"Selected model from radio: {selected_model}")
-    print(f"MCP Server URL: {mcp_server_url}")
-    print(f"TTS Enabled: {enable_tts}")
 
     # Determine which token to use
     token_to_use = custom_api_key if custom_api_key.strip() != "" else ACCESS_TOKEN
@@ -266,19 +209,6 @@ def respond(
             response += f"\nError: {str(e)}"
             yield response
 
-    # If TTS is enabled and MCP server URL is provided, generate speech from the response
-    if enable_tts and mcp_server_url and response:
-        try:
-            print("Generating speech from response using MCP server...")
-            audio_b64 = text_to_speech(response, mcp_server_url)
-            if audio_b64:
-                # Add a hidden audio tag with the audio data
-                audio_html = f'<audio id="tts-audio" autoplay style="display:none"><source src="data:audio/wav;base64,{audio_b64}" type="audio/wav"></audio>'
-                response += f"\n\n{audio_html}"
-                yield response
-        except Exception as e:
-            print(f"Error generating speech: {e}")
-
     print("Completed response generation.")
 
 # Function to validate provider selection based on BYOK
@@ -287,30 +217,13 @@ def validate_provider(api_key, provider):
         return gr.update(value="hf-inference")
     return gr.update(value=provider)
 
-# Function to validate MCP Server URL
-def validate_mcp_url(url):
-    if not url:
-        return gr.update(value="")
-
-    if not url.startswith(("http://", "https://")):
-        url = "https://" + url
-
-    # If gradio_api/mcp/sse is not in the URL, add it
-    if not url.endswith("/gradio_api/mcp/sse"):
-        if url.endswith("/"):
-            url = url + "gradio_api/mcp/sse"
-        else:
-            url = url + "/gradio_api/mcp/sse"
-
-    return gr.update(value=url)
-
 # GRADIO UI
 with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
     # Create the chatbot component
     chatbot = gr.Chatbot(
         height=600,
         show_copy_button=True,
-        placeholder="Select a model and begin chatting. Now supports multiple inference providers, multimodal inputs, and MCP servers",
+        placeholder="Select a model and begin chatting. Now supports multiple inference providers and multimodal inputs",
         layout="panel"
     )
     print("Chatbot interface created.")
@@ -423,7 +336,6 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
     )
 
     # Featured models list
-    # Updated to include multimodal models
     models_list = [
         "meta-llama/Llama-3.2-11B-Vision-Instruct",
         "meta-llama/Llama-3.3-70B-Instruct",
@@ -458,40 +370,42 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
     )
 
     gr.Markdown("[View all Text-to-Text models](https://huggingface.co/models?inference_provider=all&pipeline_tag=text-generation&sort=trending) | [View all multimodal models](https://huggingface.co/models?inference_provider=all&pipeline_tag=image-text-to-text&sort=trending)")
 
-    # Add MCP Server configuration
-    with gr.Accordion("MCP Server Settings", open=False):
-        gr.Markdown("""
-        # Model Context Protocol (MCP) Integration
-
-        Connect to MCP servers to add advanced capabilities like Text-to-Speech to your chat.
-
-        [Learn more about MCP](https://modelcontextprotocol.io/)
-        """)
-
-        mcp_server_url = gr.Textbox(
-            label="MCP Server URL",
-            placeholder="https://your-tts-server.hf.space/gradio_api/mcp/sse",
-            info="URL to an MCP-compatible server (e.g., Kokoro TTS)"
-        )
-
-        enable_tts = gr.Checkbox(
-            label="Enable Text-to-Speech",
-            value=False,
-            info="When enabled, AI responses will be read aloud using the MCP server"
-        )
-
-        # Example servers
-        gr.Markdown("""
-        ### Example MCP Servers
-
-        Try these MCP servers for additional capabilities:
-
-        - **Kokoro TTS**: `https://fdaudens-kokoro-mcp.hf.space/gradio_api/mcp/sse`
-        - **More examples coming soon**
-
-        To use, copy the URL above and paste it into the MCP Server URL field.
-        """)
+    # MCP Support Information Accordion
+    with gr.Accordion("MCP Support (for LLMs)", open=False):
+        gr.Markdown("""
+        ### Model Context Protocol (MCP) Support
+
+        This application can function as an MCP Server, allowing compatible AI models and agents (like Claude Desktop or custom MCP clients) to use its text and image generation capabilities as a tool.
+
+        When MCP is enabled, Gradio automatically exposes the relevant functions (likely based on the `bot` function in this app) as MCP tools.
+
+        **To connect an MCP client to this server:**
+
+        1. Ensure this Gradio application is running.
+        2. Use the following URL for the MCP server in your client configuration:
+           - If running locally: `http://127.0.0.1:7860/gradio_api/mcp/sse`
+           - If deployed on Hugging Face Spaces: `https://YOUR_USERNAME-YOUR_SPACENAME.hf.space/gradio_api/mcp/sse` (replace with your actual Space URL)
+
+        **Example MCP Client Configuration (`mcp.json` or similar):**
+        ```json
+        {
+          "mcpServers": {
+            "serverlessTextgenHub": {
+              "url": "http://127.0.0.1:7860/gradio_api/mcp/sse"
+            }
+          }
+        }
+        ```
+
+        **Tool Parameters:**
+        The exposed MCP tool will likely have parameters corresponding to the inputs of the `bot` function (e.g., `history`, `system_msg`, `max_tokens`, `temperature`, `model`, etc.).
+
+        * **Important for `history` parameter:** For image inputs, the MCP client might need to format the `history` to include image references in a way the `bot` function can parse (e.g., markdown links `![Image](URL_or_base64_data_uri)` within the history's message part).
+        * It's highly recommended to inspect the MCP schema for this server to understand the exact tool names, descriptions, and input/output schemas. You can usually find this at: `http://127.0.0.1:7860/gradio_api/mcp/schema` (or the equivalent URL for your deployed Space).
+
+        This allows for powerful integrations where an AI agent can programmatically request text or multimodal generations from this Serverless-TextGen-Hub.
+        """)
 
     # Chat history state
     chat_history = gr.State([])
@@ -510,134 +424,99 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
 
     # Function for the chat interface
     def user(user_message, history):
-        # Debug logging for troubleshooting
         print(f"User message received: {user_message}")
 
-        # Skip if message is empty (no text and no files)
         if not user_message or (not user_message.get("text") and not user_message.get("files")):
             print("Empty message, skipping")
-            return history
+            return history  # Return immediately if message is empty
 
-        # Prepare multimodal message format
         text_content = user_message.get("text", "").strip()
         files = user_message.get("files", [])
 
         print(f"Text content: {text_content}")
         print(f"Files: {files}")
 
-        # If both text and files are empty, skip
-        if not text_content and not files:
+        if not text_content and not files:  # Check again after stripping text
             print("No content to display")
             return history
 
-        # Add message with images to history
-        if files and len(files) > 0:
-            # Add text message first if it exists
-            if text_content:
-                # Add a separate text message
-                print(f"Adding text message: {text_content}")
-                history.append([text_content, None])
-
-            # Then add each image file separately
-            for file_path in files:
-                if file_path and isinstance(file_path, str):
-                    print(f"Adding image: {file_path}")
-                    # Add image as a separate message with no text
-                    history.append([f"![Image]({file_path})", None])
-
-            return history
-        else:
-            # For text-only messages
-            print(f"Adding text-only message: {text_content}")
-            history.append([text_content, None])
-            return history
+        # Append text message first if it exists and is not empty
+        if text_content:
+            print(f"Adding text message: {text_content}")
+            history.append([text_content, None])
+
+        # Then append each image file as a separate message
+        if files:
+            for file_path in files:
+                if file_path and isinstance(file_path, str):  # Ensure file_path is valid
+                    print(f"Adding image: {file_path}")
+                    history.append([f"![Image]({file_path})", None])  # Image as a new message
+
+        return history
 
     # Define bot response function
-    def bot(history, system_msg, max_tokens, temperature, top_p, freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model, mcp_url, tts_enabled):
-        # Check if history is valid
-        if not history or len(history) == 0:
-            print("No history to process")
-            return history
-
-        # Get the most recent message and detect if it's an image
-        user_message = history[-1][0]
-        print(f"Processing user message: {user_message}")
-
-        is_image = False
-        image_path = None
-        text_content = user_message
-
-        # Check if this is an image message (marked with ![Image])
-        if isinstance(user_message, str) and user_message.startswith("![Image]("):
-            is_image = True
-            # Extract image path from markdown format ![Image](path)
-            image_path = user_message.replace("![Image](", "").replace(")", "")
-            print(f"Image detected: {image_path}")
-            text_content = ""  # No text for image-only messages
-
-        # Look back for text context if this is an image
-        text_context = ""
-        if is_image and len(history) > 1:
-            # Use the previous message as context if it's text
-            prev_message = history[-2][0]
-            if isinstance(prev_message, str) and not prev_message.startswith("![Image]("):
-                text_context = prev_message
-                print(f"Using text context from previous message: {text_context}")
-
-        # Process message through respond function
-        history[-1][1] = ""
-
-        # Validate and format MCP server URL
-        if mcp_url:
-            mcp_url = validate_mcp_url(mcp_url)["value"]
-
-        # Use either the image or text for the API
-        if is_image:
-            # For image messages
-            for response in respond(
-                text_context,  # Text context from previous message if any
-                [image_path],  # Current image
-                history[:-1],  # Previous history
-                system_msg,
-                max_tokens,
-                temperature,
-                top_p,
-                freq_penalty,
-                seed,
-                provider,
-                api_key,
-                custom_model,
-                search_term,
-                selected_model,
-                mcp_url,
-                tts_enabled
-            ):
-                history[-1][1] = response
-                yield history
-        else:
-            # For text-only messages
-            for response in respond(
-                text_content,  # Text message
-                None,  # No image
-                history[:-1],  # Previous history
-                system_msg,
-                max_tokens,
-                temperature,
-                top_p,
-                freq_penalty,
-                seed,
-                provider,
-                api_key,
-                custom_model,
-                search_term,
-                selected_model,
-                mcp_url,
-                tts_enabled
-            ):
-                history[-1][1] = response
-                yield history
-
-    # Event handlers - only using the MultimodalTextbox's built-in submit functionality
+    def bot(history, system_msg, max_tokens, temperature, top_p, freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model):
+        if not history or not history[-1][0]:  # Check if history or last message is empty
+            print("No history or empty last message to process for bot")
+            # Yield an empty update or the history itself to avoid errors
+            # depending on how Gradio handles empty yields.
+            # For safety, just return the history if it's in a bad state.
+            yield history
+            return
+
+        user_message_content = history[-1][0]  # This is the user's latest message (text or image markdown)
+        print(f"Bot processing user message content: {user_message_content}")
+
+        # Determine if the current turn is primarily about an image or text.
+        # This logic assumes images are added as separate history entries like "![Image](path)"
+        # and text prompts might precede them or be separate.
+
+        current_message_text_for_api = ""
+        current_image_files_for_api = []
+
+        # Check if the last entry is an image
+        if isinstance(user_message_content, str) and user_message_content.startswith("![Image]("):
+            image_path = user_message_content.replace("![Image](", "").replace(")", "")
+            current_image_files_for_api.append(image_path)
+            print(f"Bot identified image in last history entry: {image_path}")
+            # If it's an image, check the second to last entry for a text prompt
+            if len(history) > 1:
+                prev_content = history[-2][0]
+                if isinstance(prev_content, str) and not prev_content.startswith("![Image]("):
+                    current_message_text_for_api = prev_content
+                    print(f"Bot identified preceding text for image: {current_message_text_for_api}")
+        else:  # Last entry is text
+            current_message_text_for_api = user_message_content
+            print(f"Bot identified text in last history entry: {current_message_text_for_api}")
+
+        # The history sent to `respond` should not include the current turn's input,
+        # as `respond` will add `message` (current_message_text_for_api) to its internal `messages` list.
+        # If an image is present, it's passed via `image_files`.
+        history_for_respond_func = history[:-1]  # Pass history *before* the current turn
+
+        history[-1][1] = ""  # Initialize assistant's response for the current turn
+
+        for response_chunk in respond(
+            message=current_message_text_for_api,
+            image_files=current_image_files_for_api,
+            history=history_for_respond_func,  # Pass prior history
+            system_message=system_msg,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            frequency_penalty=freq_penalty,
+            seed=seed,
+            provider=provider,
+            custom_api_key=api_key,
+            custom_model=custom_model,
+            model_search_term=search_term,  # These two might not be directly used by respond if the model is fixed
+            selected_model=selected_model
+        ):
+            history[-1][1] = response_chunk
+            yield history
+
+    # Event handlers
     msg.submit(
         user,
         [msg, chatbot],
@@ -647,7 +526,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
         bot,
         [chatbot, system_message_box, max_tokens_slider, temperature_slider, top_p_slider,
          frequency_penalty_slider, seed_slider, provider_radio, byok_textbox, custom_model_box,
-         model_search_box, featured_model_radio, mcp_server_url, enable_tts],
+         model_search_box, featured_model_radio],
        [chatbot]
     ).then(
        lambda: {"text": "", "files": []},  # Clear inputs after submission
@@ -655,7 +534,6 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
        [msg]
    )
 
-    # Connect the model filter to update the radio choices
    model_search_box.change(
        fn=filter_models,
        inputs=model_search_box,
@@ -663,7 +541,6 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
    )
    print("Model search box change event linked.")
 
-    # Connect the featured model radio to update the custom model box
    featured_model_radio.change(
        fn=set_custom_model_from_radio,
        inputs=featured_model_radio,
@@ -671,7 +548,6 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
    )
    print("Featured model radio button change event linked.")
 
-    # Connect the BYOK textbox to validate provider selection
    byok_textbox.change(
        fn=validate_provider,
        inputs=[byok_textbox, provider_radio],
@@ -679,24 +555,15 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
    )
    print("BYOK textbox change event linked.")
 
-    # Also validate provider when the radio changes to ensure consistency
    provider_radio.change(
        fn=validate_provider,
        inputs=[byok_textbox, provider_radio],
        outputs=provider_radio
    )
    print("Provider radio button change event linked.")
-
-    # Connect the MCP server URL field to validate URL
-    mcp_server_url.change(
-        fn=validate_mcp_url,
-        inputs=mcp_server_url,
-        outputs=mcp_server_url
-    )
-    print("MCP server URL change event linked.")
 
 print("Gradio interface initialized.")
 
 if __name__ == "__main__":
    print("Launching the demo application.")
-    demo.launch(show_api=True, mcp_server=True)  # Enable MCP server for this app too
+    demo.launch(show_api=True, mcp_server=True)  # MCP SERVER ENABLED HERE
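
The streaming core of `respond` (which this commit rewires rather than replaces) is a chat-completion loop over `huggingface_hub.InferenceClient`. A minimal sketch of that pattern for reference; the model name, the `HF_TOKEN` environment variable, and the fixed `provider` value are illustrative assumptions, since the committed code takes all of these from the UI state:

```python
import os
from huggingface_hub import InferenceClient

# Assumed token source for this sketch; the app uses ACCESS_TOKEN or a BYOK key.
client = InferenceClient(token=os.getenv("HF_TOKEN"), provider="hf-inference")

response = ""
for chunk in client.chat_completion(
    model="meta-llama/Llama-3.3-70B-Instruct",  # any featured model
    messages=[{"role": "user", "content": "Hello!"}],
    max_tokens=128,
    stream=True,
):
    token = chunk.choices[0].delta.content or ""
    response += token  # accumulate, as respond() does before each yield
    print(token, end="", flush=True)
```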
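The rewritten `user`/`bot` pair communicates through a markdown convention: each uploaded image becomes its own history entry of the form `![Image](path)`, and `bot` looks one entry back for the text prompt. A self-contained sketch of that parsing step (the helper name is hypothetical, not part of the commit):

```python
def split_last_turn(history):
    """Return (text, image_paths) for the most recent user turn,
    mirroring the parsing done in the committed bot() function."""
    last = history[-1][0]
    if isinstance(last, str) and last.startswith("![Image]("):
        image = last[len("![Image]("):-1]  # strip the markdown wrapper
        text = ""
        # Use the preceding entry as the text prompt if it is plain text
        if len(history) > 1 and isinstance(history[-2][0], str) \
                and not history[-2][0].startswith("![Image]("):
            text = history[-2][0]
        return text, [image]
    return last, []  # text-only turn

history = [["Describe this picture", None], ["![Image](/tmp/cat.png)", None]]
print(split_last_turn(history))  # ('Describe this picture', ['/tmp/cat.png'])
```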
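Beyond the static `mcp.json` shown in the new accordion, the endpoint can also be exercised programmatically. A sketch using the official `mcp` Python SDK, assuming the Space runs locally on port 7860; as the accordion notes, the real tool names and schemas should be confirmed via `list_tools()` or the `/gradio_api/mcp/schema` URL:

```python
import asyncio

from mcp import ClientSession
from mcp.client.sse import sse_client

SERVER_URL = "http://127.0.0.1:7860/gradio_api/mcp/sse"  # assumed local URL

async def main():
    # Open the SSE transport, then an MCP session on top of it
    async with sse_client(SERVER_URL) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            tools = await session.list_tools()
            for tool in tools.tools:  # discover the exposed tool names/schemas
                print(tool.name, "-", tool.description)

asyncio.run(main())
```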