import gradio as gr
from huggingface_hub import InferenceClient
import os
import json
import base64
from PIL import Image
import io
ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded." if ACCESS_TOKEN else "No HF_TOKEN found in environment; only user-supplied (BYOK) keys will authenticate requests.")
# Function to encode image to base64
def encode_image(image_path):
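    """
    Convert an image (file path or PIL.Image) into a base64-encoded JPEG string.

    Returns None when no image is given or encoding fails. Illustrative use with
    a hypothetical local file:
        data_uri = f"data:image/jpeg;base64,{encode_image('photo.png')}"
    """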
if not image_path:
print("No image path provided")
return None
    try:
print(f"Encoding image from path: {image_path}")
# If it's already a PIL Image
if isinstance(image_path, Image.Image):
image = image_path
else:
# Try to open the image file
image = Image.open(image_path)
# Convert to RGB if image has an alpha channel (RGBA)
if image.mode == 'RGBA':
image = image.convert('RGB')
# Encode to base64
buffered = io.BytesIO()
image.save(buffered, format="JPEG")
img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
print("Image encoded successfully")
return img_str
except Exception as e:
print(f"Error encoding image: {e}")
return None
def respond(
message,
    image_files,  # List of image file paths for the current turn (may be empty)
history: list[tuple[str, str]],
system_message,
max_tokens,
temperature,
top_p,
frequency_penalty,
seed,
provider,
custom_api_key,
custom_model,
model_search_term,
selected_model
):
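    """
    Generator that streams a chat completion from the selected inference provider.

    Builds an OpenAI-style message list (system prompt, prior history, then the
    latest text and/or base64-encoded images) and yields the accumulating
    assistant response as tokens arrive from InferenceClient.chat_completion.
    """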
print(f"Received message: {message}")
print(f"Received {len(image_files) if image_files else 0} images")
print(f"History: {history}")
print(f"System message: {system_message}")
print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
print(f"Selected provider: {provider}")
print(f"Custom API Key provided: {bool(custom_api_key.strip())}")
print(f"Selected model (custom_model): {custom_model}")
print(f"Model search term: {model_search_term}")
print(f"Selected model from radio: {selected_model}")
# Determine which token to use
token_to_use = custom_api_key if custom_api_key.strip() != "" else ACCESS_TOKEN
if custom_api_key.strip() != "":
print("USING CUSTOM API KEY: BYOK token provided by user is being used for authentication")
else:
print("USING DEFAULT API KEY: Environment variable HF_TOKEN is being used for authentication")
# Initialize the Inference Client with the provider and appropriate token
client = InferenceClient(token=token_to_use, provider=provider)
print(f"Hugging Face Inference Client initialized with {provider} provider.")
# Convert seed to None if -1 (meaning random)
if seed == -1:
seed = None
# Create multimodal content if images are present
if image_files and len(image_files) > 0:
# Process the user message to include images
user_content = []
# Add text part if there is any
if message and message.strip():
user_content.append({
"type": "text",
"text": message
})
# Add image parts
for img in image_files:
if img is not None:
# Get raw image data from path
try:
encoded_image = encode_image(img)
if encoded_image:
user_content.append({
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{encoded_image}"
}
})
except Exception as e:
print(f"Error encoding image: {e}")
else:
# Text-only message
user_content = message
# Prepare messages in the format expected by the API
messages = [{"role": "system", "content": system_message}]
print("Initial messages array constructed.")
# Add conversation history to the context
for val in history:
user_part = val[0]
assistant_part = val[1]
if user_part:
# Handle both text-only and multimodal messages in history
if isinstance(user_part, tuple) and len(user_part) == 2:
# This is a multimodal message with text and images
history_content = []
if user_part[0]: # Text
history_content.append({
"type": "text",
"text": user_part[0]
})
for img in user_part[1]: # Images
if img:
try:
encoded_img = encode_image(img)
if encoded_img:
history_content.append({
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{encoded_img}"
}
})
except Exception as e:
print(f"Error encoding history image: {e}")
messages.append({"role": "user", "content": history_content})
else:
# Regular text message
messages.append({"role": "user", "content": user_part})
print(f"Added user message to context (type: {type(user_part)})")
if assistant_part:
messages.append({"role": "assistant", "content": assistant_part})
print(f"Added assistant message to context: {assistant_part}")
# Append the latest user message
messages.append({"role": "user", "content": user_content})
print(f"Latest user message appended (content type: {type(user_content)})")
# Determine which model to use, prioritizing custom_model if provided
model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
print(f"Model selected for inference: {model_to_use}")
# Start with an empty string to build the response as tokens stream in
response = ""
print(f"Sending request to {provider} provider.")
# Prepare parameters for the chat completion request
parameters = {
"max_tokens": max_tokens,
"temperature": temperature,
"top_p": top_p,
"frequency_penalty": frequency_penalty,
}
if seed is not None:
parameters["seed"] = seed
# Use the InferenceClient for making the request
try:
# Create a generator for the streaming response
stream = client.chat_completion(
model=model_to_use,
messages=messages,
stream=True,
**parameters
)
print("Received tokens: ", end="", flush=True)
# Process the streaming response
for chunk in stream:
if hasattr(chunk, 'choices') and len(chunk.choices) > 0:
# Extract the content from the response
if hasattr(chunk.choices[0], 'delta') and hasattr(chunk.choices[0].delta, 'content'):
token_text = chunk.choices[0].delta.content
if token_text:
print(token_text, end="", flush=True)
response += token_text
yield response
print()
except Exception as e:
print(f"Error during inference: {e}")
response += f"\nError: {str(e)}"
yield response
print("Completed response generation.")
# Function to validate provider selection based on BYOK
def validate_provider(api_key, provider):
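    """Fall back to 'hf-inference' when no custom API key is provided; otherwise keep the chosen provider."""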
if not api_key.strip() and provider != "hf-inference":
return gr.update(value="hf-inference")
return gr.update(value=provider)
# GRADIO UI
with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
# Create the chatbot component
chatbot = gr.Chatbot(
height=600,
show_copy_button=True,
placeholder="Select a model and begin chatting. Now supports multiple inference providers and multimodal inputs",
layout="panel"
)
print("Chatbot interface created.")
# Multimodal textbox for messages (combines text and file uploads)
msg = gr.MultimodalTextbox(
placeholder="Type a message or upload images...",
show_label=False,
container=False,
scale=12,
file_types=["image"],
file_count="multiple",
sources=["upload"]
)
# Create accordion for settings
with gr.Accordion("Settings", open=False):
# System message
system_message_box = gr.Textbox(
value="You are a helpful AI assistant that can understand images and text.",
placeholder="You are a helpful assistant.",
label="System Prompt"
)
# Generation parameters
with gr.Row():
with gr.Column():
max_tokens_slider = gr.Slider(
minimum=1,
maximum=4096,
value=512,
step=1,
label="Max tokens"
)
temperature_slider = gr.Slider(
minimum=0.1,
maximum=4.0,
value=0.7,
step=0.1,
label="Temperature"
)
top_p_slider = gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.95,
step=0.05,
label="Top-P"
)
with gr.Column():
frequency_penalty_slider = gr.Slider(
minimum=-2.0,
maximum=2.0,
value=0.0,
step=0.1,
label="Frequency Penalty"
)
seed_slider = gr.Slider(
minimum=-1,
maximum=65535,
value=-1,
step=1,
label="Seed (-1 for random)"
)
# Provider selection
providers_list = [
"hf-inference", # Default Hugging Face Inference
"cerebras", # Cerebras provider
"together", # Together AI
"sambanova", # SambaNova
"novita", # Novita AI
"cohere", # Cohere
"fireworks-ai", # Fireworks AI
"hyperbolic", # Hyperbolic
"nebius", # Nebius
]
provider_radio = gr.Radio(
choices=providers_list,
value="hf-inference",
label="Inference Provider",
)
# New BYOK textbox
byok_textbox = gr.Textbox(
value="",
label="BYOK (Bring Your Own Key)",
info="Enter a custom Hugging Face API key here. When empty, only 'hf-inference' provider can be used.",
placeholder="Enter your Hugging Face API token",
type="password" # Hide the API key for security
)
# Custom model box
custom_model_box = gr.Textbox(
value="",
label="Custom Model",
info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
placeholder="meta-llama/Llama-3.3-70B-Instruct"
)
# Model search
model_search_box = gr.Textbox(
label="Filter Models",
placeholder="Search for a featured model...",
lines=1
)
# Featured models list
models_list = [
"meta-llama/Llama-3.2-11B-Vision-Instruct",
"meta-llama/Llama-3.3-70B-Instruct",
"meta-llama/Llama-3.1-70B-Instruct",
"meta-llama/Llama-3.0-70B-Instruct",
"meta-llama/Llama-3.2-3B-Instruct",
"meta-llama/Llama-3.2-1B-Instruct",
"meta-llama/Llama-3.1-8B-Instruct",
"NousResearch/Hermes-3-Llama-3.1-8B",
"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
"mistralai/Mistral-Nemo-Instruct-2407",
"mistralai/Mixtral-8x7B-Instruct-v0.1",
"mistralai/Mistral-7B-Instruct-v0.3",
"mistralai/Mistral-7B-Instruct-v0.2",
"Qwen/Qwen3-235B-A22B",
"Qwen/Qwen3-32B",
"Qwen/Qwen2.5-72B-Instruct",
"Qwen/Qwen2.5-3B-Instruct",
"Qwen/Qwen2.5-0.5B-Instruct",
"Qwen/QwQ-32B",
"Qwen/Qwen2.5-Coder-32B-Instruct",
"microsoft/Phi-3.5-mini-instruct",
"microsoft/Phi-3-mini-128k-instruct",
"microsoft/Phi-3-mini-4k-instruct",
]
featured_model_radio = gr.Radio(
label="Select a model below",
choices=models_list,
value="meta-llama/Llama-3.2-11B-Vision-Instruct", # Default to a multimodal model
interactive=True
)
gr.Markdown("[View all Text-to-Text models](https://huggingface.co/models?inference_provider=all&pipeline_tag=text-generation&sort=trending) | [View all multimodal models](https://huggingface.co/models?inference_provider=all&pipeline_tag=image-text-to-text&sort=trending)")
# MCP Support Information Accordion
with gr.Accordion("MCP Support (for LLMs)", open=False):
gr.Markdown("""
### Model Context Protocol (MCP) Support
This application can function as an MCP server, allowing compatible AI models and agents (such as Claude Desktop or custom MCP clients) to use its text generation and image understanding capabilities as a tool.
When MCP is enabled, Gradio automatically exposes the relevant functions (likely based on the `bot` function in this app) as MCP tools.
**To connect an MCP client to this server:**
1. Ensure this Gradio application is running.
2. Use the following URL for the MCP server in your client configuration:
- If running locally: `http://127.0.0.1:7860/gradio_api/mcp/sse`
- If deployed on Hugging Face Spaces: `https://YOUR_USERNAME-YOUR_SPACENAME.hf.space/gradio_api/mcp/sse` (replace with your actual Space URL)
**Example MCP Client Configuration (`mcp.json` or similar):**
```json
{
"mcpServers": {
"serverlessTextgenHub": {
"url": "http://127.0.0.1:7860/gradio_api/mcp/sse"
}
}
}
```
**Tool Parameters:**
The exposed MCP tool will likely have parameters corresponding to the inputs of the `bot` function (e.g., `history`, `system_msg`, `max_tokens`, `temperature`, `custom_model`, `selected_model`, etc.).
* **Important for the `history` parameter:** For image inputs, the MCP client may need to format `history` so that image references appear in a form the `bot` function can parse (e.g., Markdown image syntax `![Image](URL_or_base64_data_uri)` within the user part of a history entry); see the illustrative payload below.
* It's highly recommended to inspect the MCP schema for this server to understand the exact tool names, descriptions, and input/output schemas. You can usually find this at: `http://127.0.0.1:7860/gradio_api/mcp/schema` (or the equivalent URL for your deployed Space).
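
For illustration only (the real tool schema may differ; check the schema URL above, and note the image URL here is a placeholder), a `history` argument carrying a text prompt followed by an image might look like:

```json
[
  ["Describe this picture.", null],
  ["![Image](https://example.com/cat.jpg)", null]
]
```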
This allows for powerful integrations where an AI agent can programmatically request text or multimodal generations from this Serverless-TextGen-Hub.
""")
# Chat history state
chat_history = gr.State([])
# Function to filter models
def filter_models(search_term):
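        """Narrow the featured-model radio choices to case-insensitive matches of the search term."""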
print(f"Filtering models with search term: {search_term}")
filtered = [m for m in models_list if search_term.lower() in m.lower()]
print(f"Filtered models: {filtered}")
return gr.update(choices=filtered)
# Function to set custom model from radio
def set_custom_model_from_radio(selected):
print(f"Featured model selected: {selected}")
return selected
# Function for the chat interface
def user(user_message, history):
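        """
        Append the MultimodalTextbox payload to the chat history.

        The text part becomes one [user, None] entry; each uploaded image becomes
        its own entry rendered as Markdown, e.g. ["![Image](/tmp/upload.png)", None]
        (the path shown is illustrative).
        """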
print(f"User message received: {user_message}")
if not user_message or (not user_message.get("text") and not user_message.get("files")):
print("Empty message, skipping")
return history # Return immediately if message is empty
text_content = user_message.get("text", "").strip()
files = user_message.get("files", [])
print(f"Text content: {text_content}")
print(f"Files: {files}")
if not text_content and not files: # Check again after stripping text
print("No content to display")
return history
# Append text message first if it exists and is not empty
if text_content:
print(f"Adding text message: {text_content}")
history.append([text_content, None])
# Then append each image file as a separate message
if files:
for file_path in files:
if file_path and isinstance(file_path, str): # Ensure file_path is valid
print(f"Adding image: {file_path}")
history.append([f"![Image]({file_path})", None]) # Image as a new message
return history
# Define bot response function
def bot(history, system_msg, max_tokens, temperature, top_p, freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model):
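        """
        Stream the assistant's reply for the most recent user turn.

        The last history entry is treated as an image turn if it matches the
        "![Image](path)" pattern (optionally preceded by a text prompt in the
        previous entry); otherwise it is treated as plain text. The turn is then
        forwarded to respond() and the streamed reply written back into history.
        """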
if not history or not history[-1][0]: # Check if history or last message is empty
print("No history or empty last message to process for bot")
            # Yield the history unchanged so Gradio still receives a valid update.
yield history
return
user_message_content = history[-1][0] # This is the user's latest message (text or image markdown)
print(f"Bot processing user message content: {user_message_content}")
# Determine if the current turn is primarily about an image or text
# This logic assumes images are added as separate history entries like "![Image](path)"
# and text prompts might precede them or be separate.
current_message_text_for_api = ""
current_image_files_for_api = []
# Check if the last entry is an image
if isinstance(user_message_content, str) and user_message_content.startswith("![Image]("):
image_path = user_message_content.replace("![Image](", "").replace(")", "")
current_image_files_for_api.append(image_path)
print(f"Bot identified image in last history entry: {image_path}")
# If it's an image, check the second to last entry for a text prompt
if len(history) > 1:
prev_content = history[-2][0]
if isinstance(prev_content, str) and not prev_content.startswith("![Image]("):
current_message_text_for_api = prev_content
print(f"Bot identified preceding text for image: {current_message_text_for_api}")
else: # Last entry is text
current_message_text_for_api = user_message_content
print(f"Bot identified text in last history entry: {current_message_text_for_api}")
# The history sent to `respond` should not include the current turn's input,
# as `respond` will add `message` (current_message_text_for_api) to its internal `messages` list.
# If an image is present, it's passed via `image_files`.
history_for_respond_func = history[:-1] # Pass history *before* the current turn
history[-1][1] = "" # Initialize assistant's response for the current turn
for response_chunk in respond(
message=current_message_text_for_api,
image_files=current_image_files_for_api,
history=history_for_respond_func, # Pass prior history
system_message=system_msg,
max_tokens=max_tokens,
temperature=temperature,
top_p=top_p,
frequency_penalty=freq_penalty,
seed=seed,
provider=provider,
custom_api_key=api_key,
custom_model=custom_model,
model_search_term=search_term, # Though these two might not be directly used by respond if model is fixed
selected_model=selected_model
):
history[-1][1] = response_chunk
yield history
# Event handlers
msg.submit(
user,
[msg, chatbot],
[chatbot],
queue=False
).then(
bot,
[chatbot, system_message_box, max_tokens_slider, temperature_slider, top_p_slider,
frequency_penalty_slider, seed_slider, provider_radio, byok_textbox, custom_model_box,
model_search_box, featured_model_radio],
[chatbot]
).then(
lambda: {"text": "", "files": []}, # Clear inputs after submission
None,
[msg]
)
model_search_box.change(
fn=filter_models,
inputs=model_search_box,
outputs=featured_model_radio
)
print("Model search box change event linked.")
featured_model_radio.change(
fn=set_custom_model_from_radio,
inputs=featured_model_radio,
outputs=custom_model_box
)
print("Featured model radio button change event linked.")
byok_textbox.change(
fn=validate_provider,
inputs=[byok_textbox, provider_radio],
outputs=provider_radio
)
print("BYOK textbox change event linked.")
provider_radio.change(
fn=validate_provider,
inputs=[byok_textbox, provider_radio],
outputs=provider_radio
)
print("Provider radio button change event linked.")
print("Gradio interface initialized.")
if __name__ == "__main__":
print("Launching the demo application.")
demo.launch(show_api=True, mcp_server=True) # MCP SERVER ENABLED HERE