Nymbo committed
Commit 8f939dc · verified · 1 Parent(s): 57cb471

Update app.py

Files changed (1)
  app.py +326 -100
app.py CHANGED
@@ -5,13 +5,7 @@ import json
 import base64
 from PIL import Image
 import io
-from smolagents.mcp_client import MCPClient
 
-# Global variables for MCP Client and TTS tool
-mcp_client = None
-tts_tool = None
-
-# Access token from environment
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
 print("Access token loaded.")
 
@@ -23,14 +17,19 @@ def encode_image(image_path):
 
     try:
         print(f"Encoding image from path: {image_path}")
+
+        # If it's already a PIL Image
         if isinstance(image_path, Image.Image):
             image = image_path
         else:
+            # Try to open the image file
             image = Image.open(image_path)
 
+        # Convert to RGB if image has an alpha channel (RGBA)
         if image.mode == 'RGBA':
             image = image.convert('RGB')
 
+        # Encode to base64
         buffered = io.BytesIO()
         image.save(buffered, format="JPEG")
         img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
@@ -40,19 +39,69 @@ def encode_image(image_path):
         print(f"Error encoding image: {e}")
         return None
 
-# Initialize MCP Client at startup
-def init_mcp_client():
-    global mcp_client, tts_tool
+def text_generation(
+    message: str,
+    system_message: str = "You are a helpful AI assistant.",
+    max_tokens: int = 512,
+    temperature: float = 0.7,
+    top_p: float = 0.95,
+    frequency_penalty: float = 0.0,
+    provider: str = "hf-inference",
+    model: str = "meta-llama/Llama-3.2-11B-Vision-Instruct"
+) -> str:
+    """
+    Generate text based on the input message using the specified model and provider.
+
+    Args:
+        message (str): The input text prompt.
+        system_message (str): The system prompt to guide the AI's behavior.
+        max_tokens (int): Maximum number of tokens to generate.
+        temperature (float): Sampling temperature for randomness.
+        top_p (float): Top-p sampling parameter.
+        frequency_penalty (float): Penalty for frequent tokens.
+        provider (str): Inference provider (e.g., 'hf-inference').
+        model (str): Model identifier (e.g., 'meta-llama/Llama-3.2-11B-Vision-Instruct').
+
+    Returns:
+        str: The generated text response.
+    """
+    print(f"Text generation called with message: {message}")
+
+    # Initialize the Inference Client
+    client = InferenceClient(token=ACCESS_TOKEN, provider=provider)
+    print(f"Inference Client initialized with {provider} provider.")
+
+    # Prepare messages
+    messages = [
+        {"role": "system", "content": system_message},
+        {"role": "user", "content": message}
+    ]
+
+    # Prepare parameters
+    parameters = {
+        "max_tokens": max_tokens,
+        "temperature": temperature,
+        "top_p": top_p,
+        "frequency_penalty": frequency_penalty,
+    }
+
     try:
-        mcp_client = MCPClient({"url": "https://fdaudens-kokoro-mcp.hf.space/gradio_api/mcp/sse"})
-        tools = mcp_client.get_tools()
-        tts_tool = next((tool for tool in tools if tool.name == "text_to_audio"), None)
-        if tts_tool:
-            print("Successfully connected to Kokoro TTS tool")
+        # Perform chat completion (non-streaming for MCP simplicity)
+        response = client.chat_completion(
+            model=model,
+            messages=messages,
+            stream=False,
+            **parameters
+        )
+        if hasattr(response, 'choices') and len(response.choices) > 0:
+            generated_text = response.choices[0].message.content
+            print(f"Generated text: {generated_text}")
+            return generated_text
         else:
-            print("TTS tool not found")
+            raise ValueError("No valid response received from the model.")
     except Exception as e:
-        print(f"Error initializing MCP Client: {e}")
+        print(f"Error during text generation: {e}")
+        return f"Error: {str(e)}"
 
 def respond(
     message,
@@ -82,6 +131,7 @@ def respond(
     print(f"Model search term: {model_search_term}")
     print(f"Selected model from radio: {selected_model}")
 
+    # Determine which token to use
    token_to_use = custom_api_key if custom_api_key.strip() != "" else ACCESS_TOKEN
 
     if custom_api_key.strip() != "":
@@ -89,57 +139,81 @@ def respond(
     else:
         print("USING DEFAULT API KEY: Environment variable HF_TOKEN is being used for authentication")
 
+    # Initialize the Inference Client with the provider and appropriate token
     client = InferenceClient(token=token_to_use, provider=provider)
     print(f"Hugging Face Inference Client initialized with {provider} provider.")
 
+    # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
 
+    # Create multimodal content if images are present
     if image_files and len(image_files) > 0:
+        # Process the user message to include images
         user_content = []
+
+        # Add text part if there is any
         if message and message.strip():
-            user_content.append({"type": "text", "text": message})
+            user_content.append({
+                "type": "text",
+                "text": message
+            })
 
+        # Add image parts
         for img in image_files:
             if img is not None:
+                # Get raw image data from path
                 try:
                     encoded_image = encode_image(img)
                     if encoded_image:
                         user_content.append({
                             "type": "image_url",
-                            "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}
+                            "image_url": {
+                                "url": f"data:image/jpeg;base64,{encoded_image}"
+                            }
                         })
                 except Exception as e:
                     print(f"Error encoding image: {e}")
     else:
+        # Text-only message
        user_content = message
 
+    # Prepare messages in the format expected by the API
     messages = [{"role": "system", "content": system_message}]
     print("Initial messages array constructed.")
 
+    # Add conversation history to the context
     for val in history:
         user_part = val[0]
         assistant_part = val[1]
         if user_part:
+            # Handle both text-only and multimodal messages in history
             if isinstance(user_part, tuple) and len(user_part) == 2:
+                # This is a multimodal message with text and images
                 history_content = []
-                if user_part[0]:
-                    history_content.append({"type": "text", "text": user_part[0]})
+                if user_part[0]:  # Text
+                    history_content.append({
+                        "type": "text",
+                        "text": user_part[0]
+                    })
 
-                for img in user_part[1]:
+                for img in user_part[1]:  # Images
                     if img:
                         try:
                             encoded_img = encode_image(img)
                             if encoded_img:
                                 history_content.append({
                                     "type": "image_url",
-                                    "image_url": {"url": f"data:image/jpeg;base64,{encoded_img}"}
+                                    "image_url": {
+                                        "url": f"data:image/jpeg;base64,{encoded_img}"
+                                    }
                                 })
                         except Exception as e:
                             print(f"Error encoding history image: {e}")
 
                 messages.append({"role": "user", "content": history_content})
             else:
+                # Regular text message
                 messages.append({"role": "user", "content": user_part})
             print(f"Added user message to context (type: {type(user_part)})")
 
@@ -147,15 +221,19 @@ def respond(
             messages.append({"role": "assistant", "content": assistant_part})
             print(f"Added assistant message to context: {assistant_part}")
 
+    # Append the latest user message
     messages.append({"role": "user", "content": user_content})
     print(f"Latest user message appended (content type: {type(user_content)})")
 
+    # Determine which model to use, prioritizing custom_model if provided
     model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
     print(f"Model selected for inference: {model_to_use}")
 
+    # Start with an empty string to build the response as tokens stream in
     response = ""
     print(f"Sending request to {provider} provider.")
 
+    # Prepare parameters for the chat completion request
     parameters = {
         "max_tokens": max_tokens,
         "temperature": temperature,
@@ -166,7 +244,9 @@ def respond(
     if seed is not None:
         parameters["seed"] = seed
 
+    # Use the InferenceClient for making the request
     try:
+        # Create a generator for the streaming response
         stream = client.chat_completion(
             model=model_to_use,
             messages=messages,
@@ -176,8 +256,10 @@ def respond(
 
         print("Received tokens: ", end="", flush=True)
 
+        # Process the streaming response
         for chunk in stream:
             if hasattr(chunk, 'choices') and len(chunk.choices) > 0:
+                # Extract the content from the response
                 if hasattr(chunk.choices[0], 'delta') and hasattr(chunk.choices[0].delta, 'content'):
                     token_text = chunk.choices[0].delta.content
                     if token_text:
@@ -193,40 +275,16 @@ def respond(
 
     print("Completed response generation.")
 
-# Function to generate audio from the last bot response
-def generate_audio(history):
-    if not history or len(history) == 0:
-        print("No history available for audio generation")
-        return None
-    last_message = history[-1][1]  # Bot's response
-    if not last_message or not isinstance(last_message, str):
-        print("Last message is empty or not a string")
-        return None
-    if tts_tool:
-        try:
-            # Call the TTS tool directly, expecting (sample_rate, audio_array)
-            result = tts_tool(text=last_message, speed=1.0)
-            if result and len(result) == 2:
-                sample_rate, audio_data = result
-                print("Audio generated successfully")
-                return (sample_rate, audio_data)
-            else:
-                print("TTS tool returned invalid result")
-                return None
-        except Exception as e:
-            print(f"Error generating audio: {e}")
-            return None
-    else:
-        print("TTS tool not available")
-        return None
-
+# Function to validate provider selection based on BYOK
 def validate_provider(api_key, provider):
     if not api_key.strip() and provider != "hf-inference":
         return gr.update(value="hf-inference")
     return gr.update(value=provider)
 
-# Gradio UI
-with gr.Blocks(theme="Nymbo/Nymbo_Theme") chatbot = gr.Chatbot(
+# GRADIO UI
+with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
+    # Create the chatbot component
+    chatbot = gr.Chatbot(
         height=600,
         show_copy_button=True,
         placeholder="Select a model and begin chatting. Now supports multiple inference providers and multimodal inputs",
@@ -234,6 +292,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") chatbot = gr.Chatbot(
     )
     print("Chatbot interface created.")
 
+    # Multimodal textbox for messages (combines text and file uploads)
     msg = gr.MultimodalTextbox(
         placeholder="Type a message or upload images...",
         show_label=False,
@@ -243,83 +302,207 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") chatbot = gr.Chatbot(
         file_count="multiple",
         sources=["upload"]
     )
+
+    # Note: We're removing the separate submit button since MultimodalTextbox has its own
 
-    # Audio generation components
-    with gr.Row():
-        generate_audio_btn = gr.Button("Generate Audio from Last Response")
-        audio_output = gr.Audio(label="Generated Audio", type="numpy")
-
+    # Create accordion for settings
     with gr.Accordion("Settings", open=False):
+        # System message
         system_message_box = gr.Textbox(
             value="You are a helpful AI assistant that can understand images and text.",
             placeholder="You are a helpful assistant.",
             label="System Prompt"
         )
 
+        # Generation parameters
         with gr.Row():
             with gr.Column():
-                max_tokens_slider = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max tokens")
-                temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
-                top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
+                max_tokens_slider = gr.Slider(
+                    minimum=1,
+                    maximum=4096,
+                    value=512,
+                    step=1,
+                    label="Max tokens"
+                )
+
+                temperature_slider = gr.Slider(
+                    minimum=0.1,
+                    maximum=4.0,
+                    value=0.7,
+                    step=0.1,
+                    label="Temperature"
+                )
+
+                top_p_slider = gr.Slider(
+                    minimum=0.1,
+                    maximum=1.0,
+                    value=0.95,
+                    step=0.05,
+                    label="Top-P"
+                )
+
             with gr.Column():
-                frequency_penalty_slider = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
-                seed_slider = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
+                frequency_penalty_slider = gr.Slider(
+                    minimum=-2.0,
+                    maximum=2.0,
+                    value=0.0,
+                    step=0.1,
+                    label="Frequency Penalty"
+                )
+
+                seed_slider = gr.Slider(
+                    minimum=-1,
+                    maximum=65535,
+                    value=-1,
+                    step=1,
+                    label="Seed (-1 for random)"
+                )
 
+        # Provider selection
         providers_list = [
-            "hf-inference", "cerebras", "together", "sambanova", "novita", "cohere", "fireworks-ai", "hyperbolic", "nebius"
+            "hf-inference",  # Default Hugging Face Inference
+            "cerebras",      # Cerebras provider
+            "together",      # Together AI
+            "sambanova",     # SambaNova
+            "novita",        # Novita AI
+            "cohere",        # Cohere
+            "fireworks-ai",  # Fireworks AI
+            "hyperbolic",    # Hyperbolic
+            "nebius",        # Nebius
        ]
 
-        provider_radio = gr.Radio(choices=providers_list, value="hf-inference", label="Inference Provider")
-        byok_textbox = gr.Textbox(value="", label="BYOK (Bring Your Own Key)", info="Enter a custom Hugging Face API key here.", placeholder="Enter your Hugging Face API token", type="password")
-        custom_model_box = gr.Textbox(value="", label="Custom Model", info="(Optional) Provide a custom Hugging Face model path.", placeholder="meta-llama/Llama-3.3-70B-Instruct")
-        model_search_box = gr.Textbox(label="Filter Models", placeholder="Search for a featured model...", lines=1)
+        provider_radio = gr.Radio(
+            choices=providers_list,
+            value="hf-inference",
+            label="Inference Provider",
+        )
+
+        # New BYOK textbox
+        byok_textbox = gr.Textbox(
+            value="",
+            label="BYOK (Bring Your Own Key)",
+            info="Enter a custom Hugging Face API key here. When empty, only 'hf-inference' provider can be used.",
+            placeholder="Enter your Hugging Face API token",
+            type="password"  # Hide the API key for security
+        )
 
+        # Custom model box
+        custom_model_box = gr.Textbox(
+            value="",
+            label="Custom Model",
+            info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
+            placeholder="meta-llama/Llama-3.3-70B-Instruct"
+        )
+
+        # Model search
+        model_search_box = gr.Textbox(
+            label="Filter Models",
+            placeholder="Search for a featured model...",
+            lines=1
+        )
+
+        # Featured models list
         models_list = [
-            "meta-llama/Llama-3.2-11B-Vision-Instruct", "meta-llama/Llama-3.3-70B-Instruct", "meta-llama/Llama-3.1-70B-Instruct",
-            "meta-llama/Llama-3.0-70B-Instruct", "meta-llama/Llama-3.2-3B-Instruct", "meta-llama/Llama-3.2-1B-Instruct",
-            "meta-llama/Llama-3.1-8B-Instruct", "NousResearch/Hermes-3-Llama-3.1-8B", "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
-            "mistralai/Mistral-Nemo-Instruct-2407", "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.3",
-            "mistralai/Mistral-7B-Instruct-v0.2", "Qwen/Qwen3-235B-A22B", "Qwen/Qwen3-32B", "Qwen/Qwen2.5-72B-Instruct",
-            "Qwen/Qwen2.5-3B-Instruct", "Qwen/Qwen2.5-0.5B-Instruct", "Qwen/QwQ-32B", "Qwen/Qwen2.5-Coder-32B-Instruct",
-            "microsoft/Phi-3.5-mini-instruct", "microsoft/Phi-3-mini-128k-instruct", "microsoft/Phi-3-mini-4k-instruct"
+            "meta-llama/Llama-3.2-11B-Vision-Instruct",
+            "meta-llama/Llama-3.3-70B-Instruct",
+            "meta-llama/Llama-3.1-70B-Instruct",
+            "meta-llama/Llama-3.0-70B-Instruct",
+            "meta-llama/Llama-3.2-3B-Instruct",
+            "meta-llama/Llama-3.2-1B-Instruct",
+            "meta-llama/Llama-3.1-8B-Instruct",
+            "NousResearch/Hermes-3-Llama-3.1-8B",
+            "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
+            "mistralai/Mistral-Nemo-Instruct-2407",
+            "mistralai/Mixtral-8x7B-Instruct-v0.1",
+            "mistralai/Mistral-7B-Instruct-v0.3",
+            "mistralai/Mistral-7B-Instruct-v0.2",
+            "Qwen/Qwen3-235B-A22B",
+            "Qwen/Qwen3-32B",
+            "Qwen/Qwen2.5-72B-Instruct",
+            "Qwen/Qwen2.5-3B-Instruct",
+            "Qwen/Qwen2.5-0.5B-Instruct",
+            "Qwen/QwQ-32B",
+            "Qwen/Qwen2.5-Coder-32B-Instruct",
+            "microsoft/Phi-3.5-mini-instruct",
+            "microsoft/Phi-3-mini-128k-instruct",
+            "microsoft/Phi-3-mini-4k-instruct",
        ]
 
-        featured_model_radio = gr.Radio(label="Select a model below", choices=models_list, value="meta-llama/Llama-3.2-11B-Vision-Instruct", interactive=True)
+        featured_model_radio = gr.Radio(
+            label="Select a model below",
+            choices=models_list,
+            value="meta-llama/Llama-3.2-11B-Vision-Instruct",
+            interactive=True
+        )
+
         gr.Markdown("[View all Text-to-Text models](https://huggingface.co/models?inference_provider=all&pipeline_tag=text-generation&sort=trending) | [View all multimodal models](https://huggingface.co/models?inference_provider=all&pipeline_tag=image-text-to-text&sort=trending)")
 
+    # Add MCP Support Section
+    with gr.Accordion("MCP Support (for LLMs)", open=False):
+        gr.Markdown("""
+        ### MCP Support
+
+        This app supports the Model Context Protocol (MCP), allowing Large Language Models like Claude Desktop to use it as a text generation tool.
+
+        To use this app with an MCP client, add the following configuration:
+
+        ```json
+        {
+          "mcpServers": {
+            "textGen": {
+              "url": "https://YOUR_USERNAME-serverless-textgen-hub.hf.space/gradio_api/mcp/sse"
+            }
+          }
+        }
+        ```
+
+        Replace `YOUR_USERNAME` with your actual Hugging Face username.
+        """)
+
+    # Chat history state
     chat_history = gr.State([])
 
+    # Function to filter models
     def filter_models(search_term):
         print(f"Filtering models with search term: {search_term}")
         filtered = [m for m in models_list if search_term.lower() in m.lower()]
         print(f"Filtered models: {filtered}")
         return gr.update(choices=filtered)
 
+    # Function to set custom model from radio
     def set_custom_model_from_radio(selected):
         print(f"Featured model selected: {selected}")
         return selected
 
+    # Function for the chat interface
     def user(user_message, history):
         print(f"User message received: {user_message}")
+
+        # Skip if message is empty (no text and no files)
         if not user_message or (not user_message.get("text") and not user_message.get("files")):
             print("Empty message, skipping")
             return history
 
+        # Prepare multimodal message format
         text_content = user_message.get("text", "").strip()
         files = user_message.get("files", [])
 
         print(f"Text content: {text_content}")
         print(f"Files: {files}")
 
+        # If both text and files are empty, skip
         if not text_content and not files:
             print("No content to display")
             return history
 
+        # Add message with images to history
        if files and len(files) > 0:
+            # Add text message first if it exists
            if text_content:
                print(f"Adding text message: {text_content}")
                history.append([text_content, None])
 
+            # Then add each image file separately
            for file_path in files:
                if file_path and isinstance(file_path, str):
                    print(f"Adding image: {file_path}")
@@ -327,10 +510,12 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") chatbot = gr.Chatbot(
 
             return history
         else:
+            # For text-only messages
             print(f"Adding text-only message: {text_content}")
             history.append([text_content, None])
             return history
 
+    # Define bot response function
     def bot(history, system_msg, max_tokens, temperature, top_p, freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model):
         if not history or len(history) == 0:
             print("No history to process")
@@ -360,50 +545,91 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") chatbot = gr.Chatbot(
 
         if is_image:
             for response in respond(
-                text_context, [image_path], history[:-1], system_msg, max_tokens, temperature, top_p,
-                freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model
+                text_context,
+                [image_path],
+                history[:-1],
+                system_msg,
+                max_tokens,
+                temperature,
+                top_p,
+                freq_penalty,
+                seed,
+                provider,
+                api_key,
+                custom_model,
+                search_term,
+                selected_model
             ):
                 history[-1][1] = response
                 yield history
         else:
             for response in respond(
-                text_content, None, history[:-1], system_msg, max_tokens, temperature, top_p,
-                freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model
+                text_content,
+                None,
+                history[:-1],
+                system_msg,
+                max_tokens,
+                temperature,
+                top_p,
+                freq_penalty,
+                seed,
+                provider,
+                api_key,
+                custom_model,
+                search_term,
+                selected_model
            ):
                history[-1][1] = response
                yield history
 
-    msg.submit(user, [msg, chatbot], [chatbot], queue=False).then(
-        bot, [chatbot, system_message_box, max_tokens_slider, temperature_slider, top_p_slider,
-        frequency_penalty_slider, seed_slider, provider_radio, byok_textbox, custom_model_box,
-        model_search_box, featured_model_radio], [chatbot]
-    ).then(lambda: {"text": "", "files": []}, None, [msg])
+    # Event handlers
+    msg.submit(
+        user,
+        [msg, chatbot],
+        [chatbot],
+        queue=False
+    ).then(
+        bot,
+        [chatbot, system_message_box, max_tokens_slider, temperature_slider, top_p_slider,
+         frequency_penalty_slider, seed_slider, provider_radio, byok_textbox, custom_model_box,
+         model_search_box, featured_model_radio],
+        [chatbot]
+    ).then(
+        lambda: {"text": "", "files": []},
+        None,
+        [msg]
+    )
 
-    model_search_box.change(fn=filter_models, inputs=model_search_box, outputs=featured_model_radio)
+    model_search_box.change(
+        fn=filter_models,
+        inputs=model_search_box,
+        outputs=featured_model_radio
+    )
     print("Model search box change event linked.")
 
-    featured_model_radio.change(fn=set_custom_model_from_radio, inputs=featured_model_radio, outputs=custom_model_box)
+    featured_model_radio.change(
+        fn=set_custom_model_from_radio,
+        inputs=featured_model_radio,
+        outputs=custom_model_box
+    )
     print("Featured model radio button change event linked.")
-
-    byok_textbox.change(fn=validate_provider, inputs=[byok_textbox, provider_radio], outputs=provider_radio)
+
+    byok_textbox.change(
+        fn=validate_provider,
+        inputs=[byok_textbox, provider_radio],
+        outputs=provider_radio
+    )
     print("BYOK textbox change event linked.")
 
-    provider_radio.change(fn=validate_provider, inputs=[byok_textbox, provider_radio], outputs=provider_radio)
+    provider_radio.change(
+        fn=validate_provider,
+        inputs=[byok_textbox, provider_radio],
+        outputs=provider_radio
+    )
     print("Provider radio button change event linked.")
 
-    # Event handler for audio generation
-    generate_audio_btn.click(fn=generate_audio, inputs=[chatbot], outputs=[audio_output])
-
-    # Initialize MCP Client on app load
-    demo.load(init_mcp_client)
-
 print("Gradio interface initialized.")
 
 if __name__ == "__main__":
     print("Launching the demo application.")
-    try:
-        demo.launch(server_api=True)
-    finally:
-        if mcp_client:
-            mcp_client.close()
-            print("MCP Client closed.")
+    demo.launch(show_api=True, mcp_server=True)
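
For reference, a minimal sketch of calling the Space's new MCP endpoint from Python, reusing the `smolagents` `MCPClient` pattern that the removed Kokoro TTS integration used. The Space URL placeholder and the exposed tool name (`text_generation`) are assumptions based on this commit, not confirmed behavior.

```python
# Sketch only: connect to this Space's MCP endpoint with smolagents' MCPClient,
# mirroring the pattern the removed init_mcp_client() used for the Kokoro TTS tool.
# The Space URL placeholder and the tool name "text_generation" are assumptions.
from smolagents.mcp_client import MCPClient

mcp_client = MCPClient(
    {"url": "https://YOUR_USERNAME-serverless-textgen-hub.hf.space/gradio_api/mcp/sse"}
)
try:
    # List the tools the Gradio MCP server exposes
    tools = mcp_client.get_tools()
    # The text tool is presumably registered under the added function's name
    text_tool = next((t for t in tools if t.name == "text_generation"), None)
    if text_tool:
        print(text_tool(message="Write a haiku about GPUs."))
    else:
        print("text_generation tool not found; available:", [t.name for t in tools])
finally:
    mcp_client.close()
```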