Nymbo committed
Commit 4db9e4f (verified)
1 Parent(s): d6da898

Update app.py

Files changed (1):
  1. app.py +257 -410
app.py CHANGED
@@ -1,19 +1,47 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient as HubInferenceClient # Renamed to avoid conflict
3
  import os
4
  import json
5
  import base64
6
  from PIL import Image
7
  import io
8
 
9
- # Smolagents imports
10
- from smolagents import CodeAgent, Tool, LiteLLMModel, OpenAIServerModel, TransformersModel, InferenceClientModel as SmolInferenceClientModel
11
- from smolagents.gradio_ui import stream_to_gradio
12
-
13
 
14
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
15
  print("Access token loaded.")
16
 
17
  # Function to encode image to base64
18
  def encode_image(image_path):
19
  if not image_path:
@@ -44,20 +72,6 @@ def encode_image(image_path):
44
  print(f"Error encoding image: {e}")
45
  return None
46
 
47
- # --- Smolagents Tool Definition ---
48
- try:
49
- image_generation_tool = Tool.from_space(
50
- "black-forest-labs/FLUX.1-schnell",
51
- name="image_generator",
52
- description="Generates an image from a textual prompt. Use this tool if the user asks to generate, create, or draw an image.",
53
- token=ACCESS_TOKEN # Pass token if the space might be private or has rate limits
54
- )
55
- print("Image generation tool loaded successfully.")
56
- SMOLAGENTS_TOOLS = [image_generation_tool]
57
- except Exception as e:
58
- print(f"Error loading image generation tool: {e}. Proceeding without it.")
59
- SMOLAGENTS_TOOLS = []
60
-
61
  def respond(
62
  message,
63
  image_files, # Changed parameter name and structure
@@ -76,7 +90,7 @@ def respond(
76
  ):
77
  print(f"Received message: {message}")
78
  print(f"Received {len(image_files) if image_files else 0} images")
79
- # print(f"History: {history}") # Can be very verbose
80
  print(f"System message: {system_message}")
81
  print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
82
  print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
@@ -86,7 +100,41 @@ def respond(
86
  print(f"Model search term: {model_search_term}")
87
  print(f"Selected model from radio: {selected_model}")
88
 
89
- # Determine which token to use
90
  token_to_use = custom_api_key if custom_api_key.strip() != "" else ACCESS_TOKEN
91
 
92
  if custom_api_key.strip() != "":
@@ -94,129 +142,134 @@ def respond(
94
  else:
95
  print("USING DEFAULT API KEY: Environment variable HF_TOKEN is being used for authentication")
96
 
97
  # Determine which model to use, prioritizing custom_model if provided
98
  model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
99
- print(f"Model selected for LLM: {model_to_use}")
100
 
101
- # Prepare parameters for the LLM
102
- llm_parameters = {
103
- "max_tokens": max_tokens, # For LiteLLMModel, OpenAIServerModel
104
- "max_new_tokens": max_tokens, # For TransformersModel, InferenceClientModel
105
  "temperature": temperature,
106
  "top_p": top_p,
107
  "frequency_penalty": frequency_penalty,
108
  }
109
- if seed != -1:
110
- llm_parameters["seed"] = seed
111
-
112
- # Initialize the smolagents Model
113
- # For simplicity, we'll use InferenceClientModel if provider is hf-inference,
114
- # otherwise LiteLLMModel which supports many providers.
115
- # You might want to add more sophisticated logic to select the right smolagents Model class.
116
- if provider == "hf-inference" or provider is None or provider == "": # provider can be None if custom_model is a URL
117
- smol_model = SmolInferenceClientModel(
118
- model_id=model_to_use,
119
- token=token_to_use,
120
- provider=provider if provider else None, # Pass provider only if it's explicitly set and not hf-inference default
121
- **llm_parameters
122
- )
123
- print(f"Using SmolInferenceClientModel for LLM with provider: {provider or 'default'}")
124
- else:
125
- # Assuming other providers might be LiteLLM compatible
126
- # LiteLLM uses `model` for model_id and `api_key` for token
127
- smol_model = LiteLLMModel(
128
- model_id=f"{provider}/{model_to_use}" if provider else model_to_use, # LiteLLM often expects provider/model_name
129
- api_key=token_to_use,
130
- **llm_parameters
131
- )
132
- print(f"Using LiteLLMModel for LLM with provider: {provider}")
133
-
134
-
135
- # Initialize smolagent
136
- # We'll use CodeAgent as it's generally more powerful.
137
- # The system_message from the UI will be part of the task for the agent.
138
- agent_task = message
139
- if system_message and system_message.strip():
140
- agent_task = f"System Instructions: {system_message}\n\nUser Task: {message}"
141
 
142
- print(f"Initializing CodeAgent with model: {model_to_use}")
143
- agent = CodeAgent(
144
- tools=SMOLAGENTS_TOOLS, # Use the globally defined tools
145
- model=smol_model,
146
- stream_outputs=True # Important for streaming
147
- )
148
- print("CodeAgent initialized.")
149
-
150
- # Prepare multimodal inputs for the agent if images are present
151
- agent_images = []
152
- if image_files and len(image_files) > 0:
153
- for img_path in image_files:
154
- if img_path:
155
- try:
156
- # Smolagents expects PIL Image objects for images
157
- pil_image = Image.open(img_path)
158
- agent_images.append(pil_image)
159
- except Exception as e:
160
- print(f"Error opening image for agent: {e}")
161
-
162
- print(f"Prepared {len(agent_images)} images for the agent.")
163
-
164
- # Start with an empty string to build the response as tokens stream in
165
- response_text = ""
166
- print(f"Running agent with task: {agent_task}")
167
 
 
168
  try:
169
- # Use stream_to_gradio for handling agent's streaming output
170
- # The history needs to be converted to the format smolagents expects if we want to continue conversations.
171
- # For now, we'll pass reset=True to simplify, meaning each call is a new conversation for the agent.
172
- # To support conversation history with the agent, `history` needs to be transformed into agent.memory.steps
173
- # or passed appropriately. The `stream_to_gradio` function expects the agent's internal stream.
174
-
175
- # Simplified history for agent (if needed, but stream_to_gradio handles Gradio's history)
176
- # For `agent.run`, we don't directly pass Gradio's history.
177
- # `stream_to_gradio` will yield messages that Gradio's chatbot can append.
178
-
179
- # The `stream_to_gradio` function itself is a generator.
180
- # It takes the agent and task, and yields Gradio-compatible chat messages.
181
- # The `bot` function in Gradio needs to yield these messages.
182
 
183
- # The `respond` function is already a generator, so we can yield from `stream_to_gradio`.
184
 
185
- # Gradio's history (list of tuples) is not directly used by agent.run()
186
- # Instead, the agent's own memory would handle conversational context if reset=False.
187
188
 
189
- print("Streaming response from agent...")
190
- for content_chunk in stream_to_gradio(
191
- agent,
192
- task=agent_task,
193
- task_images=agent_images if agent_images else None,
194
- reset_agent_memory=True # For simplicity, treat each interaction as new for the agent
195
- ):
196
- # stream_to_gradio yields either a string (for text delta) or a ChatMessage object
197
- if isinstance(content_chunk, str): # This is a text delta
198
- response_text += content_chunk
199
- yield response_text
200
- elif hasattr(content_chunk, 'content'): # This is a ChatMessage object
201
- if isinstance(content_chunk.content, dict) and 'path' in content_chunk.content: # Image/Audio
202
- # Gradio's chatbot can handle dicts for files directly if msg.submit is used
203
- # For streaming, we yield the path or a markdown representation
204
- yield f"![file]({content_chunk.content['path']})"
205
- elif isinstance(content_chunk.content, str):
206
- response_text = content_chunk.content # Replace if it's a full message
207
- yield response_text
208
- else: # Should not happen with stream_to_gradio's typical output
209
- print(f"Unexpected chunk type from stream_to_gradio: {type(content_chunk)}")
210
- yield str(content_chunk)
211
-
212
-
213
- print("\nCompleted response generation from agent.")
214
-
215
  except Exception as e:
216
- print(f"Error during agent execution: {e}")
217
- response_text += f"\nError: {str(e)}"
218
- yield response_text
219
 
 
220
 
221
  # Function to validate provider selection based on BYOK
222
  def validate_provider(api_key, provider):
@@ -230,15 +283,15 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
230
  chatbot = gr.Chatbot(
231
  height=600,
232
  show_copy_button=True,
233
- placeholder="Select a model and begin chatting. Now supports multiple inference providers, multimodal inputs, and image generation tool.",
234
  layout="panel",
235
- show_share_button=True # Added for easy sharing
236
  )
237
  print("Chatbot interface created.")
238
 
239
  # Multimodal textbox for messages (combines text and file uploads)
240
  msg = gr.MultimodalTextbox(
241
- placeholder="Type a message or upload images... (e.g., 'generate an image of a cat playing chess')",
242
  show_label=False,
243
  container=False,
244
  scale=12,
@@ -251,7 +304,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
251
  with gr.Accordion("Settings", open=False):
252
  # System message
253
  system_message_box = gr.Textbox(
254
- value="You are a helpful AI assistant that can understand images and text. If asked to generate an image, use the available image_generator tool.",
255
  placeholder="You are a helpful assistant.",
256
  label="System Prompt"
257
  )
@@ -262,7 +315,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
262
  max_tokens_slider = gr.Slider(
263
  minimum=1,
264
  maximum=4096,
265
- value=1024, # Increased default for potentially longer agent outputs
266
  step=1,
267
  label="Max tokens"
268
  )
@@ -311,7 +364,6 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
311
  "fireworks-ai", # Fireworks AI
312
  "hyperbolic", # Hyperbolic
313
  "nebius", # Nebius
314
- # Add other providers supported by LiteLLM if desired
315
  ]
316
 
317
  provider_radio = gr.Radio(
@@ -324,8 +376,8 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
324
  byok_textbox = gr.Textbox(
325
  value="",
326
  label="BYOK (Bring Your Own Key)",
327
- info="Enter a custom Hugging Face API key here. When empty, only 'hf-inference' provider can be used. For other providers, this key will be used as their respective API key.",
328
- placeholder="Enter your API token",
329
  type="password" # Hide the API key for security
330
  )
331
 
@@ -333,7 +385,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
333
  custom_model_box = gr.Textbox(
334
  value="",
335
  label="Custom Model",
336
- info="(Optional) Provide a custom Hugging Face model path (e.g., 'meta-llama/Llama-3.3-70B-Instruct') or a model name compatible with the selected provider. Overrides any selected featured model.",
337
  placeholder="meta-llama/Llama-3.3-70B-Instruct"
338
  )
339
 
@@ -345,6 +397,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
345
  )
346
 
347
  # Featured models list
 
348
  models_list = [
349
  "meta-llama/Llama-3.2-11B-Vision-Instruct",
350
  "meta-llama/Llama-3.3-70B-Instruct",
@@ -372,7 +425,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
372
  ]
373
 
374
  featured_model_radio = gr.Radio(
375
- label="Select a model below (or specify a custom one above)",
376
  choices=models_list,
377
  value="meta-llama/Llama-3.2-11B-Vision-Instruct", # Default to a multimodal model
378
  interactive=True
@@ -388,300 +441,95 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
388
  print(f"Filtering models with search term: {search_term}")
389
  filtered = [m for m in models_list if search_term.lower() in m.lower()]
390
  print(f"Filtered models: {filtered}")
391
- return gr.update(choices=filtered)
 
392
 
393
- # Function to set custom model from radio (actually, sets the selected_model which is then overridden by custom_model_box if filled)
394
- def set_selected_model_from_radio(selected):
395
  print(f"Featured model selected: {selected}")
396
- # This function's output will be one of the inputs to `respond`
397
- return selected
398
 
399
  # Function for the chat interface
400
- def user(user_message_input, history):
401
- # user_message_input is a dict from MultimodalTextbox: {"text": str, "files": list[str]}
402
- print(f"User input received: {user_message_input}")
403
 
404
- text_content = user_message_input.get("text", "").strip()
405
- files = user_message_input.get("files", [])
406
 
407
  if not text_content and not files:
408
- print("Empty message, skipping history update.")
409
- return history # Or gr.skip() if Gradio version supports it well
410
 
411
- # Append to Gradio's history format
412
- # For multimodal, Gradio expects a list of (text, file_path) tuples or (None, file_path)
413
- # We will represent this as a single user turn which might have text and multiple images.
414
- # The `respond` function will then parse this.
415
- # Gradio's Chatbot can display images if the message is a tuple (None, filepath)
416
- # or if text contains markdown like ![alt](filepath)
417
-
418
- current_turn_display = []
419
  if text_content:
420
- current_turn_display.append(text_content)
421
- if files:
422
- for file_path in files:
423
- current_turn_display.append((file_path,)) # Tuple for Gradio to recognize as file
424
-
425
- if not current_turn_display: # Should not happen if we check above
426
- return history
427
-
428
- # For simplicity in history, we'll just append the text and a note about images.
429
- # The actual image data is passed separately to `respond`.
430
- display_message = text_content
431
- if files:
432
- display_message += f" ({len(files)} image(s) uploaded)"
433
 
434
- history.append([display_message, None])
435
  return history
436
 
437
  # Define bot response function
438
- def bot(history, system_msg, max_tokens_val, temperature_val, top_p_val, freq_penalty_val, seed_val, provider_val, api_key_val, custom_model_val, search_term_val, selected_model_val, request: gr.Request):
439
- if not history or not history[-1][0]: # If no user message
 
440
  yield history
441
  return
442
 
443
- # The user's latest input is in history[-1][0]
444
- # The MultimodalTextbox sends a dict: {"text": str, "files": list[str]}
445
- # However, our `user` function above simplifies this for display in `chatbot`.
446
- # We need to retrieve the original input from the request if possible, or parse history.
447
-
448
- # For simplicity with Gradio's streaming and history, we'll re-parse the last user message.
449
- # This is not ideal but works for this setup.
450
- last_user_turn_display = history[-1][0]
451
-
452
- # This is a simplified parsing. A more robust way would be to pass
453
- # the raw MultimodalTextbox output to `bot` directly.
454
- user_text_content = ""
455
- user_image_files = []
456
-
457
- if isinstance(last_user_turn_display, str):
458
- # Check if it's a simple text or a text with image count
459
- img_count_match = re.search(r" \((\d+) image\(s\) uploaded\)$", last_user_turn_display)
460
- if img_count_match:
461
- user_text_content = last_user_turn_display[:img_count_match.start()]
462
- # We can't get back the actual file paths from this string alone.
463
- # This part needs the raw input from MultimodalTextbox.
464
- # For now, we'll assume image_files are passed correctly to `respond`
465
- # This means `msg.submit` should pass `msg` directly to `respond`'s `message` param.
466
- else:
467
- user_text_content = last_user_turn_display
468
-
469
- # The `msg` (MultimodalTextbox) component's value is what we need for image_files
470
- # We assume `msg.value` is implicitly passed or accessible via `request` if Gradio supports it,
471
- # or it should be an explicit input to `bot`.
472
- # For this implementation, we rely on `msg` being passed to `respond` via the `submit` chain.
473
- # The `history` argument to `bot` is for the chatbot display.
474
-
475
- # The actual call to `respond` will happen via the `msg.submit` chain.
476
- # This `bot` function is primarily for updating the chatbot display.
477
-
478
- history[-1][1] = "" # Clear previous bot response
479
-
480
- # `respond` is a generator. We need to iterate through its yields.
481
- # The `msg` component's value (which includes text and files) is the first argument to `respond`.
482
- # We need to ensure that `msg` is correctly passed.
483
- # The current `msg.submit` passes `msg` (the component itself) to `user`, then `user`'s output to `bot`.
484
- # This is problematic for getting the raw files.
485
-
486
- # Correct approach: `msg.submit` should pass `msg` (value) to `respond` (or a wrapper).
487
- # Let's assume `respond` will be called correctly by the `msg.submit` chain.
488
- # This `bot` function will just yield the history updates.
489
-
490
- # The actual generation is now handled by `msg.submit(...).then(respond, ...)`
491
- # This `bot` function is mostly a placeholder in the new structure if `respond` directly yields to chatbot.
492
- # However, Gradio's `chatbot.then(bot, ...)` expects `bot` to be the generator.
493
-
494
- # Re-structuring: `msg.submit` calls `user` to update history for display.
495
- # Then, `user`'s output (which is just `history`) is passed to `bot`.
496
- # `bot` then calls `respond` with all necessary parameters.
497
-
498
- # Extract the latest user message components (text and files)
499
- # This is tricky because `history` only has the display string.
500
- # We need the raw `msg` value.
501
- # The `request: gr.Request` can sometimes hold component values if using `gr.Interface`.
502
- # For Blocks, it's better to pass `msg` directly.
503
-
504
- # Let's assume `user_text_content` and `user_image_files` are correctly extracted
505
- # from the `msg` component's value when `respond` is called.
506
- # The `bot` function here will iterate over what `respond` yields.
507
-
508
- # The `message` param for `respond` should be the raw output of `msg`
509
- # So, `msg` (the component) should be an input to `bot`.
510
- # Then `bot` extracts `text` and `files` from `msg.value` (or `msg` if it's already the value).
511
 
512
- # The `msg.submit` chain needs to be:
513
- # msg.submit(fn=user_interaction_handler, inputs=[msg, chatbot, ...other_params...], outputs=[chatbot])
514
- # where user_interaction_handler calls `user` then `respond`.
515
-
516
- # For now, let's assume `respond` is correctly called by the `msg.submit` chain
517
- # and this `bot` function is what updates the chatbot display.
518
- # The `inputs` to `bot` in `msg.submit(...).then(bot, inputs=[...])` are crucial.
519
-
520
- # The `message` and `image_files` for `respond` will come from the `msg` component.
521
- # The `history` for `respond` will be `history[:-1]` (all but the current user turn).
522
-
523
- # This `bot` function is essentially the core of `respond` now.
524
- # It needs `msg_value` as an input.
525
-
526
- # Let's rename this function to reflect it's the main generation logic
527
- # and ensure it gets the raw `msg` value.
528
- # The Gradio `msg.submit` will call a wrapper that then calls this.
529
- # For simplicity, we'll assume `respond` is called correctly by the chain.
530
- # This `bot` function is what `chatbot.then(bot, ...)` uses.
531
-
532
- # The `history` object here is the one managed by Gradio's Chatbot.
533
- # `history[-1][0]` is the user's latest displayed message.
534
- # `history[-1][1]` is where the bot's response goes.
535
-
536
- # The `respond` function needs the raw message and files.
537
- # The `msg` component itself should be an input to this `bot` function.
538
- # Let's adjust the `msg.submit` call later.
539
-
540
- # For now, this `bot` function is the generator that `chatbot.then()` expects.
541
- # It will internally call `respond`.
542
-
543
- # The `message` and `image_files` for `respond` must be sourced from the `msg` component's value,
544
- # not from `history[-1][0]`.
545
-
546
- # This function signature is what `chatbot.then(bot, ...)` will use.
547
- # The `inputs` to this `bot` must be correctly specified in `msg.submit(...).then(bot, inputs=...)`.
548
- # `msg_input` should be the value of the `msg` MultimodalTextbox.
549
-
550
- # Let's assume `msg_input` is correctly passed as the first argument to this `bot` function.
551
- # We'll rename `history` to `chatbot_history` to avoid confusion.
552
-
553
- # The `msg.submit` chain should be:
554
- # 1. `user` function: takes `msg_input`, `chatbot_history` -> updates `chatbot_history` for display, returns raw `msg_input` and `chatbot_history[:-1]` for `respond`.
555
- # 2. `respond` function: takes raw `msg_input`, `history_for_respond`, and other params -> yields response chunks.
556
 
557
- # Simpler: `msg.submit` calls `respond_wrapper` which handles history and calls `respond`.
558
 
559
- # The current structure: `msg.submit` calls `user`, then `bot`.
560
- # `user` appends user's input to `chatbot` (history).
561
- # `bot` gets this updated `chatbot` (history).
562
- # `bot` needs to extract the latest user input (text & files) to pass to `respond`.
563
- # This is difficult because `history` only has display strings.
564
-
565
- # Solution: `msg` (the component's value) must be passed to `bot`.
566
- # Let's adjust the `msg.submit` later. For now, assume `message_and_files_input` is passed.
567
-
568
- # This function's signature for `chatbot.then(bot, ...)`:
569
- # bot(chatbot_history, system_msg, ..., msg_input_value)
570
- # The `msg_input_value` will be the first argument if we adjust the `inputs` list.
571
-
572
- # Let's assume the first argument `chatbot_history` is the chatbot's state.
573
- # The actual user input (text + files) needs to be passed separately.
574
- # The `inputs` to `bot` in the `.then(bot, inputs=[...])` call must include `msg`.
575
-
576
- # If `respond` is called directly by `msg.submit().then()`, then `respond` itself is the generator.
577
- # The `chatbot` component updates based on what `respond` yields.
578
-
579
- # The current `msg.submit` structure is:
580
- # .then(user, [msg, chatbot], [chatbot]) <- `user` updates chatbot with user's message
581
- # .then(bot, [chatbot, ...other_params...], [chatbot]) <- `bot` generates response
582
-
583
- # `bot` needs the raw `msg` value. Let's add `msg` as an input to `bot`.
584
- # The `inputs` list for `.then(bot, ...)` will need to include `msg`.
585
-
586
- # The `message` and `image_files` for `respond` should come from `msg_val` (the value of the msg component)
587
- # `history_for_api` should be `chatbot_history[:-1]`
588
-
589
- # The `chatbot` variable passed to `bot` is the current state of the Chatbot UI.
590
- # `chatbot[-1][0]` is the latest user message displayed.
591
- # `chatbot[-1][1]` is where the bot's response will be streamed.
592
-
593
- # We need the raw `msg` value. Let's assume it's passed as an argument to `bot`.
594
- # The `inputs` in `.then(bot, inputs=[msg, chatbot, ...])`
595
-
596
- # The `respond` function will be called with:
597
- # - message: text from msg_val
598
- # - image_files: files from msg_val
599
- # - history: chatbot_history[:-1] (all previous turns)
600
-
601
- # This `bot` function is the one that `chatbot.then()` will call.
602
- # It needs `msg_val` as an input.
603
-
604
- # The `inputs` for this `bot` function in the Gradio chain will be:
605
- # [chatbot, system_message_box, ..., msg]
606
- # So, `msg_val` will be the last parameter.
607
-
608
- msg_val = history.pop('_msg_val_temp_') # Retrieve the raw msg value
609
-
610
- raw_text_input = msg_val.get("text", "")
611
- raw_file_inputs = msg_val.get("files", [])
612
-
613
- # The history for the API should be all turns *before* the current user input
614
- history_for_api = [turn for turn in history[:-1]] # all but the last (current) turn
615
-
616
- history[-1][1] = "" # Clear placeholder for bot response
617
-
618
- for chunk in respond(
619
- message=raw_text_input,
620
- image_files=raw_file_inputs,
621
- history=history_for_api, # Pass history *before* current user turn
622
  system_message=system_msg,
623
- max_tokens=max_tokens_val,
624
- temperature=temperature_val,
625
- top_p=top_p_val,
626
- frequency_penalty=freq_penalty_val,
627
- seed=seed_val,
628
- provider=provider_val,
629
- custom_api_key=api_key_val,
630
- custom_model=custom_model_val,
631
- selected_model=selected_model_val, # selected_model is now the one from radio
632
- model_search_term=search_term_val # Though search_term is not directly used by respond
633
  ):
634
- history[-1][1] = chunk # Stream to the last message's bot part
635
  yield history
636
 
637
-
638
  # Event handlers
639
- # We need to pass the raw `msg` value to the `bot` function.
640
- # We can temporarily store it in the `history` state object if Gradio allows modifying state objects directly.
641
- # A cleaner way is to have a single handler function.
642
-
643
- def combined_user_and_bot(msg_val, chatbot_history, system_msg, max_tokens_val, temperature_val, top_p_val, freq_penalty_val, seed_val, provider_val, api_key_val, custom_model_val, search_term_val, selected_model_val):
644
- # 1. Call user to update chatbot display
645
- updated_chatbot_history = user(msg_val, chatbot_history)
646
- yield updated_chatbot_history # Show user message immediately
647
-
648
- # 2. Call respond (which is now the core generation logic)
649
- # The history for `respond` should be `updated_chatbot_history[:-1]`
650
-
651
- # Clear placeholder for bot's response in the last turn
652
- if updated_chatbot_history and updated_chatbot_history[-1] is not None:
653
- updated_chatbot_history[-1][1] = ""
654
-
655
- history_for_api = updated_chatbot_history[:-1] if updated_chatbot_history else []
656
-
657
- for chunk in respond(
658
- message=msg_val.get("text", ""),
659
- image_files=msg_val.get("files", []),
660
- history=history_for_api,
661
- system_message=system_msg,
662
- max_tokens=max_tokens_val,
663
- temperature=temperature_val,
664
- top_p=top_p_val,
665
- frequency_penalty=freq_penalty_val,
666
- seed=seed_val,
667
- provider=provider_val,
668
- custom_api_key=api_key_val,
669
- custom_model=custom_model_val,
670
- selected_model=selected_model_val,
671
- model_search_term=search_term_val
672
- ):
673
- if updated_chatbot_history and updated_chatbot_history[-1] is not None:
674
- updated_chatbot_history[-1][1] = chunk
675
- yield updated_chatbot_history
676
-
677
  msg.submit(
678
- combined_user_and_bot,
679
- [msg, chatbot, system_message_box, max_tokens_slider, temperature_slider, top_p_slider,
680
  frequency_penalty_slider, seed_slider, provider_radio, byok_textbox, custom_model_box,
681
- model_search_box, featured_model_radio], # Pass `msg` (value of MultimodalTextbox)
682
  [chatbot]
683
  ).then(
684
- lambda: {"text": "", "files": []}, # Clear inputs after submission
685
  None,
686
  [msg]
687
  )
@@ -694,12 +542,11 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
694
  )
695
  print("Model search box change event linked.")
696
 
697
- # Connect the featured model radio to update the custom model box (if user selects from radio, it populates custom_model_box)
698
  featured_model_radio.change(
699
- fn=lambda selected_model_from_radio: selected_model_from_radio, # Directly pass the value
700
  inputs=featured_model_radio,
701
- outputs=custom_model_box # This makes custom_model_box reflect the radio selection
702
- # User can then override it by typing.
703
  )
704
  print("Featured model radio button change event linked.")
705
 
@@ -723,4 +570,4 @@ print("Gradio interface initialized.")
723
 
724
  if __name__ == "__main__":
725
  print("Launching the demo application.")
726
- demo.launch(show_api=True, share=True) # Added share=True for easier testing
 
1
  import gradio as gr
2
+ from huggingface_hub import InferenceClient
3
  import os
4
  import json
5
  import base64
6
  from PIL import Image
7
  import io
8
 
9
+ # Import smolagents components
10
+ from smolagents import CodeAgent, Tool
11
+ from smolagents.models import InferenceClientModel as SmolInferenceClientModel # Alias to avoid conflict
 
12
 
13
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
14
  print("Access token loaded.")
15
 
16
+ # --- Smolagents Setup for Image Generation ---
17
+ print("Initializing smolagents components for image generation...")
18
+ try:
19
+ image_generation_tool = Tool.from_space(
20
+ "black-forest-labs/FLUX.1-schnell", # The Space ID of the image generation tool
21
+ name="image_generator",
22
+ description="Generates an image from a textual prompt. Use this tool if the user asks to 'generate an image of X', 'draw X', 'create a picture of X', or similar requests for visual content based on a description.",
23
+ # Ensure the HF_TOKEN is available to gradio-client if the space is private or requires auth
24
+ token=ACCESS_TOKEN if ACCESS_TOKEN and ACCESS_TOKEN.strip() != "" else None
25
+ )
26
+ print("Image generation tool loaded successfully.")
27
+
28
+ # Initialize a model for the CodeAgent. This can be a simpler/faster model
29
+ # as it's mainly for orchestrating the tool call.
30
+ # Using a default InferenceClientModel from smolagents
31
+ smol_agent_model = SmolInferenceClientModel(token=ACCESS_TOKEN if ACCESS_TOKEN and ACCESS_TOKEN.strip() != "" else None)
32
+ print(f"Smolagent model initialized with: {smol_agent_model.model_id if hasattr(smol_agent_model, 'model_id') else 'default'}")
33
+
34
+ image_agent = CodeAgent(
35
+ tools=[image_generation_tool],
36
+ model=smol_agent_model,
37
+ verbosity_level=1 # Set to 0 for less verbose agent logging, 1 for info, 2 for debug
38
+ )
39
+ print("Image generation agent initialized successfully.")
40
+ except Exception as e:
41
+ print(f"Error initializing smolagents components: {e}")
42
+ image_agent = None
43
+ # --- End Smolagents Setup ---
44
+
45
  # Function to encode image to base64
46
  def encode_image(image_path):
47
  if not image_path:
 
72
  print(f"Error encoding image: {e}")
73
  return None
74
 
75
  def respond(
76
  message,
77
  image_files, # Changed parameter name and structure
 
90
  ):
91
  print(f"Received message: {message}")
92
  print(f"Received {len(image_files) if image_files else 0} images")
93
+ print(f"History: {history}")
94
  print(f"System message: {system_message}")
95
  print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
96
  print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
 
100
  print(f"Model search term: {model_search_term}")
101
  print(f"Selected model from radio: {selected_model}")
102
 
103
+ # --- Agent-based Image Generation ---
104
+ if message.startswith("/generate_image"):
105
+ if image_agent is None:
106
+ yield "Image generation agent is not initialized. Please check server logs."
107
+ return
108
+
109
+ prompt_for_agent = message.replace("/generate_image", "").strip()
110
+ if not prompt_for_agent:
111
+ yield "Please provide a prompt for image generation. Usage: /generate_image <your prompt>"
112
+ return
113
+
114
+ print(f"Image generation requested with prompt: {prompt_for_agent}")
115
+ try:
116
+ # Agent run is blocking and returns the final result
117
+ # Ensure the image_agent's model also has a token if needed for its operations (though it's for orchestration)
118
+ agent_response = image_agent.run(prompt_for_agent)
119
+
120
+ if isinstance(agent_response, str) and agent_response.lower().startswith("error"):
121
+ yield f"Agent error: {agent_response}"
122
+ elif hasattr(agent_response, 'to_string'): # Check if it's an AgentImage or similar
123
+ image_path = agent_response.to_string() # This is a local path to the generated image
124
+ print(f"Agent returned image path: {image_path}")
125
+ # Gradio's chatbot can display images if the content is a file path string
126
+ # or a tuple (filepath, alt_text)
127
+ yield image_path
128
+ else:
129
+ yield f"Agent returned an unexpected response: {str(agent_response)}"
130
+ return
131
+ except Exception as e:
132
+ print(f"Error running image agent: {e}")
133
+ yield f"Error generating image: {str(e)}"
134
+ return
135
+ # --- End Agent-based Image Generation ---
136
+
137
+ # Determine which token to use for text generation
138
  token_to_use = custom_api_key if custom_api_key.strip() != "" else ACCESS_TOKEN
139
 
140
  if custom_api_key.strip() != "":
 
142
  else:
143
  print("USING DEFAULT API KEY: Environment variable HF_TOKEN is being used for authentication")
144
 
145
+ # Initialize the Inference Client with the provider and appropriate token
146
+ client = InferenceClient(token=token_to_use, provider=provider)
147
+ print(f"Hugging Face Inference Client initialized with {provider} provider for text generation.")
148
+
149
+ # Convert seed to None if -1 (meaning random)
150
+ if seed == -1:
151
+ seed = None
152
+
153
+ # Create multimodal content if images are present
154
+ if image_files and len(image_files) > 0:
155
+ user_content = []
156
+ if message and message.strip():
157
+ user_content.append({
158
+ "type": "text",
159
+ "text": message
160
+ })
161
+ for img_path in image_files: # Assuming image_files contains paths from MultimodalTextbox
162
+ if img_path is not None:
163
+ try:
164
+ encoded_image = encode_image(img_path) # img_path is already a path from MultimodalTextbox
165
+ if encoded_image:
166
+ user_content.append({
167
+ "type": "image_url",
168
+ "image_url": {
169
+ "url": f"data:image/jpeg;base64,{encoded_image}"
170
+ }
171
+ })
172
+ except Exception as e:
173
+ print(f"Error encoding image: {e}")
174
+ else:
175
+ # Text-only message
176
+ user_content = message
177
+
178
+ # Prepare messages in the format expected by the API
179
+ messages = [{"role": "system", "content": system_message}]
180
+ print("Initial messages array constructed.")
181
+
182
+ # Add conversation history to the context
183
+ for val in history:
184
+ user_part = val[0]
185
+ assistant_part = val[1]
186
+
187
+ # Handle user messages (could be text or image markdown)
188
+ if user_part:
189
+ if isinstance(user_part, str) and user_part.startswith("![Image]("):
190
+ # This is an image path from a previous agent generation
191
+ # or a user upload represented as markdown
192
+ history_image_path = user_part.replace("![Image](", "").replace(")", "")
193
+ encoded_history_image = encode_image(history_image_path)
194
+ if encoded_history_image:
195
+ messages.append({"role": "user", "content": [{
196
+ "type": "image_url",
197
+ "image_url": {"url": f"data:image/jpeg;base64,{encoded_history_image}"}
198
+ }]})
199
+ elif isinstance(user_part, tuple) and len(user_part) == 2: # Multimodal input from user
200
+ history_content_list = []
201
+ if user_part[0]: # Text part
202
+ history_content_list.append({"type": "text", "text": user_part[0]})
203
+ for img_hist_path in user_part[1]: # List of image paths
204
+ encoded_img_hist = encode_image(img_hist_path)
205
+ if encoded_img_hist:
206
+ history_content_list.append({
207
+ "type": "image_url",
208
+ "image_url": {"url": f"data:image/jpeg;base64,{encoded_img_hist}"}
209
+ })
210
+ if history_content_list:
211
+ messages.append({"role": "user", "content": history_content_list})
212
+ else: # Regular text message
213
+ messages.append({"role": "user", "content": user_part})
214
+ print(f"Added user message to context (type: {type(user_part)})")
215
+
216
+ if assistant_part:
217
+ messages.append({"role": "assistant", "content": assistant_part})
218
+ print(f"Added assistant message to context: {assistant_part}")
219
+
220
+ # Append the latest user message
221
+ messages.append({"role": "user", "content": user_content})
222
+ print(f"Latest user message appended (content type: {type(user_content)})")
223
+
224
  # Determine which model to use, prioritizing custom_model if provided
225
  model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
226
+ print(f"Model selected for inference: {model_to_use}")
227
+
228
+ # Start with an empty string to build the response as tokens stream in
229
+ response = ""
230
+ print(f"Sending request to {provider} provider.")
231
 
232
+ # Prepare parameters for the chat completion request
233
+ parameters = {
234
+ "max_tokens": max_tokens,
 
235
  "temperature": temperature,
236
  "top_p": top_p,
237
  "frequency_penalty": frequency_penalty,
238
  }
239
 
240
+ if seed is not None:
241
+ parameters["seed"] = seed
242
 
243
+ # Use the InferenceClient for making the request
244
  try:
245
+ # Create a generator for the streaming response
246
+ stream = client.chat_completion(
247
+ model=model_to_use,
248
+ messages=messages,
249
+ stream=True,
250
+ **parameters
251
+ )
252
 
253
+ print("Received tokens: ", end="", flush=True)
254
 
255
+ # Process the streaming response
256
+ for chunk in stream:
257
+ if hasattr(chunk, 'choices') and len(chunk.choices) > 0:
258
+ # Extract the content from the response
259
+ if hasattr(chunk.choices[0], 'delta') and hasattr(chunk.choices[0].delta, 'content'):
260
+ token_text = chunk.choices[0].delta.content
261
+ if token_text:
262
+ print(token_text, end="", flush=True)
263
+ response += token_text
264
+ yield response
265
 
266
+ print()
267
  except Exception as e:
268
+ print(f"Error during inference: {e}")
269
+ response += f"\nError: {str(e)}"
270
+ yield response
271
 
272
+ print("Completed response generation.")
273
 
274
  # Function to validate provider selection based on BYOK
275
  def validate_provider(api_key, provider):
 
283
  chatbot = gr.Chatbot(
284
  height=600,
285
  show_copy_button=True,
286
+ placeholder="Select a model and begin chatting. Use '/generate_image your prompt' to create images.",
287
  layout="panel",
288
+ show_share_button=True # Added for ease of sharing if deployed
289
  )
290
  print("Chatbot interface created.")
291
 
292
  # Multimodal textbox for messages (combines text and file uploads)
293
  msg = gr.MultimodalTextbox(
294
+ placeholder="Type a message or upload images... (e.g., /generate_image a cat wearing a hat)",
295
  show_label=False,
296
  container=False,
297
  scale=12,
 
304
  with gr.Accordion("Settings", open=False):
305
  # System message
306
  system_message_box = gr.Textbox(
307
+ value="You are a helpful AI assistant that can understand images and text. If asked to generate an image, use the image_generator tool.",
308
  placeholder="You are a helpful assistant.",
309
  label="System Prompt"
310
  )
 
315
  max_tokens_slider = gr.Slider(
316
  minimum=1,
317
  maximum=4096,
318
+ value=512,
319
  step=1,
320
  label="Max tokens"
321
  )
 
364
  "fireworks-ai", # Fireworks AI
365
  "hyperbolic", # Hyperbolic
366
  "nebius", # Nebius
 
367
  ]
368
 
369
  provider_radio = gr.Radio(
 
376
  byok_textbox = gr.Textbox(
377
  value="",
378
  label="BYOK (Bring Your Own Key)",
379
+ info="Enter a custom Hugging Face API key here. When empty, only 'hf-inference' provider can be used.",
380
+ placeholder="Enter your Hugging Face API token",
381
  type="password" # Hide the API key for security
382
  )
383
 
 
385
  custom_model_box = gr.Textbox(
386
  value="",
387
  label="Custom Model",
388
+ info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
389
  placeholder="meta-llama/Llama-3.3-70B-Instruct"
390
  )
391
 
 
397
  )
398
 
399
  # Featured models list
400
+ # Updated to include multimodal models
401
  models_list = [
402
  "meta-llama/Llama-3.2-11B-Vision-Instruct",
403
  "meta-llama/Llama-3.3-70B-Instruct",
 
425
  ]
426
 
427
  featured_model_radio = gr.Radio(
428
+ label="Select a model below",
429
  choices=models_list,
430
  value="meta-llama/Llama-3.2-11B-Vision-Instruct", # Default to a multimodal model
431
  interactive=True
 
441
  print(f"Filtering models with search term: {search_term}")
442
  filtered = [m for m in models_list if search_term.lower() in m.lower()]
443
  print(f"Filtered models: {filtered}")
444
+ return gr.update(choices=filtered if filtered else models_list, value=filtered[0] if filtered else models_list[0])
445
+
446
 
447
+ # Function to set custom model from radio
448
+ def set_custom_model_from_radio(selected):
449
  print(f"Featured model selected: {selected}")
450
+ return selected
 
451
 
452
  # Function for the chat interface
453
+ def user(user_message_obj, history):
454
+ print(f"User message object received: {user_message_obj}")
 
455
 
456
+ text_content = user_message_obj.get("text", "").strip()
457
+ files = user_message_obj.get("files", []) # files is a list of temp file paths
458
 
459
  if not text_content and not files:
460
+ print("Empty message (no text, no files), skipping history update.")
461
+ return history # Or raise gr.Error("Please enter a message or upload an image.")
462
 
463
+ # Represent uploaded images in history using markdown syntax for local paths
464
+ # For multimodal models, the actual file path from 'files' will be used in 'respond'
465
+ display_message_parts = []
466
  if text_content:
467
+ display_message_parts.append(text_content)
468
 
469
+ processed_files_for_history = []
470
+ if files:
471
+ for file_path_obj in files:
472
+ # Gradio's MultimodalTextbox provides file objects with a .name attribute for the path
473
+ file_path = file_path_obj.name if hasattr(file_path_obj, 'name') else str(file_path_obj)
474
+ display_message_parts.append(f"![Uploaded Image]({file_path})")
475
+ processed_files_for_history.append(file_path) # Store the actual path for 'respond'
476
+
477
+ # For history, we store the text and a list of file paths
478
+ # The 'respond' function will then re-encode these for the API
479
+ history_entry_user = (text_content, processed_files_for_history)
480
+ history.append([history_entry_user, None])
481
+ print(f"History updated with user input: {history_entry_user}")
482
  return history
483
 
484
  # Define bot response function
485
+ def bot(history, system_msg, max_tokens, temperature, top_p, freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model):
486
+ if not history or len(history) == 0 or history[-1][0] is None:
487
+ print("No user message in history to process for bot.")
488
  yield history
489
  return
490
 
491
+ user_input_tuple = history[-1][0] # This is now (text, [file_paths])
492
+ text_message_from_history = user_input_tuple[0]
493
+ image_files_from_history = user_input_tuple[1]
494
 
495
+ print(f"Bot processing: text='{text_message_from_history}', images={image_files_from_history}")
496
 
497
+ history[-1][1] = ""
498
 
499
+ # Pass text and image file paths to respond function
500
+ for response_chunk in respond(
501
+ message=text_message_from_history,
502
+ image_files=image_files_from_history,
503
+ history=history[:-1], # Pass history excluding the current user turn
504
  system_message=system_msg,
505
+ max_tokens=max_tokens,
506
+ temperature=temperature,
507
+ top_p=top_p,
508
+ frequency_penalty=freq_penalty,
509
+ seed=seed,
510
+ provider=provider,
511
+ custom_api_key=api_key,
512
+ custom_model=custom_model,
513
+ model_search_term=search_term,
514
+ selected_model=selected_model
515
  ):
516
+ history[-1][1] = response_chunk
517
  yield history
518
 
 
519
  # Event handlers
520
  msg.submit(
521
+ user,
522
+ [msg, chatbot], # msg is MultimodalTextboxOutput(text=str, files=List[FileData])
523
+ [chatbot],
524
+ queue=False
525
+ ).then(
526
+ bot,
527
+ [chatbot, system_message_box, max_tokens_slider, temperature_slider, top_p_slider,
528
  frequency_penalty_slider, seed_slider, provider_radio, byok_textbox, custom_model_box,
529
+ model_search_box, featured_model_radio],
530
  [chatbot]
531
  ).then(
532
+ lambda: gr.update(value={"text": "", "files": []}), # Clear MultimodalTextbox
533
  None,
534
  [msg]
535
  )
 
542
  )
543
  print("Model search box change event linked.")
544
 
545
+ # Connect the featured model radio to update the custom model box
546
  featured_model_radio.change(
547
+ fn=set_custom_model_from_radio,
548
  inputs=featured_model_radio,
549
+ outputs=custom_model_box
 
550
  )
551
  print("Featured model radio button change event linked.")
552
 
 
570
 
571
  if __name__ == "__main__":
572
  print("Launching the demo application.")
573
+ demo.launch(show_api=False) # show_api=False for cleaner public interface, True for debugging
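
For reference, a minimal standalone sketch of the image-generation path this commit introduces (the "/generate_image" branch in respond): it mirrors the Tool.from_space, CodeAgent, and agent.run calls committed above. The surrounding script scaffolding is illustrative only, not part of the commit, and it assumes a smolagents version that exposes InferenceClientModel as imported in the new app.py.

# Illustrative sketch only; mirrors the smolagents usage committed in app.py.
import os
from smolagents import CodeAgent, Tool
from smolagents.models import InferenceClientModel  # aliased as SmolInferenceClientModel in app.py

token = os.getenv("HF_TOKEN")  # same env var app.py reads into ACCESS_TOKEN

# Wrap the FLUX.1-schnell Space as a callable tool, as done at module load in app.py.
image_generation_tool = Tool.from_space(
    "black-forest-labs/FLUX.1-schnell",
    name="image_generator",
    description="Generates an image from a textual prompt.",
    token=token,
)

# Small orchestration model plus an agent that can call the tool.
image_agent = CodeAgent(tools=[image_generation_tool], model=InferenceClientModel(token=token))

# respond() strips the "/generate_image" prefix and passes the remainder as the prompt.
result = image_agent.run("a cat wearing a hat")
# The committed code calls .to_string() on the agent's result to obtain a local image path.
print(result.to_string() if hasattr(result, "to_string") else result)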