Nymbo committed on
Commit 0d8a414 · verified · 1 Parent(s): c3b8601

Update app.py

Files changed (1)
  1. app.py +391 -267
app.py CHANGED
@@ -1,37 +1,44 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
  import os
4
- import json
5
  import base64
6
  from PIL import Image
7
  import io
8
 
9
- # Load the default access token from environment variable at startup
10
- # This will be used if no custom key is provided by the user.
11
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
12
- print(f"Default HF_TOKEN from environment loaded: {'Present' if ACCESS_TOKEN else 'Not set'}")
13
 
14
  # Function to encode image to base64
15
- def encode_image(image_path):
16
- if not image_path:
17
- print("No image path provided")
18
  return None
19
 
20
  try:
21
- print(f"Encoding image from path: {image_path}")
22
 
23
- if isinstance(image_path, Image.Image):
24
- image = image_path
25
  else:
26
- image = Image.open(image_path)
 
27
 
28
  if image.mode == 'RGBA':
 
29
  image = image.convert('RGB')
30
 
31
  buffered = io.BytesIO()
32
  image.save(buffered, format="JPEG")
33
  img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
34
- print("Image encoded successfully")
35
  return img_str
36
  except Exception as e:
37
  print(f"Error encoding image: {e}")
@@ -48,130 +55,144 @@ def respond(
48
  frequency_penalty,
49
  seed,
50
  provider,
51
- custom_api_key, # This is the value from the BYOK textbox
52
  custom_model,
53
  model_search_term,
54
  selected_model
55
  ):
56
- print(f"Received message: {message}")
57
- print(f"Received {len(image_files) if image_files else 0} images")
58
- # print(f"History: {history}") # Can be very verbose
59
- print(f"System message: {system_message}")
60
- print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
61
- print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
62
- print(f"Selected provider: {provider}")
63
- print(f"Custom API Key input field value (raw): '{custom_api_key[:10]}...' (masked if long)")
64
- print(f"Selected model (custom_model input field): {custom_model}")
65
- print(f"Model search term: {model_search_term}")
66
- print(f"Selected model from radio: {selected_model}")
67
 
68
  token_to_use = None
69
- original_hf_token_env_value = os.environ.get("HF_TOKEN")
70
- env_hf_token_temporarily_modified = False
71
-
72
- if custom_api_key and custom_api_key.strip():
73
- token_to_use = custom_api_key.strip()
74
- print(f"USING CUSTOM API KEY (BYOK): '{token_to_use[:5]}...' (masked for security).")
75
- # Aggressively ensure custom key is fundamental:
76
- # Temporarily remove HF_TOKEN from os.environ if it exists,
77
- # to prevent any possibility of InferenceClient picking it up.
78
- if "HF_TOKEN" in os.environ:
79
- print(f"Temporarily unsetting HF_TOKEN from environment (was: {'Present' if os.environ.get('HF_TOKEN') else 'Not set'}) to prioritize custom key.")
80
- del os.environ["HF_TOKEN"]
81
- env_hf_token_temporarily_modified = True
82
- elif ACCESS_TOKEN: # Use default token from environment if no custom key
83
- token_to_use = ACCESS_TOKEN
84
- print(f"USING DEFAULT API KEY (HF_TOKEN from environment variable at script start): '{token_to_use[:5]}...' (masked for security).")
85
- # Ensure HF_TOKEN is set in the current env if it was loaded at start
86
- # This handles cases where it might have been unset by a previous call with a custom key
87
- if original_hf_token_env_value is not None:
88
- os.environ["HF_TOKEN"] = original_hf_token_env_value
89
- elif "HF_TOKEN" in os.environ: # If ACCESS_TOKEN was loaded but original_hf_token_env_value was None (e.g. set by other means)
90
- pass # Let it be whatever it is
91
  else:
92
- print("No custom API key provided AND no default HF_TOKEN was found in environment at script start.")
93
- print("InferenceClient will be initialized without an explicit token. May fail or use public access.")
94
- # token_to_use remains None
95
- # If HF_TOKEN was in env and we want to ensure it's not used when token_to_use is None:
96
- if "HF_TOKEN" in os.environ:
97
- print(f"Temporarily unsetting HF_TOKEN from environment (was: {'Present' if os.environ.get('HF_TOKEN') else 'Not set'}) as no valid key is chosen.")
98
- del os.environ["HF_TOKEN"]
99
- env_hf_token_temporarily_modified = True # Mark for restoration
100
 
101
- print(f"Final token being passed to InferenceClient: '{str(token_to_use)[:5]}...' (masked)" if token_to_use else "None")
102
 
103
- try:
104
- client = InferenceClient(token=token_to_use, provider=provider)
105
- print(f"Hugging Face Inference Client initialized with {provider} provider.")
106
 
107
- if seed == -1:
108
- seed = None
109
 
110
- user_content = []
111
- if message and message.strip():
112
- user_content.append({"type": "text", "text": message})
113
-
114
- if image_files:
115
- for img_path in image_files:
116
- if img_path:
117
- encoded_image = encode_image(img_path)
118
- if encoded_image:
119
- user_content.append({
120
- "type": "image_url",
121
- "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}
122
- })
123
-
124
- if not user_content: # If only images were sent and none encoded, or empty message
125
- if image_files: # If there were image files, it implies an image-only message
126
- user_content = [{"type": "text", "text": ""}] # Send an empty text for context, or specific prompt
127
- else: # Truly empty input
128
- yield "Error: Empty message content."
129
- return
130
-
131
-
132
- messages = [{"role": "system", "content": system_message}]
133
- for val in history:
134
- user_part, assistant_part = val
135
- # Handle multimodal history if necessary (simplified for now)
136
- if isinstance(user_part, dict) and 'files' in user_part: # from MultimodalTextbox
137
- history_text = user_part.get("text", "")
138
- history_files = user_part.get("files", [])
139
- current_user_content_history = []
140
- if history_text:
141
- current_user_content_history.append({"type": "text", "text": history_text})
142
- for h_img_path in history_files:
143
- encoded_h_img = encode_image(h_img_path)
144
- if encoded_h_img:
145
- current_user_content_history.append({
146
- "type": "image_url",
147
- "image_url": {"url": f"data:image/jpeg;base64,{encoded_h_img}"}
148
- })
149
- if current_user_content_history:
150
- messages.append({"role": "user", "content": current_user_content_history})
151
- elif isinstance(user_part, str): # from simple text history
152
- messages.append({"role": "user", "content": user_part})
153
-
154
- if assistant_part:
155
- messages.append({"role": "assistant", "content": assistant_part})
156
-
157
- messages.append({"role": "user", "content": user_content if len(user_content) > 1 or not isinstance(user_content[0], dict) or user_content[0].get("type") != "text" else user_content[0]["text"]})
158
-
159
-
160
- model_to_use = custom_model.strip() if custom_model.strip() else selected_model
161
- print(f"Model selected for inference: {model_to_use}")
162
-
163
- response_text = ""
164
- print(f"Sending request to {provider} with model {model_to_use}.")
165
-
166
- parameters = {
167
- "max_tokens": max_tokens,
168
- "temperature": temperature,
169
- "top_p": top_p,
170
- "frequency_penalty": frequency_penalty,
171
- }
172
- if seed is not None:
173
- parameters["seed"] = seed
174
175
  stream = client.chat_completion(
176
  model=model_to_use,
177
  messages=messages,
@@ -179,66 +200,66 @@ def respond(
179
  **parameters
180
  )
181
 
182
- print("Streaming response: ", end="", flush=True)
183
  for chunk in stream:
184
- if hasattr(chunk, 'choices') and chunk.choices:
185
  delta = chunk.choices[0].delta
186
- if hasattr(delta, 'content') and delta.content:
187
- token_chunk = delta.content
188
- print(token_chunk, end="", flush=True)
189
- response_text += token_chunk
190
  yield response_text
191
- print("\nStream finished.")
192
-
193
  except Exception as e:
194
- error_message = f"Error during inference: {e}"
195
- print(error_message)
196
- # If there was already some response, append error. Otherwise, yield error.
197
- if 'response_text' in locals() and response_text:
198
- response_text += f"\n{error_message}"
199
- yield response_text
200
- else:
201
- yield error_message
202
- finally:
203
- # Restore HF_TOKEN in os.environ if it was temporarily removed/modified
204
- if env_hf_token_temporarily_modified:
205
- if original_hf_token_env_value is not None:
206
- os.environ["HF_TOKEN"] = original_hf_token_env_value
207
- print("Restored HF_TOKEN in environment from its original value.")
208
- else:
209
- # If it was unset and originally not present, ensure it remains unset
210
- if "HF_TOKEN" in os.environ: # Should not happen if original was None and we deleted
211
- del os.environ["HF_TOKEN"]
212
- print("HF_TOKEN was originally not set and was temporarily removed; ensuring it remains not set in env.")
213
- print("Response generation attempt complete.")
214
-
215
-
216
- def validate_provider(api_key, provider_choice):
217
- # This validation might need adjustment based on providers.
218
- # For now, it assumes any custom key might work with other providers.
219
- # If HF_TOKEN is the only one available (no custom key), restrict to hf-inference.
220
- if not api_key.strip() and provider_choice != "hf-inference" and ACCESS_TOKEN:
221
- gr.Warning("Default HF_TOKEN can only be used with 'hf-inference' provider. Switching to 'hf-inference'.")
222
- return gr.update(value="hf-inference")
223
- return gr.update(value=provider_choice)
224
-
 
225
  with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
226
  chatbot = gr.Chatbot(
227
  height=600,
228
  show_copy_button=True,
229
- placeholder="Select a model and begin chatting. Supports multimodal inputs.",
230
  layout="panel",
231
- avatar_images=(None, "https://hf.co/front/assets/huggingface_logo.svg") # Bot avatar
232
  )
233
 
234
  msg = gr.MultimodalTextbox(
235
  placeholder="Type a message or upload images...",
236
  show_label=False,
237
  container=False,
238
- scale=12,
239
  file_types=["image"],
240
- file_count="multiple",
241
- sources=["upload"]
242
  )
243
 
244
  with gr.Accordion("Settings", open=False):
@@ -250,142 +271,245 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
250
 
251
  with gr.Row():
252
  with gr.Column():
253
- max_tokens_slider = gr.Slider(minimum=1, maximum=4096, value=1024, step=1, label="Max tokens")
254
- temperature_slider = gr.Slider(minimum=0.0, maximum=2.0, value=0.7, step=0.01, label="Temperature") # Allow 0 for deterministic
255
- top_p_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.95, step=0.01, label="Top-P") # Allow 0
256
  with gr.Column():
257
- frequency_penalty_slider = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
258
- seed_slider = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
259
 
260
  providers_list = ["hf-inference", "cerebras", "together", "sambanova", "novita", "cohere", "fireworks-ai", "hyperbolic", "nebius"]
261
  provider_radio = gr.Radio(choices=providers_list, value="hf-inference", label="Inference Provider")
262
 
263
  byok_textbox = gr.Textbox(
264
  value="", label="BYOK (Bring Your Own Key)",
265
- info="Enter your Hugging Face API key (or provider-specific key). Overrides default. If empty, uses Space's HF_TOKEN (if set) for 'hf-inference'.",
266
- placeholder="hf_... or provider_specific_key", type="password"
267
  )
268
 
269
  custom_model_box = gr.Textbox(
270
- value="", label="Custom Model ID",
271
- info="(Optional) Provide a model ID (e.g., 'meta-llama/Llama-3-8B-Instruct'). Overrides featured model selection.",
272
- placeholder="org/model-name"
273
  )
274
 
275
  model_search_box = gr.Textbox(label="Filter Featured Models", placeholder="Search...", lines=1)
276
 
277
  models_list = [
278
- "meta-llama/Llama-3.2-11B-Vision-Instruct", "meta-llama/Llama-3.1-70B-Instruct",
279
- "mistralai/Mistral-Nemo-Instruct-2407", "Qwen/Qwen2.5-72B-Instruct",
280
- "microsoft/Phi-3.5-mini-instruct", "NousResearch/Hermes-3-Llama-3.1-8B",
281
- # Add more or fetch dynamically if possible
282
  ]
283
  featured_model_radio = gr.Radio(
284
  label="Select a Featured Model", choices=models_list,
285
  value="meta-llama/Llama-3.2-11B-Vision-Instruct", interactive=True
286
  )
287
 
288
- gr.Markdown("[All Text Gen Models](https://huggingface.co/models?pipeline_tag=text-generation&sort=trending) | [All Multimodal Models](https://huggingface.co/models?pipeline_tag=image-text-to-text&sort=trending)")
289
-
290
- # Chat history state (using chatbot component directly for history)
291
-
292
- def handle_user_message_submission(user_input_mmtb, chat_history_list):
293
- # user_input_mmtb is a dict: {"text": "...", "files": ["path1", "path2"]}
294
- text_content = user_input_mmtb.get("text", "")
295
- files = user_input_mmtb.get("files", [])
296
 
297
- # Construct the display for the user message in the chat
298
- # For Gradio Chatbot, user message can be a string or a tuple (text, filepath) or (None, filepath)
299
300
 
301
- if not text_content and not files:
302
- return chat_history_list # Or raise an error/warning
303
-
304
- # Append user message to history.
305
- # The actual content for the API will be constructed in respond()
306
- # For display, we can show text and a placeholder for images, or actual images if supported well.
307
- # Let's pass the raw MultimodalTextbox output to history for now.
308
- chat_history_list.append([user_input_mmtb, None])
309
  return chat_history_list
310
 
311
- def handle_bot_response_generation(
312
- chat_history_list, system_msg, max_tokens, temp, top_p, freq_pen, seed_val,
313
- prov, api_key_val, cust_model_val, search_term_val, feat_model_val
314
  ):
315
- if not chat_history_list or chat_history_list[-1][0] is None:
316
- yield chat_history_list # Or an error message
317
- return
318
 
319
- # The last user message is chat_history_list[-1][0]
320
- # It's the dict from MultimodalTextbox: {"text": "...", "files": ["path1", ...]}
321
- last_user_input_mmtb = chat_history_list[-1][0]
322
 
323
- current_message_text = last_user_input_mmtb.get("text", "")
324
- current_image_files = last_user_input_mmtb.get("files", [])
325
-
326
- # Prepare history for the `respond` function (excluding the current turn's user message)
327
- api_history = []
328
- for user_msg_item, bot_msg_item in chat_history_list[:-1]:
329
- # Convert past user messages (which are MMTB dicts) to API format or simple strings
330
- past_user_text = user_msg_item.get("text", "")
331
- # For simplicity, not including past images in API history here, but could be added
332
- api_history.append((past_user_text, bot_msg_item))
333
-
334
-
335
- # Stream the response
336
- full_response = ""
337
- for_stream_chunk in respond(
338
- message=current_message_text,
339
- image_files=current_image_files,
340
- history=api_history, # Pass the processed history
341
- system_message=system_msg,
342
- max_tokens=max_tokens,
343
- temperature=temp,
344
- top_p=top_p,
345
- frequency_penalty=freq_pen,
346
- seed=seed_val,
347
- provider=prov,
348
- custom_api_key=api_key_val,
349
- custom_model=cust_model_val,
350
- model_search_term=search_term_val, # Note: search_term is for UI filtering, not API
351
- selected_model=feat_model_val
352
- ):
353
- full_response = for_stream_chunk
354
- chat_history_list[-1][1] = full_response
355
- yield chat_history_list
356
 
357
- msg.submit(
358
- handle_user_message_submission,
359
- [msg, chatbot],
360
- [chatbot],
361
- queue=False
362
  ).then(
363
- handle_bot_response_generation,
364
- [chatbot, system_message_box, max_tokens_slider, temperature_slider, top_p_slider,
365
- frequency_penalty_slider, seed_slider, provider_radio, byok_textbox, custom_model_box,
366
- model_search_box, featured_model_radio],
367
- [chatbot]
368
  ).then(
369
- lambda: gr.update(value=None), # Clears MultimodalTextbox: {"text": None, "files": None}
370
- [], # No inputs needed for this
371
- [msg]
 
372
  )
373
 
374
- def filter_models_ui(search_term):
375
- filtered = [m for m in models_list if search_term.lower() in m.lower()] if search_term else models_list
376
- return gr.update(choices=filtered, value=filtered[0] if filtered else None)
377
-
378
- model_search_box.change(fn=filter_models_ui, inputs=model_search_box, outputs=featured_model_radio)
379
 
380
- # No need for set_custom_model_from_radio if custom_model_box overrides featured_model_radio directly in respond()
381
 
 
382
  byok_textbox.change(fn=validate_provider, inputs=[byok_textbox, provider_radio], outputs=provider_radio)
383
  provider_radio.change(fn=validate_provider, inputs=[byok_textbox, provider_radio], outputs=provider_radio)
384
 
385
- print("Gradio interface initialized.")
 
386
 
387
  if __name__ == "__main__":
388
- print("Launching the demo application.")
389
- # For Spaces, share=True is often implied or handled by Spaces platform
390
- # For local, share=True makes it public via Gradio link
391
- demo.queue().launch(show_api=False) # .queue() is good for handling multiple users / long tasks
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
  import os
4
+ import json # Added for debug printing payloads
5
  import base64
6
  from PIL import Image
7
  import io
8
 
9
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
10
+ print(f"Access token from HF_TOKEN env var loaded. Is it None? {ACCESS_TOKEN is None}. Length if not None: {len(ACCESS_TOKEN) if ACCESS_TOKEN else 'N/A'}")
11
 
12
  # Function to encode image to base64
13
+ def encode_image(image_path_or_pil):
14
+ if not image_path_or_pil:
15
+ print("No image path or PIL Image provided to encode_image")
16
  return None
17
 
18
  try:
19
+ # print(f"Encoding image. Input type: {type(image_path_or_pil)}") # Debug
20
 
21
+ if isinstance(image_path_or_pil, Image.Image):
22
+ image = image_path_or_pil
23
+ # print("Input is already a PIL Image.")
24
+ elif isinstance(image_path_or_pil, str):
25
+ # print(f"Input is a path string: {image_path_or_pil}")
26
+ if not os.path.exists(image_path_or_pil):
27
+ print(f"Error: Image path does not exist: {image_path_or_pil}")
28
+ return None
29
+ image = Image.open(image_path_or_pil)
30
  else:
31
+ print(f"Error: Unsupported type for encode_image: {type(image_path_or_pil)}")
32
+ return None
33
 
34
  if image.mode == 'RGBA':
35
+ # print("Converting RGBA image to RGB.")
36
  image = image.convert('RGB')
37
 
38
  buffered = io.BytesIO()
39
  image.save(buffered, format="JPEG")
40
  img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
41
+ # print("Image encoded successfully to base64.")
42
  return img_str
43
  except Exception as e:
44
  print(f"Error encoding image: {e}")
 
55
  frequency_penalty,
56
  seed,
57
  provider,
58
+ custom_api_key, # This is the value from byok_textbox
59
  custom_model,
60
  model_search_term,
61
  selected_model
62
  ):
63
+ print(f"--- New Respond Call ---")
64
+ print(f"Received message: '{message}'")
65
+ print(f"Received {len(image_files) if image_files else 0} image files.")
66
+ # print(f"History length: {len(history)}") # History can be verbose
67
+ print(f"System message: '{system_message}'")
68
+ print(f"Generation Params: MaxTokens={max_tokens}, Temp={temperature}, TopP={top_p}, FreqPenalty={frequency_penalty}, Seed={seed}")
69
+ print(f"Selected provider: '{provider}'")
70
+
71
+ # Explicitly show the raw custom_api_key received
72
+ raw_key_type = type(custom_api_key)
73
+ raw_key_len = len(custom_api_key) if isinstance(custom_api_key, str) else 'N/A (not a string)'
74
+ print(f"Raw custom_api_key from UI: type={raw_key_type}, length={raw_key_len}")
75
+ if isinstance(custom_api_key, str) and len(custom_api_key) > 0:
76
+ print(f"Raw custom_api_key (masked): '{custom_api_key[:4]}...{custom_api_key[-4:]}'" if len(custom_api_key) > 8 else custom_api_key)
77
+
78
 
79
  token_to_use = None
80
+ effective_custom_key = ""
81
+
82
+ if custom_api_key and isinstance(custom_api_key, str): # Ensure it's a string and not None
83
+ effective_custom_key = custom_api_key.strip()
84
+
85
+ if effective_custom_key: # True if string is not empty after stripping
86
+ token_to_use = effective_custom_key
87
+ print(f"TOKEN SELECTION: USING CUSTOM API KEY (BYOK). Length: {len(token_to_use)}")
88
+ if ACCESS_TOKEN and token_to_use == ACCESS_TOKEN:
89
+ print("INFO: Custom key is identical to the environment HF_TOKEN.")
90
  else:
91
+ token_to_use = ACCESS_TOKEN # This will be None if HF_TOKEN is not set or empty
92
+ if token_to_use:
93
+ print(f"TOKEN SELECTION: USING DEFAULT API KEY (HF_TOKEN from env). Length: {len(token_to_use)}")
94
+ else:
95
+ print("TOKEN SELECTION: DEFAULT API KEY (HF_TOKEN from env) IS NOT SET or EMPTY. Custom key was also empty.")
96
 
97
+ if not token_to_use:
98
+ print("CRITICAL WARNING: No API token determined (neither custom nor default was usable/provided). Inference will likely fail or use public access if supported by model/provider.")
99
+ # InferenceClient will handle token=None by trying its own env var lookup or failing.
100
+ else:
101
+ # For debugging, print a masked version of the token being finally used
102
+ if isinstance(token_to_use, str) and len(token_to_use) > 8:
103
+ print(f"FINAL TOKEN for InferenceClient: '{token_to_use[:4]}...{token_to_use[-4:]}' (masked)")
104
+ elif isinstance(token_to_use, str):
105
+ print(f"FINAL TOKEN for InferenceClient: '{token_to_use}' (short token)")
106
+ else: # Should not happen if logic above is correct and token_to_use is string or None
107
+ print(f"FINAL TOKEN for InferenceClient: {token_to_use} (not a string or None, unusual!)")
108
+
109
+ # Initialize the Inference Client with the provider and appropriate token
110
+ client = InferenceClient(token=token_to_use, provider=provider)
111
+ print(f"Hugging Face Inference Client initialized with provider: '{provider}'.")
112
 
113
+ if seed == -1: # Convert seed to None if -1 (meaning random)
114
+ seed = None
 
115
 
116
+ # Prepare user_content (current message with text and/or images)
117
+ user_content_parts = []
118
+ if message and message.strip():
119
+ user_content_parts.append({"type": "text", "text": message})
120
+
121
+ if image_files and len(image_files) > 0:
122
+ for img_file_path in image_files:
123
+ if img_file_path: # img_file_path is a string path from Gradio MultimodalTextbox
124
+ encoded_image = encode_image(img_file_path)
125
+ if encoded_image:
126
+ user_content_parts.append({
127
+ "type": "image_url",
128
+ "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}
129
+ })
130
+ else:
131
+ print(f"Warning: Failed to encode image for current message: {img_file_path}")
132
+
133
+ # Determine final user_content structure
134
+ if not user_content_parts: # No text and no images
135
+ print("Warning: Current user message is empty (no text, no images).")
136
+ # Depending on API, might need to send empty string or handle this case.
137
+ # For now, let it proceed; API might error or interpret as empty prompt.
138
+ final_user_content = ""
139
+ elif len(user_content_parts) == 1 and user_content_parts[0]["type"] == "text":
140
+ final_user_content = user_content_parts[0]["text"] # Text-only, pass as string
141
+ else:
142
+ final_user_content = user_content_parts # Multimodal, pass as list of dicts
143
+
144
+ # Prepare messages list for the API
145
+ messages = [{"role": "system", "content": system_message}]
146
+
147
+ for hist_user_content, hist_assistant_content in history:
148
+ # hist_user_content can be string (text) or tuple (text, [image_paths])
149
+ if hist_user_content:
150
+ if isinstance(hist_user_content, tuple) and len(hist_user_content) == 2:
151
+ # Multimodal history entry: (text, [list_of_image_paths])
152
+ hist_text, hist_image_paths = hist_user_content
153
+ current_hist_user_parts = []
154
+ if hist_text and hist_text.strip():
155
+ current_hist_user_parts.append({"type": "text", "text": hist_text})
156
+ if hist_image_paths:
157
+ for hist_img_path in hist_image_paths:
158
+ encoded_hist_img = encode_image(hist_img_path)
159
+ if encoded_hist_img:
160
+ current_hist_user_parts.append({
161
+ "type": "image_url",
162
+ "image_url": {"url": f"data:image/jpeg;base64,{encoded_hist_img}"}
163
+ })
164
+ else:
165
+ print(f"Warning: Failed to encode history image: {hist_img_path}")
166
+ if current_hist_user_parts: # Only add if there's content
167
+ messages.append({"role": "user", "content": current_hist_user_parts})
168
+
169
+ elif isinstance(hist_user_content, str): # Text-only history entry
170
+ messages.append({"role": "user", "content": hist_user_content})
171
+ else:
172
+ print(f"Warning: Unexpected type for history user content: {type(hist_user_content)}")
173
 
174
+ if hist_assistant_content:
175
+ messages.append({"role": "assistant", "content": hist_assistant_content})
176
+
177
+ messages.append({"role": "user", "content": final_user_content})
178
+ # print(f"Final messages object for API: {json.dumps(messages, indent=2)}") # Very verbose, use for deep debugging
179
+
180
+ model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
181
+ print(f"Model selected for inference: '{model_to_use}'")
182
+
183
+ response_text = ""
184
+ print(f"Sending request to provider '{provider}' for model '{model_to_use}'. Streaming enabled.")
185
+
186
+ parameters = {
187
+ "max_tokens": max_tokens,
188
+ "temperature": temperature,
189
+ "top_p": top_p,
190
+ "frequency_penalty": frequency_penalty,
191
+ }
192
+ if seed is not None:
193
+ parameters["seed"] = seed
194
 
195
+ try:
196
  stream = client.chat_completion(
197
  model=model_to_use,
198
  messages=messages,
 
200
  **parameters
201
  )
202
 
203
+ # print("Streaming response tokens: ", end="", flush=True) # Can be noisy
204
  for chunk in stream:
205
+ if hasattr(chunk, 'choices') and len(chunk.choices) > 0:
206
  delta = chunk.choices[0].delta
207
+ if delta and hasattr(delta, 'content') and delta.content:
208
+ token_text = delta.content
209
+ # print(token_text, end="", flush=True) # Handled by yield
210
+ response_text += token_text
211
  yield response_text
212
+ # print("\nStream ended.")
 
213
  except Exception as e:
214
+ error_message = f"{type(e).__name__}: {str(e)}"
215
+ print(f"ERROR DURING INFERENCE: {error_message}")
216
+ # If it's a client error (4xx), the request body might be relevant
217
+ if hasattr(e, 'response') and e.response is not None:
218
+ print(f"Error details: Status {e.response.status_code}. Response text: {e.response.text}")
219
+ if 400 <= e.response.status_code < 500:
220
+ try:
221
+ print(f"Offending request messages payload (first 1000 chars): {json.dumps(messages, indent=2)[:1000]}")
222
+ except Exception as E:
223
+ print(f"Could not dump messages payload: {E}")
224
+
225
+ response_text += f"\nAn error occurred: {error_message}"
226
+ yield response_text
227
+
228
+ print("Completed response generation for current call.")
229
+
230
+
231
+ # Function to validate provider selection based on BYOK
232
+ def validate_provider(api_key, provider_choice): # Renamed provider to provider_choice
233
+ # This function's purpose was to force hf-inference if no BYOK for other providers.
234
+ # However, InferenceClient handles provider-specific keys or HF token routing.
235
+ # For now, let's assume any key might work with any provider and let InferenceClient handle it.
236
+ # If a custom key is entered, it *could* be for any provider.
237
+ # If no custom key, and ACCESS_TOKEN is used, it's an HF_TOKEN, best for hf-inference or HF-managed providers.
238
+ # The current logic doesn't strictly need this validation if we trust InferenceClient.
239
+ # Keeping it simple:
240
+ # if not api_key.strip() and provider_choice != "hf-inference":
241
+ # print(f"No BYOK, but provider '{provider_choice}' selected. Forcing 'hf-inference'.")
242
+ # return gr.update(value="hf-inference")
243
+ return gr.update(value=provider_choice) # No change for now, allow user selection.
244
+
245
+ # GRADIO UI
246
  with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
247
  chatbot = gr.Chatbot(
248
  height=600,
249
  show_copy_button=True,
250
+ placeholder="Select a model, enter your message, and upload images if needed.",
251
  layout="panel",
252
+ avatar_images=(None, "https://huggingface.co/chat/huggingchat/logo.svg") # Example bot avatar
253
  )
254
 
255
  msg = gr.MultimodalTextbox(
256
  placeholder="Type a message or upload images...",
257
  show_label=False,
258
  container=False,
259
+ scale=12, # Ensure this is within a gr.Row() or similar if scale is used effectively
260
  file_types=["image"],
261
+ file_count="multiple", # Allows multiple image uploads
262
+ sources=["upload"] # Can add "clipboard"
263
  )
264
 
265
  with gr.Accordion("Settings", open=False):
 
271
 
272
  with gr.Row():
273
  with gr.Column():
274
+ max_tokens_slider = gr.Slider(1, 4096, value=512, step=1, label="Max new tokens")
275
+ temperature_slider = gr.Slider(0.1, 2.0, value=0.7, step=0.05, label="Temperature") # Range adjusted
276
+ top_p_slider = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-P")
277
  with gr.Column():
278
+ frequency_penalty_slider = gr.Slider(-2.0, 2.0, value=0.0, step=0.1, label="Frequency Penalty")
279
+ seed_slider = gr.Slider(-1, 65535, value=-1, step=1, label="Seed (-1 for random)")
280
 
281
  providers_list = ["hf-inference", "cerebras", "together", "sambanova", "novita", "cohere", "fireworks-ai", "hyperbolic", "nebius"]
282
  provider_radio = gr.Radio(choices=providers_list, value="hf-inference", label="Inference Provider")
283
 
284
  byok_textbox = gr.Textbox(
285
  value="", label="BYOK (Bring Your Own Key)",
286
+ info="Enter your API key. For 'hf-inference', use an HF token. For other providers, use their specific key or an HF token if supported.",
287
+ placeholder="Enter your API token here", type="password"
288
  )
289
 
290
  custom_model_box = gr.Textbox(
291
+ value="", label="Custom Model ID / Endpoint",
292
+ info="(Optional) Provide a custom model ID (e.g., 'meta-llama/Llama-3-70b-chat-hf') or full endpoint URL. Overrides featured model selection.",
293
+ placeholder="org/model-name or full URL"
294
  )
295
 
296
  model_search_box = gr.Textbox(label="Filter Featured Models", placeholder="Search...", lines=1)
297
 
298
  models_list = [
299
+ "meta-llama/Llama-3.2-11B-Vision-Instruct", "meta-llama/Llama-3.3-70B-Instruct",
300
+ "meta-llama/Llama-3.1-70B-Instruct", "meta-llama/Llama-3.0-70B-Instruct",
301
+ "meta-llama/Llama-3.2-3B-Instruct", "meta-llama/Llama-3.2-1B-Instruct",
302
+ "meta-llama/Llama-3.1-8B-Instruct", "NousResearch/Hermes-3-Llama-3.1-8B",
303
+ "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", "mistralai/Mistral-Nemo-Instruct-2407",
304
+ "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.3",
305
+ "mistralai/Mistral-7B-Instruct-v0.2", "Qwen/Qwen3-235B-A22B", "Qwen/Qwen3-32B",
306
+ "Qwen/Qwen2.5-72B-Instruct", "Qwen/Qwen2.5-3B-Instruct", "Qwen/Qwen2.5-0.5B-Instruct",
307
+ "Qwen/QwQ-32B", "Qwen/Qwen2.5-Coder-32B-Instruct", "microsoft/Phi-3.5-mini-instruct",
308
+ "microsoft/Phi-3-mini-128k-instruct", "microsoft/Phi-3-mini-4k-instruct",
309
  ]
310
  featured_model_radio = gr.Radio(
311
  label="Select a Featured Model", choices=models_list,
312
  value="meta-llama/Llama-3.2-11B-Vision-Instruct", interactive=True
313
  )
314
+ gr.Markdown("[All Text-to-Text Models](https://huggingface.co/models?inference_provider=all&pipeline_tag=text-generation&sort=trending) | [All Multimodal Models](https://huggingface.co/models?inference_provider=all&pipeline_tag=image-text-to-text&sort=trending)")
315
+
316
+ # Chat history state (remains gr.State for proper handling by Gradio)
317
+ # The `chatbot` component itself manages its display state.
318
+ # We need a separate state if we want to manipulate the history structure before passing to API.
319
+ # The current `bot` function takes `chatbot` (which is history) directly.
320
+
321
+ # Revised user function for MultimodalTextbox
322
+ # It appends the user's input (text and/or files) to the chatbot history.
323
+ # The `bot` function will then process this history.
324
+ def handle_user_input(multimodal_input, chat_history_list):
325
+ text_input = multimodal_input.get("text", "").strip()
326
+ file_inputs = multimodal_input.get("files", []) # List of file paths
327
+
328
+ # print(f"User input: Text='{text_input}', Files={file_inputs}")
329
+
330
+ if not text_input and not file_inputs:
331
+ # print("User input empty, not adding to history.")
332
+ return chat_history_list # No change if input is empty
333
+
334
+ # For multimodal display in chatbot, we can represent images using Markdown.
335
+ # The actual file paths will be used by `respond` for API calls.
336
+ # We need to decide how to store this in history for `respond`
337
+ # Option 1: Store (text, [paths]) tuple for user turns.
338
+ # Option 2: Create separate entries for text and images.
339
 
340
+ # Let's use Option 1 for structured history, easier for `respond`
341
+ # The `chatbot` component can display a text representation.
342
+
343
+ display_entry_user = ""
344
+ if text_input:
345
+ display_entry_user += text_input
346
 
347
+ # For display in chatbot, we can use Markdown for images.
348
+ # For passing to `respond` via history, we need the actual paths.
349
+ # The `bot` function will unpack this.
350
+
351
+ # For `chatbot` display:
352
+ # If there are images, we can create a text representation.
353
+ # For example, just list "<image1> <image2>" or use Markdown if supported for local files.
354
+ # Gradio Chatbot displays images if the path is a local temp file path.
355
+
356
+ user_turn_content_for_api = (text_input, [f.name for f in file_inputs if f] if file_inputs else [])
357
+
358
+ # For chatbot display:
359
+ # Gradio's Chatbot can display images directly if you pass a list like:
360
+ # [[(image_path1,), (image_path2,)], None] for an image-only user message
361
+ # Or [[text_input, (image_path1,)], None]
362
+ # Let's try to prepare for this.
363
+
364
+ if file_inputs:
365
+ # If there's text AND files, Gradio expects text first, then tuples for files.
366
+ # e.g., history.append( [ [text_input] + [(file.name,) for file in file_inputs], None] )
367
+ # Or, more simply, if Chatbot handles multimodal input display well:
368
+ chatbot_user_message = []
369
+ if text_input:
370
+ chatbot_user_message.append(text_input)
371
+ for file_obj in file_inputs:
372
+ if file_obj and hasattr(file_obj, 'name'): # file_obj is a TemporaryFileWrapper
373
+ chatbot_user_message.append((file_obj.name,)) # Tuple for image path
374
+
375
+ chat_history_list.append([chatbot_user_message, None])
376
+
377
+ elif text_input: # Text only
378
+ chat_history_list.append([text_input, None])
379
 
380
+ # The `bot` function will receive `chat_history_list`.
381
+ # It needs to reconstruct text and image paths from `chat_history_list[-1][0]`
382
+ # to pass to `respond`'s `message` and `image_files` parameters.
383
+
 
384
  return chat_history_list
385
 
386
+
387
+ # Revised bot function to handle history from handle_user_input
388
+ def process_bot_response(
389
+ current_chat_history, # This is the full history from the chatbot
390
+ system_msg, max_tkns, temp, tp_p, freq_pen, sd, prov, api_k, cust_model, srch_term, sel_model
391
  ):
392
+ if not current_chat_history or not current_chat_history[-1][0]:
393
+ print("Bot: History is empty or last user message is empty.")
394
+ return current_chat_history # Or yield current_chat_history
395
 
396
+ last_user_turn_content = current_chat_history[-1][0] # This is what handle_user_input created
397
 
398
+ # Extract text and image paths from last_user_turn_content
399
+ current_message_text = ""
400
+ current_image_paths = []
401
+
402
+ if isinstance(last_user_turn_content, str): # Text-only
403
+ current_message_text = last_user_turn_content
404
+ elif isinstance(last_user_turn_content, list): # Potentially multimodal from handle_user_input
405
+ for item in last_user_turn_content:
406
+ if isinstance(item, str):
407
+ current_message_text = item # Assumes one text part
408
+ elif isinstance(item, tuple) and len(item) > 0 and isinstance(item[0], str):
409
+ current_image_paths.append(item[0]) # item[0] is the image path
410
+
411
+ # print(f"Bot: Extracted for respond - Text='{current_message_text}', Images={current_image_paths}")
412
+
413
+ # History for `respond` should be all turns *except* the current one.
414
+ history_for_api = []
415
+ for user_content, assistant_content in current_chat_history[:-1]:
416
+ # Reconstruct (text, [paths]) structure for history items if they were multimodal
417
+ # This part needs careful handling if history itself contains multimodal user turns
418
+ # For simplicity, assuming history user_content is string or already (text, [paths])
419
+ # The current `handle_user_input` makes `user_content` a list for multimodal.
420
+ # This needs to be harmonized.
421
 
422
+ # Let's simplify: `respond` will parse history. We just pass it.
423
+ # The `respond` function's history processing needs to handle the new format.
424
+ # The `respond` function expects history items to be:
425
+ # user_part: str OR (text_str, [img_paths_list])
426
+ # assistant_part: str
427
+
428
+ # Let's re-structure history_for_api based on how `handle_user_input` formats it.
429
+ # `handle_user_input` stores `chatbot_user_message` which is `[text, (path1,), (path2,)]` or `text`
430
+ # `respond` needs to be adapted for this history format if we pass it directly.
431
+
432
+ # For now, let's adapt the history passed to `respond` to its expected format.
433
+ api_hist_user_entry = None
434
+ if isinstance(user_content, str): # Simple text history
435
+ api_hist_user_entry = user_content
436
+ elif isinstance(user_content, list): # Multimodal history from `handle_user_input`
437
+ hist_text = ""
438
+ hist_paths = []
439
+ for item in user_content:
440
+ if isinstance(item, str): hist_text = item
441
+ elif isinstance(item, tuple): hist_paths.append(item[0])
442
+ api_hist_user_entry = (hist_text, hist_paths)
443
+
444
+ history_for_api.append( (api_hist_user_entry, assistant_content) )
445
+
446
+
447
+ # Call respond with the current message parts and the processed history
448
+ # The `respond` function's first two args are `message` (text) and `image_files` (list of paths)
449
+ # for the *current* turn.
450
+
451
+ # Clear the placeholder for bot's response in the last history item
452
+ current_chat_history[-1][1] = ""
453
+
454
+ stream = respond(
455
+ current_message_text,
456
+ current_image_paths,
457
+ history_for_api, # Pass the history *before* the current turn
458
+ system_msg, max_tkns, temp, tp_p, freq_pen, sd, prov, api_k, cust_model, srch_term, sel_model
459
+ )
460
+
461
+ for partial_response in stream:
462
+ current_chat_history[-1][1] = partial_response
463
+ yield current_chat_history
464
+
465
+
466
+ # Event handlers
467
+ # 1. User submits message (text and/or files)
468
+ # 2. `handle_user_input` updates chatbot history with user's message.
469
+ # 3. `process_bot_response` takes this new history, calls API, and streams response back to chatbot.
470
+
471
+ submit_event = msg.submit(
472
+ handle_user_input,
473
+ inputs=[msg, chatbot], # Pass current message and full history
474
+ outputs=[chatbot], # Update chatbot with user's message
475
+ queue=False # Process user input quickly
476
  ).then(
477
+ process_bot_response,
478
+ inputs=[
479
+ chatbot, # Full history including the latest user message
480
+ system_message_box, max_tokens_slider, temperature_slider, top_p_slider,
481
+ frequency_penalty_slider, seed_slider, provider_radio, byok_textbox,
482
+ custom_model_box, model_search_box, featured_model_radio
483
+ ],
484
+ outputs=[chatbot] # Stream bot's response to chatbot
485
  ).then(
486
+ lambda: gr.update(value=None), # Clear MultimodalTextbox (text and files)
487
+ None, # No inputs
488
+ [msg], # Target component to clear
489
+ queue=False
490
  )
491
 
492
+ def filter_models_choices(search_term):
493
+ # print(f"Filtering models with: '{search_term}'")
494
+ if not search_term: return gr.update(choices=models_list)
495
+ filtered = [m for m in models_list if search_term.lower() in m.lower()]
496
+ # print(f"Filtered models: {filtered}")
497
+ return gr.update(choices=filtered if filtered else [])
498
+
499
+ model_search_box.change(fn=filter_models_choices, inputs=model_search_box, outputs=featured_model_radio)
500
 
501
+ # When a featured model is selected, it could optionally update the custom_model_box.
502
+ # For now, custom_model_box is an override. If empty, featured_model_radio is used by `respond`.
503
+ # No direct link needed unless you want radio to populate custom_model_box.
504
 
505
+ # Provider validation (simplified, as InferenceClient handles token logic)
506
  byok_textbox.change(fn=validate_provider, inputs=[byok_textbox, provider_radio], outputs=provider_radio)
507
  provider_radio.change(fn=validate_provider, inputs=[byok_textbox, provider_radio], outputs=provider_radio)
508
 
509
+ print("Gradio UI defined. Initializing...")
510
+
511
 
512
  if __name__ == "__main__":
513
+ print("Launching Gradio demo...")
514
+ demo.launch(show_api=True, debug=True) # Enable debug for more Gradio logs
515
+ print("Gradio demo launched.")
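For context, the updated respond() builds OpenAI-style multimodal messages (text plus base64 data-URI images) and streams them through huggingface_hub's InferenceClient, roughly as in the minimal sketch below. This is only an illustration, not code from the commit: it assumes a valid HF_TOKEN in the environment and a vision-capable model, and the model ID and image path are placeholder values.

# Minimal sketch of the payload shape and streaming loop used by respond().
# Assumptions: HF_TOKEN is set, the chosen provider/model accepts
# OpenAI-style multimodal messages; "example.jpg" and the model ID are placeholders.
import base64, io, os
from PIL import Image
from huggingface_hub import InferenceClient

def to_data_uri(path):
    # Mirrors encode_image(): convert to RGB, re-encode as JPEG, base64-encode.
    img = Image.open(path).convert("RGB")
    buf = io.BytesIO()
    img.save(buf, format="JPEG")
    return "data:image/jpeg;base64," + base64.b64encode(buf.getvalue()).decode()

client = InferenceClient(token=os.getenv("HF_TOKEN"), provider="hf-inference")
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": [
        {"type": "text", "text": "Describe this image."},
        {"type": "image_url", "image_url": {"url": to_data_uri("example.jpg")}},
    ]},
]
for chunk in client.chat_completion(
    model="meta-llama/Llama-3.2-11B-Vision-Instruct",
    messages=messages,
    stream=True,
    max_tokens=512,
):
    # Each streamed chunk carries an incremental delta, as in the app's loop.
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)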