Nymbo committed on
Commit 5b8ad4f · verified · 1 Parent(s): 4df41b9

Update app.py

Files changed (1):
  1. app.py +164 -174
app.py CHANGED
--- a/app.py (old version, shown first)
@@ -1,19 +1,22 @@
 import gradio as gr
 from openai import OpenAI
 import os
 
-# Load the Hugging Face access token from environment variables
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
-print("Access token loaded.")
 
-# Initialize the OpenAI client pointing to the Hugging Face Inference API
-client = OpenAI(
-    base_url="https://api-inference.huggingface.co/v1/",
-    api_key=ACCESS_TOKEN,
-)
-print("OpenAI client initialized.")
 
-# Define the main function that handles chat responses
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -23,150 +26,129 @@ def respond(
     top_p,
     frequency_penalty,
     seed,
-    custom_model, # Input from the Custom Model textbox
-    featured_model # Input from the Featured Model radio buttons <<< NEW INPUT
 ):
 
     print(f"Received message: {message}")
-    print(f"History: {history}")
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
-    print(f"Custom model input: '{custom_model}'")
-    print(f"Selected featured model: {featured_model}") # Log the featured model selection
 
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
 
-    # Start constructing the message list for the API call with the system message
     messages = [{"role": "system", "content": system_message}]
-    print("Initial messages array constructed.")
 
-    # Add the conversation history to the messages list
     for val in history:
-        user_part = val[0]
-        assistant_part = val[1]
-        if user_part:
-            messages.append({"role": "user", "content": user_part})
-            print(f"Added user message to context: {user_part}")
-        if assistant_part:
-            messages.append({"role": "assistant", "content": assistant_part})
-            print(f"Added assistant message to context: {assistant_part}")
-
-    # Add the latest user message to the list
-    messages.append({"role": "user", "content": message})
-    print("Latest user message appended.")
 
-    # <<< MODEL SELECTION LOGIC UPDATED >>>
-    # Determine the model to use: prioritize the custom model box if it's filled,
-    # otherwise use the selected featured model.
-    custom_model_stripped = custom_model.strip() # Remove leading/trailing whitespace
-    if custom_model_stripped != "":
-        model_to_use = custom_model_stripped # Use custom model if provided
-        print(f"Using custom model: {model_to_use}")
-    else:
-        model_to_use = featured_model # Use the selected featured model
-        print(f"Using selected featured model: {model_to_use}")
 
-    # Initialize an empty string to accumulate the response tokens
     response = ""
-    print("Sending request to Hugging Face Inference API.")
-
-    # Stream the response from the API
-    for message_chunk in client.chat.completions.create(
-        model=model_to_use, # Use the determined model
-        max_tokens=max_tokens, # Set maximum tokens for the response
-        stream=True, # Enable streaming responses
-        temperature=temperature, # Set sampling temperature
-        top_p=top_p, # Set nucleus sampling probability
-        frequency_penalty=frequency_penalty, # Set frequency penalty
-        seed=seed, # Set random seed (if provided)
-        messages=messages, # Pass the constructed message history
-    ):
-        # Get the text content from the current chunk
-        token_text = message_chunk.choices[0].delta.content
-        # Append the token text to the response string (if it's not None)
-        if token_text:
-            print(f"Received token: {token_text}")
-            response += token_text
-            yield response # Yield the partial response back to Gradio for live updates
 
     print("Completed response generation.")
 
-# --- GRADIO UI ---
 
-# Create the main chatbot display area
-chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and begin chatting", layout="panel")
 print("Chatbot interface created.")
 
-# Create the System Prompt input box
-system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt")
-
-# Create sliders for model parameters
-max_tokens_slider = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens")
-temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
 top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
 frequency_penalty_slider = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
 seed_slider = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
-
-# Create the Custom Model input box
 custom_model_box = gr.Textbox(
-    value="", # Default to empty
-    label="Custom Model",
-    info="(Optional) Provide a custom Hugging Face model path. Overrides the featured model selection below.",
-    placeholder="e.g., username/my-custom-model" # Updated placeholder
 )
 
-# Define the list of featured models
-models_list = [
-    "meta-llama/Llama-3.3-70B-Instruct", # Default selected model
-    "meta-llama/Llama-3.1-70B-Instruct",
-    "meta-llama/Llama-3.0-70B-Instruct",
-    "meta-llama/Llama-3.2-3B-Instruct",
-    "meta-llama/Llama-3.2-1B-Instruct",
-    "meta-llama/Llama-3.1-8B-Instruct",
-    "NousResearch/Hermes-3-Llama-3.1-8B",
-    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
-    "mistralai/Mistral-Nemo-Instruct-2407",
-    "mistralai/Mixtral-8x7B-Instruct-v0.1",
-    "mistralai/Mistral-7B-Instruct-v0.3",
-    "mistralai/Mistral-7B-Instruct-v0.2",
-    "Qwen/Qwen3-235B-A22B",
-    "Qwen/Qwen3-32B",
-    "Qwen/Qwen2.5-72B-Instruct",
-    "Qwen/Qwen2.5-3B-Instruct",
-    "Qwen/Qwen2.5-0.5B-Instruct",
-    "Qwen/QwQ-32B",
-    "Qwen/Qwen2.5-Coder-32B-Instruct",
-    "microsoft/Phi-3.5-mini-instruct",
-    "microsoft/Phi-3-mini-128k-instruct",
-    "microsoft/Phi-3-mini-4k-instruct",
-    "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
-    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
-    "HuggingFaceH4/zephyr-7b-beta",
-    "HuggingFaceTB/SmolLM2-360M-Instruct",
-    "tiiuae/falcon-7b-instruct",
-    "01-ai/Yi-1.5-34B-Chat",
-]
-print("Models list initialized.")
-
-# Create the radio button selector for featured models
-featured_model_radio = gr.Radio(
-    label="Select a Featured Model", # Changed label slightly
-    choices=models_list,
-    value="meta-llama/Llama-3.3-70B-Instruct", # Set the default selection
-    interactive=True
 )
-print("Featured models radio button created.")
 
 
-# --- Create the main Chat Interface ---
-# <<< `additional_inputs` UPDATED >>>
 demo = gr.ChatInterface(
-    fn=respond, # The function to call when a message is sent
-    additional_inputs=[ # List of input components passed to the 'respond' function
         system_message_box,
         max_tokens_slider,
         temperature_slider,
@@ -174,73 +156,81 @@ demo = gr.ChatInterface(
         frequency_penalty_slider,
         seed_slider,
         custom_model_box,
-        featured_model_radio # Pass the radio button selection <<< ADDED
     ],
-    fill_height=True, # Make the interface fill the available height
-    chatbot=chatbot, # Use the predefined chatbot component
-    theme="Nymbo/Nymbo_Theme", # Apply a theme
 )
 print("ChatInterface object created.")
 
-# --- Add Model Selection Controls within the Interface ---
-with demo: # Use the ChatInterface as a context manager to add elements
-    with gr.Accordion("Model Selection & Parameters", open=False): # Group controls in an accordion
-        # --- Featured Model Selection ---
-        gr.Markdown("### Featured Models") # Section title
-        model_search_box = gr.Textbox(
-            label="Filter Models",
-            placeholder="Search featured models...",
-            lines=1
-        )
         print("Model search box created.")
 
-        # Place the radio buttons here
-        # No need to define `featured_model_radio` again, just use the variable defined above
-        demo.load(lambda: featured_model_radio, outputs=featured_model_radio) # Ensure it appears in the layout
-        print("Featured model radio added to layout.")
-
-        # --- Custom Model Input ---
-        gr.Markdown("### Custom Model") # Section title
-        # No need to define `custom_model_box` again, just use the variable defined above
-        demo.load(lambda: custom_model_box, outputs=custom_model_box) # Ensure it appears in the layout
-        print("Custom model box added to layout.")
-
-        # --- Parameters ---
-        gr.Markdown("### Parameters") # Section title
-        # Add sliders to the layout
-        demo.load(lambda: max_tokens_slider, outputs=max_tokens_slider)
-        demo.load(lambda: temperature_slider, outputs=temperature_slider)
-        demo.load(lambda: top_p_slider, outputs=top_p_slider)
-        demo.load(lambda: frequency_penalty_slider, outputs=frequency_penalty_slider)
-        demo.load(lambda: seed_slider, outputs=seed_slider)
-        print("Parameter sliders added to layout.")
-
-    # --- Event Listeners ---
 
-    # Function to filter the radio button choices based on search input
     def filter_models(search_term):
         print(f"Filtering models with search term: {search_term}")
-        # List comprehension to find models matching the search term (case-insensitive)
        filtered = [m for m in models_list if search_term.lower() in m.lower()]
        print(f"Filtered models: {filtered}")
-        # Update the 'choices' property of the radio button component
-        return gr.update(choices=filtered)
-
-    # Link the search box's 'change' event to the filter function
-    model_search_box.change(
-        fn=filter_models, # Function to call
-        inputs=model_search_box, # Input component triggering the event
-        outputs=featured_model_radio # Output component to update
-    )
-    print("Model search box change event linked.")
 
-print("Gradio interface layout defined.")
 
-# --- Launch the Application ---
 if __name__ == "__main__":
-    print("Launching the Gradio demo application.")
-    # Launch the Gradio app with API endpoint enabled
-    demo.launch(show_api=True)
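
Aside: stripped of the Gradio plumbing and logging, the old file's core mechanic is the OpenAI SDK pointed at Hugging Face's OpenAI-compatible endpoint through a single module-level client. A minimal sketch of that streaming pattern (the model name is illustrative, and it assumes HF_TOKEN is set in the environment):

import os
from openai import OpenAI

# One static client for the whole process, as in the old app.py
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=os.getenv("HF_TOKEN"),
)

stream = client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct",  # illustrative; any served chat model
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello."},
    ],
    stream=True,
    max_tokens=64,
)

for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:  # deltas can be None (e.g., role-only or final chunks)
        print(delta, end="", flush=True)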
 
+++ b/app.py (new version, shown second)
@@ -1,19 +1,22 @@
 import gradio as gr
 from openai import OpenAI
 import os
+import requests # Added for potential future use, though OpenAI client handles it now
 
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
+if not ACCESS_TOKEN:
+    print("Warning: HF_TOKEN environment variable not set. Authentication might fail.")
+else:
+    print("Access token loaded.")
 
+# Base URLs for different providers
+HF_INFERENCE_BASE_URL = "https://api-inference.huggingface.co/v1/"
+CEREBRAS_ROUTER_BASE_URL = "https://router.huggingface.co/cerebras/v1/" # Use base URL for OpenAI client
+
+# Default provider
+DEFAULT_PROVIDER = "hf-inference"
 
+# --- Main Respond Function ---
 def respond(
     message,
     history: list[tuple[str, str]],
 
@@ -23,150 +26,129 @@ def respond(
     top_p,
     frequency_penalty,
     seed,
+    custom_model,
+    inference_provider # New argument for provider selection
 ):
 
+    print(f"--- New Request ---")
+    print(f"Selected Inference Provider: {inference_provider}")
     print(f"Received message: {message}")
+    # print(f"History: {history}") # Can be verbose
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
+    print(f"Selected model (custom_model): {custom_model}")
+
+    # Determine the base URL based on the selected provider
+    if inference_provider == "cerebras":
+        base_url = CEREBRAS_ROUTER_BASE_URL
+        print(f"Using Cerebras Router endpoint: {base_url}")
+    else: # Default to hf-inference
+        base_url = HF_INFERENCE_BASE_URL
+        print(f"Using HF Inference API endpoint: {base_url}")
+
+    # Initialize the OpenAI client dynamically for each request
+    try:
+        client = OpenAI(
+            base_url=base_url,
+            api_key=ACCESS_TOKEN,
+        )
+        print("OpenAI client initialized for the request.")
+    except Exception as e:
+        print(f"Error initializing OpenAI client: {e}")
+        yield f"Error: Could not initialize API client for provider {inference_provider}. Check token and endpoint."
+        return
 
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
 
     messages = [{"role": "system", "content": system_message}]
+    # print("Initial messages array constructed.") # Less verbose logging
 
+    # Add conversation history to the context
     for val in history:
+        user_part, assistant_part = val[0], val[1]
+        if user_part: messages.append({"role": "user", "content": user_part})
+        if assistant_part: messages.append({"role": "assistant", "content": assistant_part})
 
+    # Append the latest user message
+    messages.append({"role": "user", "content": message})
+    # print("Full message context prepared.") # Less verbose logging
 
+    # If user provided a model, use that; otherwise, fall back to a default model
+    # Ensure a default model is always set if custom_model is empty
+    model_to_use = custom_model.strip() if custom_model.strip() else "meta-llama/Llama-3.3-70B-Instruct"
+    print(f"Model selected for inference: {model_to_use}")
 
+    # Start streaming response
     response = ""
+    print(f"Sending request to {inference_provider} via {base_url}...")
+
+    try:
+        stream = client.chat.completions.create(
+            model=model_to_use,
+            max_tokens=max_tokens,
+            stream=True,
+            temperature=temperature,
+            top_p=top_p,
+            frequency_penalty=frequency_penalty,
+            seed=seed,
+            messages=messages,
+        )
+        for message_chunk in stream:
+            token_text = message_chunk.choices[0].delta.content
+            # Handle potential None or empty tokens gracefully
+            if token_text:
+                # print(f"Received token: {token_text}") # Very verbose
+                response += token_text
+                yield response
+            # Handle potential finish reason if needed (e.g., length)
+            # finish_reason = message_chunk.choices[0].finish_reason
+            # if finish_reason:
+            #     print(f"Stream finished with reason: {finish_reason}")
+
+    except Exception as e:
+        print(f"Error during API call to {inference_provider}: {e}")
+        yield f"Error: API call failed. Details: {str(e)}"
+        return # Stop generation on error
 
     print("Completed response generation.")
 
+# --- GRADIO UI Elements ---
 
+chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and provider, then begin chatting", layout="panel")
 print("Chatbot interface created.")
 
+# Moved these inside the Accordion later
+system_message_box = gr.Textbox(value="You are a helpful assistant.", label="System Prompt")
+max_tokens_slider = gr.Slider(minimum=1, maximum=4096, value=1024, step=1, label="Max new tokens") # Increased default
+temperature_slider = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature") # Adjusted range
 top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
 frequency_penalty_slider = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
 seed_slider = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
 custom_model_box = gr.Textbox(
+    value="",
+    label="Custom Model Path",
+    info="(Optional) Provide a Hugging Face model path. Overrides featured model selection.",
+    placeholder="meta-llama/Llama-3.3-70B-Instruct"
 )
 
+# New UI element for provider selection (will be placed in an Accordion below)
+inference_provider_radio = gr.Radio(
+    choices=["hf-inference", "cerebras"],
+    value=DEFAULT_PROVIDER,
+    label="Inference Provider",
+    info=f"Select the backend API. Default: {DEFAULT_PROVIDER}"
 )
+print("Inference provider radio button created.")
 
 
+# --- Gradio Chat Interface Definition ---
 demo = gr.ChatInterface(
+    fn=respond,
+    additional_inputs=[
+        # Order matters: must match the 'respond' function signature
         system_message_box,
         max_tokens_slider,
         temperature_slider,
 
@@ -174,73 +156,81 @@ demo = gr.ChatInterface(
         frequency_penalty_slider,
         seed_slider,
         custom_model_box,
+        inference_provider_radio, # Added the new input
     ],
+    fill_height=True,
+    chatbot=chatbot,
+    theme="Nymbo/Nymbo_Theme",
+    title="Multi-Provider Chat Hub",
+    description="Chat with various models using different inference backends (HF Inference API or Cerebras via HF Router)."
 )
 print("ChatInterface object created.")
 
+# --- Add Accordions for Settings within the Demo context ---
+with demo:
+    # Model Selection Accordion (existing logic)
+    with gr.Accordion("Model Selection", open=False):
+        model_search_box = gr.Textbox(label="Filter Featured Models", placeholder="Search...", lines=1)
         print("Model search box created.")
 
+        # Example models list (keep your extensive list)
+        models_list = [
+            "meta-llama/Llama-3.3-70B-Instruct", "meta-llama/Llama-3.1-70B-Instruct", "meta-llama/Llama-3.1-8B-Instruct",
+            "NousResearch/Hermes-3-Llama-3.1-8B", "mistralai/Mistral-Nemo-Instruct-2407", "mistralai/Mixtral-8x7B-Instruct-v0.1",
+            "mistralai/Mistral-7B-Instruct-v0.3", "Qwen/Qwen3-32B", "microsoft/Phi-3.5-mini-instruct",
+            # Add the rest of your models here...
+        ]
+        print("Models list initialized.")
+
+        featured_model_radio = gr.Radio(
+            label="Select a Featured Model",
+            choices=models_list,
+            value="meta-llama/Llama-3.3-70B-Instruct", # Default featured model
+            interactive=True
+        )
+        print("Featured models radio button created.")
 
         def filter_models(search_term):
             print(f"Filtering models with search term: {search_term}")
             filtered = [m for m in models_list if search_term.lower() in m.lower()]
+            # Ensure a valid value is selected if the current one is filtered out
+            current_value = featured_model_radio.value
+            if current_value not in filtered and filtered:
+                new_value = filtered[0] # Select the first available filtered model
+            elif not filtered:
+                new_value = None # Or handle empty case as needed
+            else:
+                new_value = current_value # Keep current if still valid
             print(f"Filtered models: {filtered}")
+            return gr.update(choices=filtered, value=new_value)
+
+        def set_custom_model_from_radio(selected_model):
+            """Updates the Custom Model text box when a featured model is selected."""
+            print(f"Featured model selected: {selected_model}")
+            return selected_model # Directly return the selected model name
+
+        model_search_box.change(fn=filter_models, inputs=model_search_box, outputs=featured_model_radio)
+        featured_model_radio.change(fn=set_custom_model_from_radio, inputs=featured_model_radio, outputs=custom_model_box)
+        print("Model selection events linked.")
+
+    # Advanced Settings Accordion (new)
+    with gr.Accordion("Advanced Settings", open=False):
+        # Place the provider selection and parameter sliders here
+        gr.Markdown("Configure inference parameters and select the backend provider.")
+        # Add the UI elements defined earlier into this accordion
+        gr.Textbox(value="You are a helpful assistant.", label="System Prompt").render() # Render system_message_box here
+        inference_provider_radio.render() # Render the provider radio here
+        max_tokens_slider.render()
+        temperature_slider.render()
+        top_p_slider.render()
+        frequency_penalty_slider.render()
+        seed_slider.render()
+        print("Advanced settings accordion created with provider selection and parameters.")
 
 
+print("Gradio interface fully initialized.")
 
 if __name__ == "__main__":
+    print("Launching the demo application.")
+    demo.launch(show_api=False)
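
Aside: a quick way to sanity-check the updated respond() without launching the UI is to drive the generator directly. A hedged sketch, assuming this file is importable as `app` and HF_TOKEN is set; the keyword arguments mirror the additional_inputs order above:

from app import respond

chunks = respond(
    message="What providers does this Space support?",
    history=[],                          # no prior turns
    system_message="You are a helpful assistant.",
    max_tokens=128,
    temperature=0.7,
    top_p=0.95,
    frequency_penalty=0.0,
    seed=-1,                             # mapped to None (random) inside respond()
    custom_model="",                     # empty -> falls back to the default model
    inference_provider="hf-inference",   # or "cerebras" to go through the HF router
)

final = ""
for partial in chunks:  # respond() yields the accumulated text after each token
    final = partial
print(final)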