Spaces:

Nymbo
/

Serverless-TextGen-Hub

Running

App Files Files Community

Nymbo committed on Apr 30

Commit

5b8ad4f

verified ·

1 Parent(s): 4df41b9

Update app.py

Browse files

Files changed (1) hide show

app.py +164 -174

app.py CHANGED Viewed

@@ -1,19 +1,22 @@
 import gradio as gr
 from openai import OpenAI
 import os
-# Load the Hugging Face access token from environment variables
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
-print("Access token loaded.")
-# Initialize the OpenAI client pointing to the Hugging Face Inference API
-client = OpenAI(
-    base_url="https://api-inference.huggingface.co/v1/",
-    api_key=ACCESS_TOKEN,
-)
-print("OpenAI client initialized.")
-# Define the main function that handles chat responses
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -23,150 +26,129 @@ def respond(
     top_p,
     frequency_penalty,
     seed,
-    custom_model,  # Input from the Custom Model textbox
-    featured_model # Input from the Featured Model radio buttons <<< NEW INPUT
 ):
     print(f"Received message: {message}")
-    print(f"History: {history}")
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
-    print(f"Custom model input: '{custom_model}'")
-    print(f"Selected featured model: {featured_model}") # Log the featured model selection
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
-    # Start constructing the message list for the API call with the system message
     messages = [{"role": "system", "content": system_message}]
-    print("Initial messages array constructed.")
-    # Add the conversation history to the messages list
     for val in history:
-        user_part = val[0]
-        assistant_part = val[1]
-        if user_part:
-            messages.append({"role": "user", "content": user_part})
-            print(f"Added user message to context: {user_part}")
-        if assistant_part:
-            messages.append({"role": "assistant", "content": assistant_part})
-            print(f"Added assistant message to context: {assistant_part}")
-    # Add the latest user message to the list
-    messages.append({"role": "user", "content": message})
-    print("Latest user message appended.")
-    # <<< MODEL SELECTION LOGIC UPDATED >>>
-    # Determine the model to use: Prioritize the custom model box if it's filled,
-    # otherwise use the selected featured model.
-    custom_model_stripped = custom_model.strip() # Remove leading/trailing whitespace
-    if custom_model_stripped != "":
-        model_to_use = custom_model_stripped # Use custom model if provided
-        print(f"Using custom model: {model_to_use}")
-    else:
-        model_to_use = featured_model # Use the selected featured model
-        print(f"Using selected featured model: {model_to_use}")
-    # Initialize an empty string to accumulate the response tokens
     response = ""
-    print("Sending request to Hugging Face Inference API.")
-    # Stream the response from the API
-    for message_chunk in client.chat.completions.create(
-        model=model_to_use,          # Use the determined model
-        max_tokens=max_tokens,       # Set maximum tokens for the response
-        stream=True,                 # Enable streaming responses
-        temperature=temperature,     # Set sampling temperature
-        top_p=top_p,                 # Set nucleus sampling probability
-        frequency_penalty=frequency_penalty, # Set frequency penalty
-        seed=seed,                   # Set random seed (if provided)
-        messages=messages,           # Pass the constructed message history
-    ):
-        # Get the text content from the current chunk
-        token_text = message_chunk.choices[0].delta.content
-        # Append the token text to the response string (if it's not None)
-        if token_text:
-            print(f"Received token: {token_text}")
-            response += token_text
-            yield response # Yield the partial response back to Gradio for live updates
     print("Completed response generation.")
-# --- GRADIO UI ---
-# Create the main chatbot display area
-chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and begin chatting", layout="panel")
 print("Chatbot interface created.")
-# Create the System Prompt input box
-system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt")
-# Create sliders for model parameters
-max_tokens_slider = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens")
-temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
 top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
 frequency_penalty_slider = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
 seed_slider = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
-# Create the Custom Model input box
 custom_model_box = gr.Textbox(
-    value="", # Default to empty
-    label="Custom Model",
-    info="(Optional) Provide a custom Hugging Face model path. Overrides the featured model selection below.",
-    placeholder="e.g., username/my-custom-model" # Updated placeholder
 )
-# Define the list of featured models
-models_list = [
-    "meta-llama/Llama-3.3-70B-Instruct", # Default selected model
-    "meta-llama/Llama-3.1-70B-Instruct",
-    "meta-llama/Llama-3.0-70B-Instruct",
-    "meta-llama/Llama-3.2-3B-Instruct",
-    "meta-llama/Llama-3.2-1B-Instruct",
-    "meta-llama/Llama-3.1-8B-Instruct",
-    "NousResearch/Hermes-3-Llama-3.1-8B",
-    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
-    "mistralai/Mistral-Nemo-Instruct-2407",
-    "mistralai/Mixtral-8x7B-Instruct-v0.1",
-    "mistralai/Mistral-7B-Instruct-v0.3",
-    "mistralai/Mistral-7B-Instruct-v0.2",
-    "Qwen/Qwen3-235B-A22B",
-    "Qwen/Qwen3-32B",
-    "Qwen/Qwen2.5-72B-Instruct",
-    "Qwen/Qwen2.5-3B-Instruct",
-    "Qwen/Qwen2.5-0.5B-Instruct",
-    "Qwen/QwQ-32B",
-    "Qwen/Qwen2.5-Coder-32B-Instruct",
-    "microsoft/Phi-3.5-mini-instruct",
-    "microsoft/Phi-3-mini-128k-instruct",
-    "microsoft/Phi-3-mini-4k-instruct",
-    "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
-    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
-    "HuggingFaceH4/zephyr-7b-beta",
-    "HuggingFaceTB/SmolLM2-360M-Instruct",
-    "tiiuae/falcon-7b-instruct",
-    "01-ai/Yi-1.5-34B-Chat",
-]
-print("Models list initialized.")
-# Create the radio button selector for featured models
-featured_model_radio = gr.Radio(
-    label="Select a Featured Model", # Changed label slightly
-    choices=models_list,
-    value="meta-llama/Llama-3.3-70B-Instruct", # Set the default selection
-    interactive=True
 )
-print("Featured models radio button created.")
-# --- Create the main Chat Interface ---
-# <<< `additional_inputs` UPDATED >>>
 demo = gr.ChatInterface(
-    fn=respond, # The function to call when a message is sent
-    additional_inputs=[ # List of input components passed to the 'respond' function
         system_message_box,
         max_tokens_slider,
         temperature_slider,
@@ -174,73 +156,81 @@ demo = gr.ChatInterface(
         frequency_penalty_slider,
         seed_slider,
         custom_model_box,
-        featured_model_radio # Pass the radio button selection <<< ADDED
     ],
-    fill_height=True, # Make the interface fill the available height
-    chatbot=chatbot, # Use the predefined chatbot component
-    theme="Nymbo/Nymbo_Theme", # Apply a theme
 )
 print("ChatInterface object created.")
-# --- Add Model Selection Controls within the Interface ---
-with demo: # Use the ChatInterface as a context manager to add elements
-    with gr.Accordion("Model Selection & Parameters", open=False): # Group controls in an accordion
-        # --- Featured Model Selection ---
-        gr.Markdown("### Featured Models") # Section title
-        model_search_box = gr.Textbox(
-            label="Filter Models",
-            placeholder="Search featured models...",
-            lines=1
-        )
         print("Model search box created.")
-        # Place the radio buttons here
-        # No need to define `featured_model_radio` again, just use the variable defined above
-        demo.load(lambda: featured_model_radio, outputs=featured_model_radio) # Ensure it appears in the layout
-        print("Featured model radio added to layout.")
-        # --- Custom Model Input ---
-        gr.Markdown("### Custom Model") # Section title
-        # No need to define `custom_model_box` again, just use the variable defined above
-        demo.load(lambda: custom_model_box, outputs=custom_model_box) # Ensure it appears in the layout
-        print("Custom model box added to layout.")
-        # --- Parameters ---
-        gr.Markdown("### Parameters") # Section title
-        # Add sliders to the layout
-        demo.load(lambda: max_tokens_slider, outputs=max_tokens_slider)
-        demo.load(lambda: temperature_slider, outputs=temperature_slider)
-        demo.load(lambda: top_p_slider, outputs=top_p_slider)
-        demo.load(lambda: frequency_penalty_slider, outputs=frequency_penalty_slider)
-        demo.load(lambda: seed_slider, outputs=seed_slider)
-        print("Parameter sliders added to layout.")
-        # --- Event Listeners ---
-        # Function to filter the radio button choices based on search input
         def filter_models(search_term):
             print(f"Filtering models with search term: {search_term}")
-            # List comprehension to find models matching the search term (case-insensitive)
             filtered = [m for m in models_list if search_term.lower() in m.lower()]
             print(f"Filtered models: {filtered}")
-            # Update the 'choices' property of the radio button component
-            return gr.update(choices=filtered)
-        # Link the search box's 'change' event to the filter function
-        model_search_box.change(
-            fn=filter_models,          # Function to call
-            inputs=model_search_box,   # Input component triggering the event
-            outputs=featured_model_radio # Output component to update
-        )
-        print("Model search box change event linked.")
-print("Gradio interface layout defined.")
-# --- Launch the Application ---
 if __name__ == "__main__":
-    print("Launching the Gradio demo application.")
-    # Launch the Gradio app with API endpoint enabled
-    demo.launch(show_api=True)

 import gradio as gr
 from openai import OpenAI
 import os
+import requests # Added for potential future use, though OpenAI client handles it now
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
+if not ACCESS_TOKEN:
+    print("Warning: HF_TOKEN environment variable not set. Authentication might fail.")
+else:
+    print("Access token loaded.")
+# Base URLs for different providers
+HF_INFERENCE_BASE_URL = "https://api-inference.huggingface.co/v1/"
+CEREBRAS_ROUTER_BASE_URL = "https://router.huggingface.co/cerebras/v1/" # Use base URL for OpenAI client
+# Default provider
+DEFAULT_PROVIDER = "hf-inference"
+# --- Main Respond Function ---
 def respond(
     message,
     history: list[tuple[str, str]],
     top_p,
     frequency_penalty,
     seed,
+    custom_model,
+    inference_provider # New argument for provider selection
 ):
+    print(f"--- New Request ---")
+    print(f"Selected Inference Provider: {inference_provider}")
     print(f"Received message: {message}")
+    # print(f"History: {history}") # Can be verbose
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
+    print(f"Selected model (custom_model): {custom_model}")
+    # Determine the base URL based on the selected provider
+    if inference_provider == "cerebras":
+        base_url = CEREBRAS_ROUTER_BASE_URL
+        print(f"Using Cerebras Router endpoint: {base_url}")
+    else: # Default to hf-inference
+        base_url = HF_INFERENCE_BASE_URL
+        print(f"Using HF Inference API endpoint: {base_url}")
+    # Initialize the OpenAI client dynamically for each request
+    try:
+        client = OpenAI(
+            base_url=base_url,
+            api_key=ACCESS_TOKEN,
+        )
+        print("OpenAI client initialized for the request.")
+    except Exception as e:
+        print(f"Error initializing OpenAI client: {e}")
+        yield f"Error: Could not initialize API client for provider {inference_provider}. Check token and endpoint."
+        return
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
     messages = [{"role": "system", "content": system_message}]
+    # print("Initial messages array constructed.") # Less verbose logging
+    # Add conversation history to the context
     for val in history:
+        user_part, assistant_part = val[0], val[1]
+        if user_part: messages.append({"role": "user", "content": user_part})
+        if assistant_part: messages.append({"role": "assistant", "content": assistant_part})
+    # Append the latest user message
+    messages.append({"role": "user", "content": message})
+    # print("Full message context prepared.") # Less verbose logging
+    # If user provided a model, use that; otherwise, fall back to a default model
+    # Ensure a default model is always set if custom_model is empty
+    model_to_use = custom_model.strip() if custom_model.strip() else "meta-llama/Llama-3.3-70B-Instruct"
+    print(f"Model selected for inference: {model_to_use}")
+    # Start streaming response
     response = ""
+    print(f"Sending request to {inference_provider} via {base_url}...")
+    try:
+        stream = client.chat.completions.create(
+            model=model_to_use,
+            max_tokens=max_tokens,
+            stream=True,
+            temperature=temperature,
+            top_p=top_p,
+            frequency_penalty=frequency_penalty,
+            seed=seed,
+            messages=messages,
+        )
+        for message_chunk in stream:
+            token_text = message_chunk.choices[0].delta.content
+            # Handle potential None or empty tokens gracefully
+            if token_text:
+                # print(f"Received token: {token_text}") # Very verbose
+                response += token_text
+                yield response
+            # Handle potential finish reason if needed (e.g., length)
+            # finish_reason = message_chunk.choices[0].finish_reason
+            # if finish_reason:
+            #     print(f"Stream finished with reason: {finish_reason}")
+    except Exception as e:
+        print(f"Error during API call to {inference_provider}: {e}")
+        yield f"Error: API call failed. Details: {str(e)}"
+        return # Stop generation on error
     print("Completed response generation.")
+# --- GRADIO UI Elements ---
+chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and provider, then begin chatting", layout="panel")
 print("Chatbot interface created.")
+# These components are created here at module level; the sliders are rendered further down inside the "Advanced Settings" accordion
+system_message_box = gr.Textbox(value="You are a helpful assistant.", label="System Prompt")
+max_tokens_slider = gr.Slider(minimum=1, maximum=4096, value=1024, step=1, label="Max new tokens") # Increased default
+temperature_slider = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature") # Adjusted range
 top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
 frequency_penalty_slider = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
 seed_slider = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
 custom_model_box = gr.Textbox(
+    value="",
+    label="Custom Model Path",
+    info="(Optional) Provide a Hugging Face model path. Overrides featured model selection.",
+    placeholder="meta-llama/Llama-3.3-70B-Instruct"
 )
+# New UI Element for Provider Selection (will be placed in Accordion)
+inference_provider_radio = gr.Radio(
+    choices=["hf-inference", "cerebras"],
+    value=DEFAULT_PROVIDER,
+    label="Inference Provider",
+    info=f"Select the backend API. Default: {DEFAULT_PROVIDER}"
 )
+print("Inference provider radio button created.")
+# --- Gradio Chat Interface Definition ---
 demo = gr.ChatInterface(
+    fn=respond,
+    additional_inputs=[
+        # Order matters: must match the 'respond' function signature
         system_message_box,
         max_tokens_slider,
         temperature_slider,
         frequency_penalty_slider,
         seed_slider,
         custom_model_box,
+        inference_provider_radio, # Added the new input
     ],
+    fill_height=True,
+    chatbot=chatbot,
+    theme="Nymbo/Nymbo_Theme",
+    title="Multi-Provider Chat Hub",
+    description="Chat with various models using different inference backends (HF Inference API or Cerebras via HF Router)."
 )
 print("ChatInterface object created.")
+# --- Add Accordions for Settings within the Demo context ---
+with demo:
+    # Model Selection Accordion (existing logic)
+    with gr.Accordion("Model Selection", open=False):
+        model_search_box = gr.Textbox(label="Filter Featured Models", placeholder="Search...", lines=1)
         print("Model search box created.")
+        # Example models list (keep your extensive list)
+        models_list = [
+            "meta-llama/Llama-3.3-70B-Instruct", "meta-llama/Llama-3.1-70B-Instruct", "meta-llama/Llama-3.1-8B-Instruct",
+            "NousResearch/Hermes-3-Llama-3.1-8B", "mistralai/Mistral-Nemo-Instruct-2407", "mistralai/Mixtral-8x7B-Instruct-v0.1",
+            "mistralai/Mistral-7B-Instruct-v0.3", "Qwen/Qwen3-32B", "microsoft/Phi-3.5-mini-instruct",
+            # Add the rest of your models here...
+        ]
+        print("Models list initialized.")
+        featured_model_radio = gr.Radio(
+            label="Select a Featured Model",
+            choices=models_list,
+            value="meta-llama/Llama-3.3-70B-Instruct", # Default featured model
+            interactive=True
+        )
+        print("Featured models radio button created.")
         def filter_models(search_term):  # Case-insensitive substring filter over models_list; returns a gr.update for the featured-model radio
             print(f"Filtering models with search term: {search_term}")
             filtered = [m for m in models_list if search_term.lower() in m.lower()]
+            # Choose which value stays selected after filtering. NOTE(review): this reads the component object's .value attribute, which looks like the construction-time default rather than the user's live selection -- confirm against the Gradio docs; passing the radio as an event input may be what was intended.
+            current_value = featured_model_radio.value
+            if current_value not in filtered and filtered:
+                 new_value = filtered[0] # Current value was filtered out: fall back to the first match
+            elif not filtered:
+                 new_value = None # Nothing matched: clear the selection
+            else:
+                 new_value = current_value # Current value survived the filter: keep it
             print(f"Filtered models: {filtered}")
+            return gr.update(choices=filtered, value=new_value)
+        def set_custom_model_from_radio(selected_model):
+            """Updates the Custom Model text box when a featured model is selected."""
+            print(f"Featured model selected: {selected_model}")
+            return selected_model # Returned unchanged; the featured_model_radio.change() wiring below sends it to custom_model_box
+        model_search_box.change(fn=filter_models, inputs=model_search_box, outputs=featured_model_radio)
+        featured_model_radio.change(fn=set_custom_model_from_radio, inputs=featured_model_radio, outputs=custom_model_box)
+        print("Model selection events linked.")
+    # Advanced Settings Accordion (New)
+    with gr.Accordion("Advanced Settings", open=False):
+        # Place the provider selection and parameter sliders here
+        gr.Markdown("Configure inference parameters and select the backend provider.")
+        # Render the components created at module level inside this accordion. NOTE(review): the same components are also passed to gr.ChatInterface(additional_inputs=...) above -- verify that calling .render() on them here does not raise a duplicate-block error.
+        gr.Textbox(value="You are a helpful assistant.", label="System Prompt").render() # NOTE(review): this constructs a NEW Textbox; it is not the system_message_box passed to additional_inputs -- confirm whether system_message_box.render() was intended
+        inference_provider_radio.render() # Render the provider radio here
+        max_tokens_slider.render()
+        temperature_slider.render()
+        top_p_slider.render()
+        frequency_penalty_slider.render()
+        seed_slider.render()
+        print("Advanced settings accordion created with provider selection and parameters.")
+print("Gradio interface fully initialized.")
 if __name__ == "__main__":
+    print("Launching the demo application.")
+    demo.launch(show_api=False)