# Import the Gradio library for creating the web interface
import gradio as gr
# Import the InferenceClient from huggingface_hub to interact with the language model
from huggingface_hub import InferenceClient

# --- Configuration Constants ---
# Define the maximum number of tokens the model may generate in a single response.
FIXED_MAX_TOKENS = 99999  # Note: this is a very high value; typical values are much lower (e.g., 512, 1024, 2048, or 4096 for many models).

# --- Initialize the InferenceClient ---
# For custom OpenAI-compatible APIs, initialize the InferenceClient with the base URL.
# Because the client is bound to a dedicated endpoint, the served model is implied
# by the URL and does not need to be specified in each API call.
API_BASE_URL = "https://gtjr14qdt3vjwgdj.us-east-1.aws.endpoints.huggingface.cloud"  # Base URL for the custom API

try:
    # Initialize the client with the base URL of your API.
    # If your API requires an authentication token, pass it here, e.g.
    #   client = InferenceClient(base_url=API_BASE_URL, token="YOUR_API_TOKEN")
    # or set it as an environment variable if the client/API supports that.
    client = InferenceClient(base_url=API_BASE_URL)
    print(f"InferenceClient initialized with base_url: {API_BASE_URL}")
except Exception as e:
    print(f"Error initializing InferenceClient with base_url '{API_BASE_URL}': {e}")
    # Handle the error appropriately, e.g., by exiting or using a fallback.
    raise RuntimeError(
        "Could not initialize InferenceClient. "
        f"Please check the API base URL ('{API_BASE_URL}') and ensure the server is accessible. "
        f"Error: {e}"
    )

# --- Core Chatbot Logic ---
def respond(message, history):
    """
    Processes the user's message and the chat history to generate a response
    from the language model via the custom API.

    Args:
        message (str): The latest message from the user.
        history (list of lists): Each inner list is a [user_message, ai_message] pair.

    Yields:
        str: The accumulated response so far, updated token by token (for streaming).
    """
    # Build the message list the chat endpoint expects.
    messages = []

    # Append past interactions from the history to the messages list.
    # This provides context to the language model.
    for user_message, ai_message in history:
        if user_message:  # Ensure there's a user message
            messages.append({"role": "user", "content": user_message})
        if ai_message:  # Ensure there's an AI message
            messages.append({"role": "assistant", "content": ai_message})

    # Append the current user's message to the messages list.
    messages.append({"role": "user", "content": message})

    # Accumulate the streamed response in this string.
    response_text = ""

    try:
        # Make a streaming call to the chat-completions endpoint.
        # No `model` argument is passed: with a base_url client, the deployed
        # endpoint itself determines which model serves the request.
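        # For reference, a sketch of the JSON payload this call sends, assuming
        # the endpoint follows the standard OpenAI-compatible chat-completions
        # convention (an assumption; not confirmed for this specific server):
        #   {
        #     "messages": [
        #       {"role": "user", "content": "Hello"},
        #       {"role": "assistant", "content": "Hi! How can I help?"},
        #       {"role": "user", "content": "<current message>"}
        #     ],
        #     "max_tokens": 99999,
        #     "stream": true
        #   }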
        stream = client.chat_completion(
            messages=messages,            # The conversation history and current message
            max_tokens=FIXED_MAX_TOKENS,  # Maximum tokens for the response
            stream=True,                  # Enable streaming for token-by-token output
        )

        for chunk in stream:
            # Check that the chunk contains content and that the content is not None.
            # The exact structure of the chunk can vary based on the model/endpoint.
            if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content is not None:
                token = chunk.choices[0].delta.content  # Extract the token from the chunk
                response_text += token                  # Append the token to the response string
                yield response_text                     # Yield the accumulated response so far (streaming UI update)
    except Exception as e:
        # If any error occurs during the API call, surface it in the chat window.
        error_message = f"An error occurred during model inference: {e}"
        print(error_message)  # Also print to console for debugging
        yield error_message

# --- Gradio Interface Definition ---
# URL for the header image
header_image_path = "https://cdn-uploads.huggingface.co/production/uploads/6540a02d1389943fef4d2640/j61iZTDaK9g0UW3aWGwWi.gif"

# Ko-fi widget script (left empty here; paste the widget's <script> tag to enable it)
kofi_script = """
"""

# Create a Gradio Blocks layout for more control over the interface.
# theme=gr.themes.Soft() applies a soft visual theme;
# head=kofi_script injects the snippet into the <head> of the HTML page.
with gr.Blocks(theme=gr.themes.Soft(), head=kofi_script) as demo:
    # Display an image at the top of the chatbot interface.
    gr.Image(
        value=header_image_path,  # Source of the image
        label="Chatbot Header",   # Alt text or label (not shown due to show_label=False)
        show_label=False,         # Hide the label text
        interactive=False,        # Make the image non-interactive
        height=100,               # Set the height of the image
        elem_id="chatbot-logo",   # Assign an HTML ID for potential CSS styling
    )

    # Create the chat interface component.
    gr.ChatInterface(
        fn=respond,          # The function to call when a message is sent
        chatbot=gr.Chatbot(  # Configure the chatbot display area
            height=500       # Set the height of the chat history display
        ),
        # Additional parameters for ChatInterface can be added here, e.g.:
        # title="Xortron7 Chat",
        # description="Chat with Xortron7, your AI assistant.",
        # examples=[["Hello!", None], ["What is Gradio?", None]],
        # retry_btn=None,              # Removes the retry button
        # undo_btn="Delete Previous",  # Customizes the undo button
        # clear_btn="Clear Chat",      # Customizes the clear button
    )

# --- Application Entry Point ---
if __name__ == "__main__":
    # Launch the Gradio web server.
    # show_api=False disables the API documentation page;
    # share=False prevents creating a public Gradio link (for local development).
    try:
        demo.launch(show_api=False, share=False)
    except NameError as ne:
        # This can happen if 'demo' was never defined, e.g. because an earlier
        # failure (such as client initialization) stopped the script before
        # the interface was built.
        print(f"Gradio demo could not be launched. 'demo' might not have been defined: {ne}")
    except RuntimeError as re:
        # Catches the RuntimeError raised above if client initialization failed explicitly.
        print(f"Gradio demo could not be launched due to an error during client initialization: {re}")
    except Exception as e:
        print(f"An unexpected error occurred when trying to launch the Gradio demo: {e}")
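# --- Usage note (a sketch, not part of the original app) ---
# Assuming this file is saved as app.py (an assumed filename) and the
# dependencies are installed (pip install gradio huggingface_hub), a local
# run is simply:
#   $ python app.py
# Gradio then serves the interface on a local URL (http://127.0.0.1:7860 by default).
# If the endpoint requires authentication, a hypothetical variant reads a token
# from the environment and passes it to the client:
#   import os
#   client = InferenceClient(base_url=API_BASE_URL, token=os.environ.get("HF_TOKEN"))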