# Import the Gradio library for creating the web interface
import gradio as gr
# Import the InferenceClient from huggingface_hub to interact with the language model
from huggingface_hub import InferenceClient
# --- Configuration Constants ---
# Define the maximum number of tokens the model should generate in a single response
FIXED_MAX_TOKENS = 99999  # Note: this cap is unusually high; typical values are much lower (e.g., 512, 1024, 2048, or 4096 for many models)
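# Illustrative alternative (an assumption, tune to your backend): many
# OpenAI-compatible servers reject max_tokens values that exceed the model's
# context window, so a more conservative cap is often safer, e.g.:
# FIXED_MAX_TOKENS = 2048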
# --- Initialize the InferenceClient ---
# For custom OpenAI-compatible APIs, initialize the InferenceClient with the base URL.
# The specific model will be specified in the API call (e.g., chat_completion).
API_BASE_URL = "https://vulture-awake-probably.ngrok-free.app/v1/chat/completions" # Base URL for the custom API
try:
    # Initialize the client with the base URL of your API.
    # If your API requires an authentication token, you might need to pass it here,
    # e.g., client = InferenceClient(base_url=API_BASE_URL, token="YOUR_API_TOKEN"),
    # or ensure it's set as an environment variable if the client/API supports that.
    client = InferenceClient(base_url=API_BASE_URL)
    print(f"InferenceClient initialized with base_url: {API_BASE_URL}")
except Exception as e:
print(f"Error initializing InferenceClient with base_url '{API_BASE_URL}': {e}")
# Handle the error appropriately, e.g., by exiting or using a fallback
raise RuntimeError(
"Could not initialize InferenceClient. "
f"Please check the API base URL ('{API_BASE_URL}') and ensure the server is accessible. "
f"Error: {e}"
)
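
# Optional connectivity check (illustrative; uncomment to verify that the
# endpoint responds before launching the UI). This uses the same
# chat_completion API as the chatbot below; the prompt text is an arbitrary placeholder.
# ping = client.chat_completion(
#     messages=[{"role": "user", "content": "ping"}],
#     max_tokens=8,
# )
# print(ping.choices[0].message.content)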
# --- Core Chatbot Logic ---
def respond(message, history):
"""
This function processes the user's message and the chat history to generate a response
from the language model using the custom API.
Args:
message (str): The latest message from the user.
history (list of lists): A list where each inner list contains a pair of
[user_message, ai_message].
Yields:
str: The generated response token by token (for streaming).
"""
    # Initialize the messages list in the chat-completion format
    messages = []
    # Append past interactions from the history to the messages list;
    # this provides conversational context to the language model.
    for user_message, ai_message in history:
        if user_message:  # Ensure there's a user message
            messages.append({"role": "user", "content": user_message})
        if ai_message:  # Ensure there's an AI message
            messages.append({"role": "assistant", "content": ai_message})
    # Append the current user's message to the messages list
    messages.append({"role": "user", "content": message})
    # Initialize an empty string to accumulate the streamed response
    response_text = ""
    try:
        # Make a streaming call to the language model's chat completions endpoint.
        # No `model` argument is passed, so the endpoint's served/default model
        # is used; pass `model=...` here if your server hosts several models.
        stream = client.chat_completion(
            messages=messages,  # The conversation history and current message
            max_tokens=FIXED_MAX_TOKENS,  # Maximum tokens for the response
            stream=True,  # Enable streaming for token-by-token output
        )
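        # Each streamed chunk roughly mirrors the OpenAI delta format
        # (exact fields may vary by server):
        #   chunk.choices[0].delta.content -> the newly generated text fragment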
        for chunk in stream:
            # Check that the chunk contains content and the content is not None.
            # The exact structure of the chunk can vary based on the model/endpoint.
            if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content is not None:
                token = chunk.choices[0].delta.content  # Extract the token from the chunk
                response_text += token  # Append the token to the response string
                yield response_text  # Yield the accumulated response so far (streaming UI update)
    except Exception as e:
        # If any error occurs during the API call, yield an error message instead
        error_message = f"An error occurred during model inference: {e}"
        print(error_message)  # Also print to console for debugging
        yield error_message
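
# Quick local sanity check (illustrative; uncomment to exercise the generator
# outside of Gradio; requires the API endpoint to be reachable):
# for partial in respond("Hello!", history=[]):
#     print(partial)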
# --- Gradio Interface Definition ---
# URL for the header image
header_image_path = "https://cdn-uploads.huggingface.co/production/uploads/6540a02d1389943fef4d2640/j61iZTDaK9g0UW3aWGwWi.gif"
# Ko-fi widget script
kofi_script = """
<script src='https://storage.ko-fi.com/cdn/scripts/overlay-widget.js'></script>
<script>
  kofiWidgetOverlay.draw('sonnydesorbo', {
    'type': 'floating-chat',
    'floating-chat.donateButton.text': 'Support me',
    'floating-chat.donateButton.background-color': '#00b9fe',
    'floating-chat.donateButton.text-color': '#fff'
  });
</script>
"""
# Ko-fi button HTML
kofi_button_html = """
<div style="text-align: center; padding: 20px;">
  <a href='https://ko-fi.com/Z8Z51E5TIG' target='_blank'>
    <img height='36' style='border:0px;height:36px;' src='https://storage.ko-fi.com/cdn/kofi5.png?v=6' border='0' alt='Buy Me a Coffee at ko-fi.com' />
  </a>
</div>
"""
# Create a Gradio Blocks layout for more control over the interface
# theme=gr.themes.Soft() applies a soft visual theme
# Add the kofi_script to the head of the HTML page
with gr.Blocks(theme=gr.themes.Soft(), head=kofi_script) as demo:
    # Display an image at the top of the chatbot interface
    gr.Image(
        value=header_image_path,  # Source of the image
        label="Chatbot Header",  # Alt text / label (not shown because show_label=False)
        show_label=False,  # Hide the label text
        interactive=False,  # Make the image non-interactive
        height=150,  # Set the height of the image
        elem_id="chatbot-logo"  # Assign an HTML ID for potential CSS styling
    )
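    # Illustrative note: the elem_id above can be targeted with custom CSS passed
    # to gr.Blocks at creation time, e.g.:
    # gr.Blocks(theme=gr.themes.Soft(), head=kofi_script, css="#chatbot-logo img {object-fit: contain;}")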
    # Create the chat interface component
    gr.ChatInterface(
        fn=respond,  # The function to call when a message is sent
        chatbot=gr.Chatbot(  # Configure the chatbot display area
            height=650  # Set the height of the chat history display
        ),
        # Additional ChatInterface parameters could be added here, e.g.:
        # title="Xortron7 Chat",
        # description="Chat with Xortron7, your AI assistant.",
        # examples=[["Hello!", None], ["What is Gradio?", None]],
        # retry_btn=None,  # Removes the retry button
        # undo_btn="Delete Previous",  # Customizes the undo button
        # clear_btn="Clear Chat",  # Customizes the clear button
    )
    # Add the Ko-fi button at the bottom of the interface
    gr.HTML(kofi_button_html)
# --- Application Entry Point ---
if __name__ == "__main__":
    # Launch the Gradio web server.
    # show_api=False disables the API documentation page;
    # share=False prevents creating a public Gradio link (for local development).
    try:
        demo.launch(show_api=False, share=False)
    except NameError as ne:
        # Defensive: this would occur if 'demo' was never defined because an
        # earlier error aborted the setup code above.
        print(f"Gradio demo could not be launched. 'demo' might not have been defined: {ne}")
    except RuntimeError as re:
        # Defensive: catches a RuntimeError raised while launching the server.
        print(f"Gradio demo could not be launched due to a runtime error: {re}")
    except Exception as e:
        print(f"An unexpected error occurred when trying to launch the Gradio demo: {e}")