# Import the Gradio library for creating the web interface
import gradio as gr
# Import the InferenceClient from huggingface_hub to interact with the language model
from huggingface_hub import InferenceClient

# --- Configuration Constants ---
# Maximum number of tokens the model may generate in a single response.
FIXED_MAX_TOKENS = 99999  # Deliberately high to avoid truncation; typical caps are far lower (e.g., 512-4096), and some backends may clamp or reject values beyond their context window.


# --- Initialize the InferenceClient ---
# For custom OpenAI-compatible APIs, initialize the InferenceClient with the base URL.
# The model to use can be specified per call (e.g., in chat_completion). Recent versions
# of huggingface_hub accept either the bare base URL (".../v1") or, as here, the full
# chat-completions endpoint.
API_BASE_URL = "https://vulture-awake-probably.ngrok-free.app/v1/chat/completions"  # Full chat-completions endpoint of the custom API
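
# For reference, the endpoint can be exercised directly with an OpenAI-style
# request (illustrative; the model field is a placeholder and an auth header
# may be required):
#   curl https://vulture-awake-probably.ngrok-free.app/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "<model-name>", "messages": [{"role": "user", "content": "Hi"}]}'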

try:
    # Initialize the client with the base URL of your API.
    # If your API requires an authentication token, you might need to pass it here,
    # e.g., client = InferenceClient(base_url=API_BASE_URL, token="YOUR_API_TOKEN")
    # or ensure it's set as an environment variable if the client/API supports that.
    client = InferenceClient(base_url=API_BASE_URL)
    print(f"InferenceClient initialized with base_url: {API_BASE_URL}")
except Exception as e:
    print(f"Error initializing InferenceClient with base_url '{API_BASE_URL}': {e}")
    # Handle the error appropriately, e.g., by exiting or using a fallback
    raise RuntimeError(
        "Could not initialize InferenceClient. "
        f"Please check the API base URL ('{API_BASE_URL}') and ensure the server is accessible. "
        f"Error: {e}"
    )


# --- Core Chatbot Logic ---
def respond(message, history):
    """
    This function processes the user's message and the chat history to generate a response
    from the language model using the custom API.

    Args:
        message (str): The latest message from the user.
        history (list of lists): A list where each inner list contains a pair of
                                 [user_message, ai_message].

    Yields:
        str: The accumulated response so far, growing token by token (for streaming).
    """
    # Initialize the messages list
    messages = []

    # Append past interactions from the history to the messages list
    # This provides context to the language model
    for user_message, ai_message in history:
        if user_message: # Ensure there's a user message
            messages.append({"role": "user", "content": user_message})
        if ai_message: # Ensure there's an AI message
            messages.append({"role": "assistant", "content": ai_message})

    # Append the current user's message to the messages list
    messages.append({"role": "user", "content": message})
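
    # At this point `messages` follows the OpenAI-style chat format
    # (illustrative values):
    #   [
    #       {"role": "user", "content": "Hi"},
    #       {"role": "assistant", "content": "Hello! How can I help?"},
    #       {"role": "user", "content": "What is Gradio?"},
    #   ]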

    # Initialize an empty string to accumulate the response
    response_text = ""

    try:
        # Make a streaming call to the language model's chat completions endpoint.
        # The `model` parameter specifies which model to use at the endpoint.
        stream = client.chat_completion(
            messages=messages,              # The conversation history and current message
            max_tokens=FIXED_MAX_TOKENS,    # Maximum tokens for the response
            stream=True,                    # Enable streaming for token-by-token output
        )

        for chunk in stream:
            # Check if the chunk contains content and the content is not None
            # The exact structure of the chunk can vary based on the model/endpoint
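            # Typical OpenAI-style chunk shape (illustrative):
            #   chunk.choices[0].delta.content -> "<token>" or None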
            if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content is not None:
                token = chunk.choices[0].delta.content # Extract the token from the chunk
                response_text += token                 # Append the token to the response string
                yield response_text                  # Yield the accumulated response so far (for streaming UI update)

    except Exception as e:
        # If any error occurs during the API call, yield an error message
        error_message = f"An error occurred during model inference: {e}"
        print(error_message) # Also print to console for debugging
        yield error_message
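
# Quick sanity check outside Gradio (illustrative; assumes the API above is reachable):
#   last = ""
#   for last in respond("Hello!", []):
#       pass
#   print(last)  # the fully accumulated reply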

# --- Gradio Interface Definition ---

# URL for the header image
header_image_path = "https://cdn-uploads.huggingface.co/production/uploads/6540a02d1389943fef4d2640/j61iZTDaK9g0UW3aWGwWi.gif"

# Ko-fi widget script
kofi_script = """
<script src='https://storage.ko-fi.com/cdn/scripts/overlay-widget.js'></script>
<script>
  kofiWidgetOverlay.draw('sonnydesorbo', {
    'type': 'floating-chat',
    'floating-chat.donateButton.text': 'Support me',
    'floating-chat.donateButton.background-color': '#00b9fe',
    'floating-chat.donateButton.text-color': '#fff'
  });
</script>
"""

# Ko-fi button HTML
kofi_button_html = """
<div style="text-align: center; padding: 20px;">
    <a href='https://ko-fi.com/Z8Z51E5TIG' target='_blank'>
        <img height='36' style='border:0px;height:36px;' src='https://storage.ko-fi.com/cdn/kofi5.png?v=6' border='0' alt='Buy Me a Coffee at ko-fi.com' />
    </a>
</div>
"""

# Create a Gradio Blocks layout for more control over the interface
# theme=gr.themes.Soft() applies a soft visual theme
# Add the kofi_script to the head of the HTML page
with gr.Blocks(theme=gr.themes.Soft(), head=kofi_script) as demo:
    # Display an image at the top of the chatbot interface
    gr.Image(
        value=header_image_path, # Source of the image
        label="Chatbot Header",   # Alt text or label (not shown due to show_label=False)
        show_label=False,         # Hide the label text
        interactive=False,        # Make the image non-interactive
        height=150,               # Set the height of the image
        elem_id="chatbot-logo"    # Assign an HTML ID for potential CSS styling
    )

    # Create the chat interface component
    gr.ChatInterface(
        fn=respond,               # The function to call when a message is sent
        chatbot=gr.Chatbot(       # Configure the chatbot display area
            height=650            # Set the height of the chat history display
        ),
        # Additional parameters for ChatInterface can be added here, e.g.:
        # title="Xortron7 Chat",
        # description="Chat with Xortron7, your AI assistant.",
        # examples=[["Hello!", None], ["What is Gradio?", None]],
        # retry_btn=None, # Removes the retry button
        # undo_btn="Delete Previous", # Customizes the undo button
        # clear_btn="Clear Chat", # Customizes the clear button
    )

    # Add the Ko-fi button at the bottom
    gr.HTML(kofi_button_html)

# --- Application Entry Point ---
if __name__ == "__main__":
    # Launch the Gradio web server
    # show_api=False disables the API documentation page
    # share=False prevents creating a public Gradio link (for local development)
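    # For LAN or container deployments, the call can be adjusted, e.g.
    # (illustrative): demo.launch(server_name="0.0.0.0", server_port=7860, show_api=False)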
    try:
        demo.launch(show_api=False, share=False)
    except NameError as ne:
        # This can happen if 'demo' was never defined because an earlier error
        # aborted setup before the interface was built.
        print(f"Gradio demo could not be launched. 'demo' may not have been defined: {ne}")
    except RuntimeError as rt_err:
        # Catches an explicit RuntimeError, e.g. from failed client initialization.
        print(f"Gradio demo could not be launched due to an error during client initialization: {rt_err}")
    except Exception as e:
        print(f"An unexpected error occurred when trying to launch Gradio demo: {e}")