import gradio as gr
from huggingface_hub import InferenceClient

# Hugging Face client initialization
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


# Function to handle NLP responses and interaction with the model
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """
    Handle the user's message and generate a response using the NLP model.

    Parameters:
        message (str): User's current message/input.
        history (list): List of (user_message, assistant_message) tuples representing the conversation so far.
        system_message (str): System-level instructions that guide the assistant's responses.
        max_tokens (int): Maximum number of tokens to generate in the response.
        temperature (float): Degree of randomness in the response generation.
        top_p (float): Controls the diversity of the response via nucleus sampling.

    Yields:
        str: The response so far, streamed as tokens are generated.
    """
    # Start the conversation with the system-level instructions.
    messages = [{"role": "system", "content": system_message}]

    # Add the past conversation turns from the history.
    for user_message, assistant_message in history:
        if user_message:
            messages.append({"role": "user", "content": user_message})
        if assistant_message:
            messages.append({"role": "assistant", "content": assistant_message})

    # Append the current user message to the conversation.
    messages.append({"role": "user", "content": message})

    # Accumulate the streamed response.
    response = ""

    # Stream the response from the Hugging Face model.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Extract the token content and append it to the response.
        token = chunk.choices[0].delta.content
        if token:  # Some chunks carry no content (e.g. role-only deltas).
            response += token
            yield response


# System prompt to guide the assistant's behavior
default_system_message = (
    "You are NLPToolkit Agent, an advanced natural language processing assistant. "
    "You specialize in tasks such as text summarization, sentiment analysis, text classification, "
    "entity recognition, and answering technical questions about NLP models and datasets. "
    "Assist users with clear, concise, and actionable outputs."
)


# Create the Gradio interface for user interaction
def create_interface():
    """
    Create and return a Gradio chat interface for the NLPToolkit Agent with customizable parameters.

    Returns:
        gr.ChatInterface: The Gradio chat interface object.
    """
    return gr.ChatInterface(
        respond,
        additional_inputs=[
            gr.Textbox(
                value=default_system_message,
                label="System Message",
            ),
            gr.Slider(
                minimum=1,
                maximum=2048,
                value=512,
                step=1,
                label="Max New Tokens",
            ),
            gr.Slider(
                minimum=0.1,
                maximum=4.0,
                value=0.7,
                step=0.1,
                label="Temperature",
            ),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p (Nucleus Sampling)",
            ),
        ],
    )


# Run the Gradio interface
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
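
# A minimal usage sketch (commented out): respond() is a plain generator, so it can
# also be exercised outside Gradio for quick testing, assuming the Hugging Face
# Inference API is reachable and the model above is available. The prompt text and
# parameter values below are illustrative only.
#
#   for partial in respond(
#       "Summarize: Transformers use self-attention to model long-range dependencies.",
#       history=[],
#       system_message=default_system_message,
#       max_tokens=128,
#       temperature=0.7,
#       top_p=0.95,
#   ):
#       print(partial)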