Spaces:
Running
Running
import gradio as gr | |
from huggingface_hub import InferenceClient | |
# Shared Hugging Face inference client, created once at import time.
MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
client = InferenceClient(MODEL_ID)
# Function to handle NLP responses and interaction with the model
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """
    Stream the assistant's reply to the user's message, given the prior conversation.

    Parameters:
        message (str): User's current message/input.
        history (list): Prior conversation. Each entry is either a
            (user_message, assistant_message) pair or a dict carrying
            "role" and "content" keys; empty parts are skipped.
        system_message (str): System-level instructions to the assistant to guide its responses.
        max_tokens (int): Maximum number of tokens to generate in the response.
        temperature (float): Degree of randomness in the response generation.
        top_p (float): Controls the diversity of the response using nucleus sampling.

    Yields:
        str: The response accumulated so far, yielded again after each streamed chunk.
    """
    # The conversation always opens with the system-level instructions.
    messages = [{"role": "system", "content": system_message}]

    # Replay past turns so the model sees the full conversation.
    for turn in history:
        if isinstance(turn, dict):
            # Role/content dict: pass through only the fields the chat API needs.
            role = turn.get("role")
            content = turn.get("content")
            if role and content:
                messages.append({"role": role, "content": content})
            continue

        user_message, assistant_message = turn
        if user_message:
            messages.append({"role": "user", "content": user_message})
        if assistant_message:
            messages.append({"role": "assistant", "content": assistant_message})

    # Append the current user message to the conversation
    messages.append({"role": "user", "content": message})

    response = ""
    # `chunk` (not `message`) so the loop does not shadow the function parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # A streamed chunk may carry no choices at all; there is nothing to add then.
        if not chunk.choices:
            continue

        # The delta content can be None (e.g. on the closing chunk); concatenating
        # None to a str would raise TypeError, so only append real text.
        token = chunk.choices[0].delta.content
        if token:
            response += token
        yield response
# Default system prompt: sets the assistant's persona and the tasks it covers.
# The sentences are joined with single spaces into one prompt string.
default_system_message = " ".join(
    [
        "You are NLPToolkit Agent, an advanced natural language processing assistant.",
        "You specialize in tasks such as text summarization, sentiment analysis, text classification,",
        "entity recognition, and answering technical questions about NLP models and datasets.",
        "Assist users with clear, concise, and actionable outputs.",
    ]
)
# Build the chat UI through which users talk to the assistant
def create_interface():
    """
    Build the Gradio chat interface for the NLPToolkit Agent.

    The chat is driven by `respond`. Four extra controls are registered as
    additional inputs, in this order: system message, max new tokens,
    temperature, and top-p.

    Parameters:
        None

    Returns:
        gr.ChatInterface: The configured chat interface (not yet launched).
    """
    # Editable system prompt, pre-filled with the default persona.
    system_box = gr.Textbox(value=default_system_message, label="System Message")

    # Upper bound on the number of generated tokens.
    max_tokens_slider = gr.Slider(
        minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens"
    )

    # Sampling temperature.
    temperature_slider = gr.Slider(
        minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
    )

    # Nucleus-sampling threshold.
    top_p_slider = gr.Slider(
        minimum=0.1,
        maximum=1.0,
        value=0.95,
        step=0.05,
        label="Top-p (Nucleus Sampling)",
    )

    extra_controls = [
        system_box,
        max_tokens_slider,
        temperature_slider,
        top_p_slider,
    ]
    return gr.ChatInterface(respond, additional_inputs=extra_controls)
# Script entry point: build the chat UI and start serving it.
# The interface stays bound to the module-level name `demo`.
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()