Spaces:

Canstralian
/

TeamRed

Running

File size: 2,145 Bytes

b11000a
 
b1cdeed
b11000a
b1cdeed
 
b11000a
 
 
b1cdeed
 
 
 
 
 
b11000a
b1cdeed
b11000a
b1cdeed
 
 
 
 
 
 
 
 
b11000a
 
b1cdeed
b11000a
b1cdeed
 
b11000a
 
 
b1cdeed
b11000a
b1cdeed
 
b11000a
b1cdeed
 
 
 
 
 
 
b235487
b1cdeed
 
b11000a
b1cdeed
b11000a
b1cdeed
 
b11000a
 
 
b1cdeed

import gradio as gr
from huggingface_hub import InferenceClient
from typing import List, Tuple

# Module-level inference client bound to the Canstralian/redteamai model;
# respond() sends its chat-completion requests through this object.
client = InferenceClient(model="Canstralian/redteamai")


def respond(
    message: str,
    history: List[Tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Stream the model's reply to ``message`` as a progressively longer string.

    Builds a chat-completion request from the system prompt, the earlier
    turns in ``history`` and the new user message, sends it through the
    module-level ``client`` with streaming enabled, and yields the reply
    accumulated so far after every streamed chunk.

    Args:
        message: The new user message; appended last with role ``user``.
        history: Earlier turns as ``(user_message, assistant_reply)`` pairs.
            Falsy entries inside a pair are skipped; ``None`` or an empty
            list means there is no earlier conversation.
        system_message: Sent as the first message with role ``system``.
        max_tokens: Forwarded unchanged to ``client.chat_completion``.
        temperature: Forwarded unchanged to ``client.chat_completion``.
        top_p: Forwarded unchanged to ``client.chat_completion``.

    Yields:
        The full reply text received so far (not just the newest token).
    """
    # The system prompt always opens the conversation.
    messages = [{"role": "system", "content": system_message}]

    # Replay earlier turns in order; tolerate a missing history.
    for user_message, assistant_reply in history or ():
        if user_message:
            messages.append({"role": "user", "content": user_message})
        if assistant_reply:
            messages.append({"role": "assistant", "content": assistant_reply})

    # The message being answered now goes last.
    messages.append({"role": "user", "content": message})

    stream = client.chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,  # Receive the reply chunk by chunk
    )

    response = ""
    for chunk in stream:
        # A streamed chunk is not guaranteed to carry text: ``choices`` can be
        # empty and ``delta.content`` can be None. Concatenating None would
        # raise TypeError and abort the stream, so treat both as "no text".
        choices = chunk.choices
        token = ""
        if choices and choices[0].delta.content:
            token = choices[0].delta.content
        response += token
        yield response  # Emit the accumulated text so the UI can update

# Create the Gradio chat UI.
#
# gr.ChatInterface calls fn(message, history, *additional_inputs), which is
# exactly respond's parameter order, and it keeps the conversation history
# itself. This replaces a gr.Interface that declared a gr.State input with no
# matching State output (so history could never be written back) and passed
# gr.State a `label` argument it does not take. `live=True` is dropped as well:
# it re-ran the remote inference on every input change, while streaming
# already comes from respond being a generator.
#
# NOTE(review): respond unpacks each history entry as a (user, assistant)
# pair - confirm the installed Gradio version passes history in that format.
demo = gr.ChatInterface(
    fn=respond,
    textbox=gr.Textbox(label="User Message", placeholder="Enter your message here..."),
    additional_inputs=[
        gr.Textbox(value="You are a friendly chatbot.", label="System Message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (Nucleus Sampling)"),
    ],
)

# Only start the web server when this file is run as a script.
if __name__ == "__main__":
    demo.launch()