File size: 1,769 Bytes
192ca7c
 
5dcab9b
 
192ca7c
5dcab9b
 
 
 
 
192ca7c
5dcab9b
df8b191
8f370b1
5dcab9b
192ca7c
 
5dcab9b
df8b191
 
 
 
 
192ca7c
5dcab9b
192ca7c
 
5dcab9b
192ca7c
 
5dcab9b
192ca7c
 
 
 
 
 
 
 
 
 
 
5dcab9b
192ca7c
 
 
940d269
192ca7c
df8b191
8f370b1
192ca7c
 
5dcab9b
192ca7c
5dcab9b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import gradio as gr
from huggingface_hub import InferenceClient
from dotenv import load_dotenv
import os

# Read the .env file so its entries become available as environment variables.
load_dotenv()

# System prompt for the chat model, taken from the SYSTEM_MESSAGE environment
# variable. This is None when the variable is not set.
system_message = os.environ.get("SYSTEM_MESSAGE")

# Inference client shared by every chat request in this module.
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")

def respond(message, history, max_tokens, temperature, top_p):
    """Stream the assistant's reply to ``message`` as a growing string.

    Builds the chat transcript (system prompt when one is configured, the
    earlier turns from ``history``, then the new user message), sends it to
    the module-level ``client`` with streaming enabled, and yields the reply
    accumulated so far each time a chunk adds text.

    Args:
        message: The latest user message.
        history: Iterable of ``(user_msg, assistant_msg)`` pairs for earlier
            turns. Falsy entries in a pair are skipped.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The reply text accumulated up to the current chunk.
    """
    messages = []

    # The system prompt comes from an environment variable and is None when
    # that variable is unset; do not send a system turn without content.
    if system_message:
        messages.append({"role": "system", "content": system_message})

    # Replay earlier turns in order, skipping empty halves of a pair.
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    response = ""

    # The loop variable is deliberately not called `message`, so the
    # function parameter of that name is not shadowed.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # A streamed chunk may carry no choices or no text (delta.content can
        # be None); concatenating None onto a str would raise TypeError.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if not token:
            continue
        response += token
        yield response

# Chat UI: `respond` produces the replies, and the three sliders below are
# passed to it as extra arguments, in the order listed.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Slider(
            label="Max new tokens",
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
        ),
        gr.Slider(
            label="Temperature",
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
        ),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
        ),
    ],
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()