import gradio as gr
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load model and tokenizer from Hugging Face Hub
model_name = "Electricarchmage/cookbookgpt"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
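# (from_pretrained downloads the weights once from the Hub and reuses the
# local cache under ~/.cache/huggingface on later runs)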

# GPT-2 has no dedicated pad token, so reuse the EOS token; padding and
# truncating on the left keep the end of the prompt (the most recent turns) intact
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'left'
tokenizer.truncation_side = 'left'

# Define the respond function
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    # Prepare the conversation context: system message, prior turns, new message
    messages = [{"role": "system", "content": system_message}]

    # Convert history to the required format with 'role' and 'content'
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    
    messages.append({"role": "user", "content": message})

    # GPT-2 has no chat template, so flatten the conversation into a single
    # prompt with explicit role labels; the trailing "Assistant:" cues the
    # model to produce the next reply. (Tokenizing the messages as a batch,
    # as a chat model would, would make GPT-2 generate from each message
    # independently rather than from the whole conversation.)
    prompt = "\n".join(f"{m['role'].capitalize()}: {m['content']}" for m in messages)
    prompt += "\nAssistant:"

    # Tokenize, truncating from the left so recent turns survive and at least
    # 128 tokens of GPT-2's 1024-token context window remain for the reply
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=896)

    # Clamp the generation budget to what still fits in the context window
    max_new = min(int(max_tokens), 1024 - inputs["input_ids"].shape[1])

    # Generate output tokens
    output = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=max_new,
        temperature=temperature,
        top_p=top_p,
        num_return_sequences=1,
        do_sample=True,  # enable sampling for more varied responses
        no_repeat_ngram_size=2,
        pad_token_id=tokenizer.eos_token_id,  # silence the missing-pad-token warning
    )

    # Decode the output tokens into text
    response = tokenizer.decode(output[0], skip_special_tokens=True)

    # Keep only the text after the final "Assistant:" label, trimming any
    # follow-on "User:" turn the model may invent
    assistant_reply = response.split("Assistant:")[-1].split("User:")[0].strip()

    return assistant_reply

# Define the Gradio interface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
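
# ChatInterface passes the additional_inputs positionally to respond() after
# (message, history): system_message, max_tokens, temperature, top_p.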

# Launch the app
if __name__ == "__main__":
    demo.launch()
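
# Run locally, this serves on Gradio's default http://127.0.0.1:7860;
# demo.launch(share=True) would also create a temporary public link.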