import gradio as gr
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Load model and tokenizer from Hugging Face Hub
model_name = "Electricarchmage/cookbookgpt"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Set the pad_token to eos_token and padding_side to 'left'
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'left'

# Define the respond function
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    # Preparing the messages for context (the history and the new message)
    messages = [{"role": "system", "content": system_message}]

    # Convert history to the required format with 'role' and 'content'
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})
    # Flatten the conversation into a single prompt. GPT-2 has no chat template,
    # so plain role labels are used and the reply is read back after the final
    # "Assistant:" marker.
    prompt_parts = [messages[0]["content"]]
    for msg in messages[1:]:
        role_label = "User" if msg["role"] == "user" else "Assistant"
        prompt_parts.append(f"{role_label}: {msg['content']}")
    prompt_parts.append("Assistant:")
    prompt = "\n".join(prompt_parts)

    # Tokenize the prompt
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    attention_mask = inputs.get("attention_mask", torch.ones_like(inputs["input_ids"]))  # Default to ones if not provided

    # Generate output tokens
    output = model.generate(
        inputs["input_ids"],
        attention_mask=attention_mask,
        max_length=max_tokens + len(inputs["input_ids"][0]),
        temperature=temperature,
        top_p=top_p,
        num_return_sequences=1,
        do_sample=True,  # Enable sampling for more dynamic responses
        no_repeat_ngram_size=2,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode the output tokens into text
    response = tokenizer.decode(output[0], skip_special_tokens=True)

    # Extract only the assistant's reply (text after the last "Assistant:" marker)
    assistant_reply = response.split("Assistant:")[-1].strip()
    return assistant_reply
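
# A minimal sanity check (hypothetical prompt and parameter values, not part of
# the app flow): calling respond() directly is a quick way to confirm generation
# works before launching the Gradio UI.
#     respond("How do I roast potatoes?", [], "You are a friendly Chatbot.", 64, 0.7, 0.95)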

# Define the Gradio interface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)

# Launch the app
if __name__ == "__main__":
    demo.launch()