"""Gradio chat front end for the ``Electricarchmage/cookbookgpt`` GPT-2 model."""

import gradio as gr
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load model and tokenizer from Hugging Face Hub
model_name = "Electricarchmage/cookbookgpt"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Reuse the EOS token as the pad token so generate() has a pad id to use.
tokenizer.pad_token = tokenizer.eos_token


def _build_prompt(message, history, system_message):
    """Flatten the conversation into a single role-labelled transcript.

    Args:
        message: The new user message to answer.
        history: Earlier ``(user_text, assistant_text)`` pairs, oldest first.
            Either element of a pair may be empty, in which case it is skipped.
        system_message: Optional instruction placed at the top of the prompt.

    Returns:
        One string with a line per turn, ending in ``"Assistant:"`` so that the
        model's continuation is the assistant's next reply.
    """
    # NOTE(review): the "System:/User:/Assistant:" labels are an assumption
    # about the prompt format this checkpoint expects -- confirm against the
    # data the model was fine-tuned on.
    lines = []
    if system_message:
        lines.append(f"System: {system_message}")
    for user_text, assistant_text in history:
        if user_text:
            lines.append(f"User: {user_text}")
        if assistant_text:
            lines.append(f"Assistant: {assistant_text}")
    lines.append(f"User: {message}")
    lines.append("Assistant:")
    return "\n".join(lines)


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Generate the assistant's reply to ``message``.

    Args:
        message: The new user message.
        history: Earlier ``(user_text, assistant_text)`` pairs, oldest first.
        system_message: Instruction text prepended to the conversation.
        max_tokens: Maximum number of new tokens to generate. Values that do
            not fit in the model's context window are clamped.
        temperature: Sampling temperature passed to ``model.generate``.
        top_p: Nucleus-sampling threshold passed to ``model.generate``.

    Returns:
        The newly generated text only (the prompt is not echoed back), with
        surrounding whitespace stripped.
    """
    # Encode the whole conversation as ONE sequence. Encoding each message as
    # its own batch row would make the model continue every message
    # independently, so the reply would ignore the user's latest message.
    prompt = _build_prompt(message, history, system_message)
    encoded = tokenizer(prompt, return_tensors="pt")
    input_ids = encoded["input_ids"]
    attention_mask = encoded["attention_mask"]

    # Prompt plus generated tokens must fit in the model's positional window.
    context_limit = model.config.n_positions
    new_token_budget = max(1, min(int(max_tokens), context_limit - 1))
    prompt_budget = context_limit - new_token_budget
    if input_ids.shape[1] > prompt_budget:
        # Drop the oldest tokens so the most recent turns are kept.
        input_ids = input_ids[:, -prompt_budget:]
        attention_mask = attention_mask[:, -prompt_budget:]

    with torch.inference_mode():
        output = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_new_tokens=new_token_budget,
            temperature=temperature,
            top_p=top_p,
            num_return_sequences=1,
            do_sample=True,  # Enable sampling for more dynamic responses
            no_repeat_ngram_size=2,
            pad_token_id=tokenizer.pad_token_id,
        )

    # Slice off the prompt by token position rather than searching the decoded
    # text for a marker, which could also occur inside the conversation.
    generated_ids = output[0][input_ids.shape[1]:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()


# Define the Gradio interface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)

# Launch the app
if __name__ == "__main__":
    demo.launch()