import logging
import time

import gradio as gr
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

logger = logging.getLogger(__name__)

# Load model and tokenizer from Hugging Face Hub
model_name = "Electricarchmage/cookbookgpt"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Reuse the EOS token as the pad token and pad on the left so that generated
# tokens directly follow the prompt.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'left'

# Labels used to render each chat role into the plain-text prompt.
_ROLE_LABELS = {"system": "System", "user": "User", "assistant": "Assistant"}


def _history_to_messages(history):
    """Convert Gradio chat history into a list of role/content dicts.

    Accepts both the pair format (``(user, assistant)`` per turn) and the
    "messages" format (dicts with ``role`` and ``content``). Entries whose
    content is empty or not a string are skipped.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            pairs = [(turn.get("role"), turn.get("content"))]
        else:
            pairs = [("user", turn[0]), ("assistant", turn[1])]
        for role, content in pairs:
            if role in ("user", "assistant") and isinstance(content, str) and content:
                messages.append({"role": role, "content": content})
    return messages


def _build_prompt(messages):
    """Render the messages as one prompt ending with an open assistant turn."""
    lines = [f"{_ROLE_LABELS[msg['role']]}: {msg['content']}" for msg in messages]
    lines.append("Assistant:")
    return "\n".join(lines)


# Define the respond function with logging for debugging
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Generate the assistant's reply to ``message`` given the conversation.

    Args:
        message: The newest user message.
        history: Previous turns, either as (user, assistant) pairs or as
            role/content dicts.
        system_message: Optional instruction text placed first in the prompt.
        max_tokens: Requested number of new tokens to generate.
        temperature: Sampling temperature passed to ``model.generate``.
        top_p: Nucleus-sampling threshold passed to ``model.generate``.

    Returns:
        A string containing the reply and the generation time, or an error
        description if generation raised an exception.
    """
    # Prepare the conversation (system prompt, history, and the new message)
    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    # Tokenize the whole conversation as ONE sequence so the model conditions
    # on all of it; tokenizing each message separately would create a batch
    # of unrelated prompts.
    encoded = tokenizer(_build_prompt(messages), return_tensors="pt")
    input_ids = encoded["input_ids"]
    attention_mask = encoded["attention_mask"]

    # Fit prompt + new tokens inside the model's positional limit. The prompt
    # may use whatever the requested new tokens leave free, but never less
    # than half the context; the oldest tokens are dropped first.
    context_size = model.config.n_positions
    requested_new = max(1, int(max_tokens))
    max_prompt_len = max(context_size - requested_new, context_size // 2)
    input_ids = input_ids[:, -max_prompt_len:]
    attention_mask = attention_mask[:, -max_prompt_len:]
    prompt_len = input_ids.shape[1]
    max_new_tokens = min(requested_new, context_size - prompt_len)

    start_time = time.perf_counter()  # Start the timer

    # Generate output tokens
    try:
        with torch.no_grad():
            output = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=max_new_tokens,
                temperature=float(temperature),
                top_p=float(top_p),
                num_return_sequences=1,
                do_sample=True,
                no_repeat_ngram_size=2,
                pad_token_id=tokenizer.pad_token_id,
            )
    except Exception as e:
        # UI boundary: keep the chat alive, but record the traceback.
        logger.exception("Text generation failed")
        return f"Error during generation: {str(e)}"

    generation_time = time.perf_counter() - start_time  # Time taken for generation

    # Decode only the newly generated tokens (everything after the prompt)
    generated_text = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)

    # Extract only the assistant's reply: drop any follow-up user turn the
    # model may have invented.
    assistant_reply = generated_text.split("\nUser:", 1)[0].strip()

    # Add generation time in the response for debugging
    return f"Response: {assistant_reply}\nGeneration time: {generation_time:.2f} seconds"


# Define the Gradio interface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)

# Launch the app
if __name__ == "__main__":
    demo.launch()