import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the model and tokenizer from Hugging Face
model_name = "google/gemma-3-1b-it"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

def respond(user_input, chat_history):
    """
    Generates the chatbot's response.

    Builds a conversation string from the chat history, appends the latest
    user input, and generates text with the model.
    """
    conversation = ""
    # Build a conversational prompt from past messages.
    for user_msg, bot_msg in chat_history:
        conversation += f"User: {user_msg}\nBot: {bot_msg}\n"
    conversation += f"User: {user_input}\nBot: "

    # Tokenize the conversation prompt and generate a response.
    # tokenizer(...) is used instead of tokenizer.encode(...) so the
    # attention mask is passed to model.generate() as well.
    inputs = tokenizer(conversation, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,  # adjust as needed
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens so the prompt is not repeated,
    # and cut the reply off if the model starts writing the next user turn.
    prompt_length = inputs["input_ids"].shape[1]
    bot_reply = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    bot_reply = bot_reply.split("User:", 1)[0].strip()

    chat_history.append((user_input, bot_reply))
    return "", chat_history  # Clear the input box and return the updated history.

# Build the Gradio interface using Blocks for a conversational layout.
with gr.Blocks() as demo:
    gr.Markdown("# Chatbot powered by google/gemma-3-1b-it")
    chatbot = gr.Chatbot()
    state = gr.State([])  # Keeps track of the conversation history.
    txt = gr.Textbox(show_label=True, placeholder="Type a message here...", label="Your Message")
    # When the user submits a message, call respond().
    txt.submit(respond, inputs=[txt, state], outputs=[txt, chatbot])

if __name__ == "__main__":
    demo.launch()
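
# ---------------------------------------------------------------------------
# Optional variant (commented out so it does not affect the running app):
# gemma-3-1b-it is an instruction-tuned chat model, so a hand-rolled
# "User:/Bot:" transcript is likely to underperform the model's own chat
# format. Below is a minimal sketch of respond() that uses the tokenizer's
# apply_chat_template() instead. It assumes the standard "user"/"assistant"
# roles from the transformers chat-template API; treat it as a sketch, not a
# tested drop-in replacement.
#
# def respond_with_template(user_input, chat_history):
#     messages = []
#     for user_msg, bot_msg in chat_history:
#         messages.append({"role": "user", "content": user_msg})
#         messages.append({"role": "assistant", "content": bot_msg})
#     messages.append({"role": "user", "content": user_input})
#     # apply_chat_template() renders and tokenizes the conversation in the
#     # format the model was fine-tuned on, ending with a generation prompt.
#     input_ids = tokenizer.apply_chat_template(
#         messages, add_generation_prompt=True, return_tensors="pt"
#     )
#     outputs = model.generate(
#         input_ids,
#         max_new_tokens=100,
#         do_sample=True,
#         temperature=0.7,
#         pad_token_id=tokenizer.eos_token_id,
#     )
#     # Decode only the newly generated tokens.
#     bot_reply = tokenizer.decode(
#         outputs[0][input_ids.shape[1]:], skip_special_tokens=True
#     ).strip()
#     chat_history.append((user_input, bot_reply))
#     return "", chat_history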