# Gemma / app.py
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
# Load the model and tokenizer from Hugging Face
model_name = "google/gemma-3-1b-it"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
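# Note: google/gemma-3-1b-it is a gated checkpoint, so downloading it may
# require accepting the license on the Hub and authenticating first (e.g.
# via `huggingface-cli login`), and Gemma 3 needs a recent transformers
# release. On a GPU machine, passing device_map="auto" and a half-precision
# torch_dtype to from_pretrained() is an optional memory-saving tweak.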
def respond(user_input, chat_history):
    """
    Generates the chatbot's response.

    Builds a conversation string from the chat history, appends the
    latest user input, and generates text using the model.
    """
    conversation = ""
    # Build a conversational prompt from past (user, bot) message pairs.
    for user_msg, bot_msg in chat_history:
        conversation += f"User: {user_msg}\nBot: {bot_msg}\n"
    conversation += f"User: {user_input}\nBot: "
    # Tokenize the conversation prompt and generate a response.
    inputs = tokenizer.encode(conversation, return_tensors="pt")
    outputs = model.generate(
        inputs,
        max_new_tokens=100,  # cap the reply length; adjust as needed
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,
    )
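    # do_sample=True with temperature=0.7 trades determinism for variety;
    # setting do_sample=False (greedy decoding) makes replies reproducible.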
    # Decode only the newly generated tokens so the prompt text is not
    # echoed back (slicing the decoded string by len(conversation) is
    # fragile because tokenization can shift offsets), then cut at the
    # next "User:" turn in case the model keeps writing the conversation.
    new_tokens = outputs[0][inputs.shape[1]:]
    bot_reply = tokenizer.decode(new_tokens, skip_special_tokens=True)
    bot_reply = bot_reply.split("User:", 1)[0].strip()
    chat_history.append((user_input, bot_reply))
    return "", chat_history  # clear the textbox and return the updated history
# Build the Gradio interface using Blocks for a conversational layout.
with gr.Blocks() as demo:
    gr.Markdown("# Chatbot powered by google/gemma-3-1b-it")
    chatbot = gr.Chatbot()
    state = gr.State([])  # keeps the per-session conversation history
    txt = gr.Textbox(show_label=True, placeholder="Type a message here...", label="Your Message")
    # When the user submits a message, call respond().
    txt.submit(respond, inputs=[txt, state], outputs=[txt, chatbot])
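    # Note: state stays in sync because respond() appends to the history
    # list in place; returning chat_history as an extra output and adding
    # state to outputs would make the data flow more explicit.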
if __name__ == "__main__":
    demo.launch()
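# Tip: demo.launch(share=True) would also create a temporary public link
# for local testing; on Hugging Face Spaces the plain launch() is enough.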