import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the model and tokenizer from Hugging Face
model_name = "google/gemma-3-1b-it"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

def respond(user_input, chat_history):
    """
    Generates the chatbot's response.

    Builds a conversation string from the chat history, appends the latest
    user input, and generates text with the model.
    """
    conversation = ""
    # Build a conversational prompt from past messages.
    for user_msg, bot_msg in chat_history:
        conversation += f"User: {user_msg}\nBot: {bot_msg}\n"
    conversation += f"User: {user_input}\nBot: "

    # Tokenize the conversation prompt and generate a response.
    # tokenizer(...) is used instead of tokenizer.encode(...) so the
    # attention mask is passed to model.generate() as well.
    inputs = tokenizer(conversation, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,  # adjust as needed
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens so the prompt is not repeated,
    # and cut the reply off if the model starts writing the next user turn.
    prompt_length = inputs["input_ids"].shape[1]
    bot_reply = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    bot_reply = bot_reply.split("User:", 1)[0].strip()

    chat_history.append((user_input, bot_reply))
    return "", chat_history  # Clear the input box and return the updated history.

# Build the Gradio interface using Blocks for a conversational layout.
with gr.Blocks() as demo:
    gr.Markdown("# Chatbot powered by google/gemma-3-1b-it")
    chatbot = gr.Chatbot()
    state = gr.State([])  # Keeps track of the conversation history.
    txt = gr.Textbox(show_label=True, placeholder="Type a message here...", label="Your Message")
    # When the user submits a message, call respond().
    txt.submit(respond, inputs=[txt, state], outputs=[txt, chatbot])

if __name__ == "__main__":
    demo.launch()
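
# ---------------------------------------------------------------------------
# Optional variant (commented out so it does not affect the running app):
# gemma-3-1b-it is an instruction-tuned chat model, so a hand-rolled
# "User:/Bot:" transcript is likely to underperform the model's own chat
# format. Below is a minimal sketch of respond() that uses the tokenizer's
# apply_chat_template() instead. It assumes the standard "user"/"assistant"
# roles from the transformers chat-template API; treat it as a sketch, not a
# tested drop-in replacement.
#
# def respond_with_template(user_input, chat_history):
#     messages = []
#     for user_msg, bot_msg in chat_history:
#         messages.append({"role": "user", "content": user_msg})
#         messages.append({"role": "assistant", "content": bot_msg})
#     messages.append({"role": "user", "content": user_input})
#     # apply_chat_template() renders and tokenizes the conversation in the
#     # format the model was fine-tuned on, ending with a generation prompt.
#     input_ids = tokenizer.apply_chat_template(
#         messages, add_generation_prompt=True, return_tensors="pt"
#     )
#     outputs = model.generate(
#         input_ids,
#         max_new_tokens=100,
#         do_sample=True,
#         temperature=0.7,
#         pad_token_id=tokenizer.eos_token_id,
#     )
#     # Decode only the newly generated tokens.
#     bot_reply = tokenizer.decode(
#         outputs[0][input_ids.shape[1]:], skip_special_tokens=True
#     ).strip()
#     chat_history.append((user_input, bot_reply))
#     return "", chat_history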