import gradio as gr
import random
import time
from transformers import AutoModelForCausalLM, AutoTokenizer

# Checkpoint identifiers handed to ``from_pretrained``. The position of a name
# in this list is the index used to pick its entry in ``models``/``tokenizers``.
model_names = ["lmsys/vicuna-7b-v1.3", "gpt2"]
# Load every model first, then every tokenizer, in the order listed above.
models = list(map(AutoModelForCausalLM.from_pretrained, model_names))
tokenizers = list(map(AutoTokenizer.from_pretrained, model_names))

# Page layout: two chat panels side by side, with one text input and a
# clear button underneath.
with gr.Blocks() as demo:
    with gr.Row():
        # Chatbot takes no `live` option (that flag belongs to gr.Interface),
        # so only the label is passed.
        vicuna_chatbot = gr.Chatbot(label="Vicuna")
        gpt2_chatbot = gr.Chatbot(label="GPT-2")
    # Text input for the user's message.
    msg = gr.Textbox()
    # Resets the textbox and both chat panels when clicked.
    clear = gr.ClearButton([msg, vicuna_chatbot, gpt2_chatbot])

    def respond(message, chat_history, chatbot_idx):
        """Generate a reply to ``message`` with one model and record the exchange.

        Args:
            message: Text entered by the user.
            chat_history: List of ``(user_message, bot_message)`` pairs. The new
                exchange is appended to it in place; ``None`` is treated as an
                empty history.
            chatbot_idx: Index into the module-level ``models`` and
                ``tokenizers`` lists selecting which model answers.

        Returns:
            A ``("", chat_history)`` tuple: an empty string (intended as the new
            textbox value) and the updated history.
        """
        if chat_history is None:
            chat_history = []
        # Nothing to answer; this also avoids handing an empty prompt to generate().
        if not message or not message.strip():
            return "", chat_history

        tokenizer = tokenizers[chatbot_idx]
        model = models[chatbot_idx]

        input_ids = tokenizer.encode(message, return_tensors="pt")
        # max_new_tokens limits only the generated reply. max_length would also
        # count the prompt tokens, leaving no room to answer a long message.
        output = model.generate(
            input_ids,
            max_new_tokens=50,
            num_beams=5,
            no_repeat_ngram_size=2,
        )
        # A causal LM returns prompt + continuation; drop the prompt tokens so
        # the bot does not echo the user's message back.
        reply_ids = output[0][input_ids.shape[-1]:]
        bot_message = tokenizer.decode(reply_ids, skip_special_tokens=True)

        chat_history.append((message, bot_message))
        return "", chat_history

    def respond_both(message, vicuna_history, gpt2_history):
        """Send one user message to both models and update both chat panels.

        Args:
            message: Text entered by the user.
            vicuna_history: Current conversation shown in the Vicuna panel.
            gpt2_history: Current conversation shown in the GPT-2 panel.

        Returns:
            ``("", vicuna_history, gpt2_history)``: the new textbox value
            followed by the updated history for each panel.
        """
        # Indices follow the order of model_names: 0 is Vicuna, 1 is GPT-2.
        _, vicuna_history = respond(message, vicuna_history, 0)
        _, gpt2_history = respond(message, gpt2_history, 1)
        return "", vicuna_history, gpt2_history

    # Event inputs and outputs must be Gradio components, so the model index is
    # fixed inside the handler instead of being passed as a bare int.
    msg.submit(
        respond_both,
        [msg, vicuna_chatbot, gpt2_chatbot],
        [msg, vicuna_chatbot, gpt2_chatbot],
    )

# Start the Gradio server for the Blocks app built above.
demo.launch()