"""Side-by-side chat demo: one textbox feeds the same message to Vicuna and GPT-2."""

import random
import time

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load models and tokenizers
model_names = ["lmsys/vicuna-7b-v1.3", "gpt2"]
models = [AutoModelForCausalLM.from_pretrained(name) for name in model_names]
tokenizers = [AutoTokenizer.from_pretrained(name) for name in model_names]

# Upper bound on the number of tokens generated per reply (prompt excluded).
MAX_NEW_TOKENS = 50


def respond(message, chat_history, chatbot_idx):
    """Generate a reply to *message* with one model and append it to its history.

    Args:
        message: Text the user submitted.
        chat_history: List of ``(user, bot)`` pairs shown in the chatbot, or
            ``None`` when the chatbot is still empty.
        chatbot_idx: Index into ``models`` / ``tokenizers`` selecting the model.

    Returns:
        A ``("", updated_history)`` pair: the empty string clears the textbox,
        the history is a new list with the latest exchange appended.
    """
    history = list(chat_history or [])

    # Nothing to answer; also avoids calling generate() with an empty prompt.
    if not message or not message.strip():
        return "", history

    tokenizer = tokenizers[chatbot_idx]
    model = models[chatbot_idx]

    # Calling the tokenizer (rather than .encode) also yields the attention
    # mask, which generate() needs to interpret the prompt reliably.
    inputs = tokenizer(message, return_tensors="pt")
    prompt_length = inputs["input_ids"].shape[-1]

    # Fall back to EOS for padding when the tokenizer defines no pad token.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    output = model.generate(
        **inputs,
        # max_new_tokens bounds only the reply; max_length would also count
        # the prompt and leave no room to answer a long message.
        max_new_tokens=MAX_NEW_TOKENS,
        num_beams=5,
        no_repeat_ngram_size=2,
        pad_token_id=pad_token_id,
    )

    # Causal LMs return prompt + continuation; keep only the continuation so
    # the bot does not echo the user's message back.
    reply_tokens = output[0][prompt_length:]
    bot_message = tokenizer.decode(reply_tokens, skip_special_tokens=True).strip()

    history.append((message, bot_message))
    return "", history


def respond_all(message, vicuna_history, gpt2_history):
    """Send *message* to both models and return the cleared textbox plus both histories."""
    _, vicuna_history = respond(message, vicuna_history, 0)
    _, gpt2_history = respond(message, gpt2_history, 1)
    return "", vicuna_history, gpt2_history


with gr.Blocks() as demo:
    with gr.Row():
        vicuna_chatbot = gr.Chatbot(label="Vicuna")
        gpt2_chatbot = gr.Chatbot(label="GPT-2")
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, vicuna_chatbot, gpt2_chatbot])

    # Event inputs/outputs must be components, so the model index cannot be
    # passed in these lists; respond_all supplies it to respond() itself.
    msg.submit(
        respond_all,
        [msg, vicuna_chatbot, gpt2_chatbot],
        [msg, vicuna_chatbot, gpt2_chatbot],
    )

if __name__ == "__main__":
    demo.launch()