"""Minimal Gradio chat UI backed by the LMSYS Vicuna 7B v1.3 causal LM."""

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the Vicuna 7B v1.3 LMSys model and tokenizer
model_name = "lmsys/vicuna-7b-v1.3"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        """Generate a reply to *message* and append the turn to the history.

        Args:
            message: Text the user submitted in the textbox.
            chat_history: List of ``(user_message, bot_message)`` pairs
                currently shown in the chatbot; it is extended in place.

        Returns:
            A ``("", chat_history)`` tuple: the empty string clears the
            textbox, and the history refreshes the chatbot component.
        """
        # Nothing to answer: leave the history untouched instead of asking
        # the model to continue an empty prompt.
        if not message or not message.strip():
            return "", chat_history

        # NOTE(review): the raw message is sent as the prompt; Vicuna may
        # expect a specific conversation template -- confirm against the
        # model card before relying on answer quality.
        input_ids = tokenizer.encode(message, return_tensors="pt")
        prompt_length = input_ids.shape[-1]

        # max_new_tokens bounds only the reply. The previous max_length=50
        # also counted prompt tokens, so long messages left no room to answer.
        output_ids = model.generate(
            input_ids,
            max_new_tokens=50,
            num_beams=5,
            no_repeat_ngram_size=2,
        )

        # A decoder-only model returns prompt + continuation; drop the prompt
        # tokens so the bot does not echo the user's message back.
        reply_ids = output_ids[0][prompt_length:]
        bot_message = tokenizer.decode(reply_ids, skip_special_tokens=True)

        # The original called time.sleep(2) here without importing `time`,
        # raising NameError on every submit; the artificial delay is dropped.
        chat_history.append((message, bot_message))
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])

demo.launch()