import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the Vicuna 7B v1.3 LMSys model and tokenizer
model_name = "lmsys/vicuna-7b-v1.3"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        input_ids = tokenizer.encode(message, return_tensors="pt")
        output_ids = model.generate(input_ids, max_length=50, num_beams=5, no_repeat_ngram_size=2)
        bot_message = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        
        chat_history.append((message, bot_message))
        time.sleep(2)
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])

demo.launch()