import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the pre-trained model and tokenizer
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Use the GPU when available; fall back to CPU otherwise
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)


def generate_response(user_message):
    """
    Generate a response to the given user message.

    Parameters:
    - user_message (str): The raw text entered in the chat box.

    Returns:
    - response (str): The generated response.
    """
    # Wrap the raw text in the role/content structure the chat template expects
    messages = [{"role": "user", "content": user_message}]

    # Apply the chat template and encode the conversation
    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")

    # Move the inputs to the same device as the model
    model_inputs = encodeds.to(device)

    # Generate a response
    generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)

    # Decode the generated tokens, dropping special tokens
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response


# Define Gradio interface components
input_chat = gr.Textbox(lines=5, label="Input Chat", placeholder="Enter chat messages...")
output_response = gr.Textbox(label="Generated Response",
                             placeholder="Generated response will appear here...")

# Create and launch the Gradio interface
gr.Interface(generate_response,
             input_chat,
             output_response,
             title="Chat Response Generation",
             description="Generate responses based on user messages using the Mistral AI model.",
             theme="default",
             allow_flagging="never").launch()
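
The chat template is not limited to a single turn: tokenizer.apply_chat_template accepts a full conversation as a list of role/content dictionaries, with user and assistant turns alternating for Mistral Instruct. The snippet below is a minimal sketch of that usage; the conversation contents are illustrative only.

# Illustrative multi-turn input for tokenizer.apply_chat_template (example conversation, not from the app above)
conversation = [
    {"role": "user", "content": "What is a tokenizer?"},
    {"role": "assistant", "content": "It splits text into tokens the model can process."},
    {"role": "user", "content": "Why does Mistral Instruct need a chat template?"},
]
encoded = tokenizer.apply_chat_template(conversation, return_tensors="pt")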