import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr

# Load the pre-trained model and tokenizer
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Use a GPU when available; fall back to CPU otherwise
device = "cuda" if torch.cuda.is_available() else "cpu"
# torch_dtype="auto" loads the checkpoint in its native precision
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto").to(device)
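
# Note (rough estimate, not part of the original script): Mistral-7B needs
# about 14-15 GB of memory in half precision, and roughly double that in float32.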
def generate_response(message):
    """
    Generate a response to the given user message.

    Parameters:
    - message (str): The user's chat message.

    Returns:
    - response (str): The generated response.
    """
    # Wrap the raw text in the role/content format expected by the chat template;
    # the Gradio Textbox passes a plain string, not a list of message dicts
    messages = [{"role": "user", "content": message}]
    # Apply the chat template and encode the messages
    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")
    # Move the inputs to the same device as the model
    model_inputs = encodeds.to(device)
    # Generate a response
    generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True,
                                   pad_token_id=tokenizer.eos_token_id)
    # Decode only the newly generated tokens, dropping the echoed prompt
    response = tokenizer.batch_decode(generated_ids[:, model_inputs.shape[1]:],
                                      skip_special_tokens=True)[0]
    return response
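
# A minimal sketch of the chat format consumed above (hypothetical example values):
#   messages = [{"role": "user", "content": "What is the capital of France?"},
#               {"role": "assistant", "content": "Paris."},
#               {"role": "user", "content": "And of Germany?"}]
# apply_chat_template turns such a list into the model's prompt format;
# generate_response wraps a single raw string into the one-message case.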
# Define Gradio interface components
input_chat = gr.Textbox(lines=5, label="Input Chat", placeholder="Enter a chat message...")
output_response = gr.Textbox(label="Generated Response", placeholder="The generated response will appear here...")

# Create and launch the Gradio interface
gr.Interface(generate_response, input_chat, output_response,
             title="Chat Response Generation",
             description="Generate responses to user messages with the Mistral-7B-Instruct model.",
             theme="default",
             allow_flagging="never").launch()
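
# Running the script (e.g. `python app.py`) starts a local Gradio server,
# served by default at http://127.0.0.1:7860.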