import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "mistralai/Mistral-7B-Instruct-v0.2"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Move the model to the GPU so it matches the device the inputs are sent to
# inside generate_response; fall back to CPU if no GPU is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
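
# Note: a 7B model loaded in full fp32 precision needs roughly 28 GB of
# memory. A common alternative (a sketch, assuming a CUDA GPU with enough
# VRAM) is to load the weights in half precision instead:
#
#     model = AutoModelForCausalLM.from_pretrained(
#         model_name, torch_dtype=torch.float16
#     ).to("cuda")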
def generate_response(message):
    """
    Generate a response to the given user message.

    Parameters:
    - message (str): The user's message, as entered in the textbox.

    Returns:
    - response (str): The generated response.
    """
    # apply_chat_template expects a list of role/content dicts, so wrap the
    # raw textbox string as a single user turn.
    messages = [{"role": "user", "content": message}]
    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")
    model_inputs = encodeds.to(device)

    generated_ids = model.generate(
        model_inputs,
        max_new_tokens=1000,
        do_sample=True,
        # Mistral's tokenizer has no pad token; reuse EOS to avoid a warning.
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens so the prompt is not echoed back,
    # and skip special tokens such as <s> and </s>.
    response = tokenizer.batch_decode(
        generated_ids[:, model_inputs.shape[1]:], skip_special_tokens=True
    )[0]
    return response
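
# Quick sanity check before wiring up the UI (hypothetical example prompt):
# print(generate_response("What is the capital of France?"))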
input_chat = gr.Textbox(lines=5, label="Input Chat", placeholder="Enter your message...")
output_response = gr.Textbox(label="Generated Response", placeholder="Generated response will appear here...")
gr.Interface(
    generate_response,
    input_chat,
    output_response,
    title="Chat Response Generation",
    description="Generate responses to user messages with the Mistral-7B-Instruct model.",
    theme="default",
    allow_flagging="never",
).launch()
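
# launch() serves the app on a local URL; passing share=True would also create
# a temporary public link, should you need to share the demo.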