# final-chatbot / app.py
# Hugging Face file-viewer header (not part of the program):
#   author avatar: rohitghosh1763 | commit message: "Update app.py"
#   commit: c360f03 (verified) | size: 4.58 kB
from unsloth import FastLanguageModel
import torch
# --- Loader settings --------------------------------------------------------
# Sequence length handed to the loader; any value may be chosen (the original
# note says RoPE scaling is supported automatically).
max_seq_length = 2048
# None = auto-detect. Float16 for Tesla T4 / V100, Bfloat16 for Ampere+.
dtype = None
# 4-bit quantization to reduce memory usage; can be set to False.
load_in_4bit = True

# Pre-quantized 4-bit checkpoints (4x faster downloading, no OOMs).
# Not referenced anywhere else in the visible file; kept as a catalogue.
# More models at https://huggingface.co/unsloth
fourbit_models = [
    "unsloth/mistral-7b-bnb-4bit",
    "unsloth/mistral-7b-instruct-v0.2-bnb-4bit",
    "unsloth/llama-2-7b-bnb-4bit",
    "unsloth/llama-2-13b-bnb-4bit",
    "unsloth/codellama-34b-bnb-4bit",
    "unsloth/tinyllama-bnb-4bit",
    "unsloth/gemma-7b-bnb-4bit",  # New Google 6 trillion tokens model 2.5x faster!
    "unsloth/gemma-2b-bnb-4bit",
]

# --- Base model -------------------------------------------------------------
# Keyword arguments for the base checkpoint. Any model can be chosen here,
# e.g. teknium/OpenHermes-2.5-Mistral-7B. Gated models such as
# meta-llama/Llama-2-7b-hf additionally need a `token="hf_..."` entry.
_base_model_options = dict(
    model_name="unsloth/mistral-7b-v0.3",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
model, tokenizer = FastLanguageModel.from_pretrained(**_base_model_options)
import gradio as gr
# Location of the fine-tuned LoRA model used to answer queries.
# NOTE(review): this is a Google Colab / Drive path; confirm it exists in the
# environment where the app is deployed.
_LORA_MODEL_PATH = "/content/drive/MyDrive/Colab Notebooks/lora_model"

# Upper bound on the number of tokens generated per answer.
_MAX_NEW_TOKENS = 128

# Alpaca-style template: instruction, input and (empty) response slots.
_ALPACA_PROMPT = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{}
### Input:
{}
### Response:
{}"""

# Fixed instruction placed in the template's "Instruction" slot on every query.
_SYSTEM_INSTRUCTION = "Category,Instruction General Response,'Answer the user’s query thoroughly and accurately, ensuring no details or points are omitted. Always recognize that 'AEC' refers to 'Assam Engineering College,' and vice versa, and use this understanding to provide clear, context-aware responses.' Formatting,'Structure the output to be attractive, engaging, and professional, using proper formatting. Break the response into multiple paragraphs or sections if necessary to improve readability and organization.' Use of Lists,'For queries that involve enumerations, options, or multiple steps, use bullet points or numbered lists to present the information clearly and concisely. For example: - When listing departments or facilities. - When explaining procedures or step-by-step guides. - When summarizing key features or FAQs.' Tone,'Maintain a friendly, informative tone, and deliver complete, standard answers to meet the user's expectations"

# Lazily populated (model, tokenizer) pair so the weights are loaded only once.
_inference_bundle = None


def _get_inference_model():
    """Return the fine-tuned (model, tokenizer) pair, loading it on first use."""
    global _inference_bundle
    if _inference_bundle is None:
        from unsloth import FastLanguageModel

        lora_model, lora_tokenizer = FastLanguageModel.from_pretrained(
            model_name=_LORA_MODEL_PATH,  # the model produced by training
            max_seq_length=max_seq_length,
            dtype=dtype,
            load_in_4bit=load_in_4bit,
        )
        FastLanguageModel.for_inference(lora_model)
        _inference_bundle = (lora_model, lora_tokenizer)
    return _inference_bundle


def chatbot_response(user_query):
    """Generate the chatbot's answer to ``user_query``.

    The query is placed in the "Input" slot of the Alpaca prompt together
    with the fixed instruction, the fine-tuned model generates up to
    ``_MAX_NEW_TOKENS`` tokens, and only the newly generated text is
    returned.

    Args:
        user_query: The text typed by the user.

    Returns:
        The model's answer as a stripped string, without the prompt and
        without special tokens.
    """
    from transformers import TextStreamer

    # Loading per request would re-read the weights on every query.
    model, tokenizer = _get_inference_model()

    prompt = _ALPACA_PROMPT.format(
        _SYSTEM_INSTRUCTION,  # instruction
        user_query,  # input
        "",  # output - left blank for generation
    )
    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

    # Echoes tokens to stdout while they are being generated (debug aid).
    text_streamer = TextStreamer(tokenizer)
    output_ids = model.generate(
        **inputs, streamer=text_streamer, max_new_tokens=_MAX_NEW_TOKENS
    )

    # The generated sequence starts with the prompt tokens. Slicing them off
    # by length is robust even when the user's query itself contains the
    # "### Response:" marker, which a text search would trip over.
    prompt_length = inputs["input_ids"].shape[1]
    answer_ids = output_ids[0][prompt_length:]
    # Drop special tokens (e.g. end-of-sequence) so they are not shown to the user.
    final_response = tokenizer.decode(answer_ids, skip_special_tokens=True).strip()

    print("User Query:", user_query)  # Just for debugging, can be removed
    print("Model Response:", final_response)  # Just for debugging, can be removed
    return final_response
# --- Gradio UI --------------------------------------------------------------
# Single-line text input for the user's question.
_query_box = gr.Textbox(
    label="Enter your query:",
    placeholder="Type something...",
)
# Text area that displays whatever chatbot_response returns.
_response_box = gr.Textbox(label="Response:")

# Wire the input and output widgets to the response function.
interface = gr.Interface(
    fn=chatbot_response,
    inputs=_query_box,
    outputs=_response_box,
    title="Simple Chatbot",
    description="This is a simple chatbot interface. Type your query and get a response.",
)

# Start serving the app.
interface.launch()