|
import argparse |
|
import torch |
|
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline |
|
import gradio as gr |
|
|
|
class llmChatbot:
    """Mood-classification chatbot backed by a 4-bit quantized causal LM.

    Wraps a Hugging Face causal language model behind a fixed few-shot prompt
    that steers the model to either classify the user's mood into one of
    {Happy, Sad, Instrumental, Party} or keep asking follow-up questions.
    """

    def __init__(self, model_name, temperature=0.3, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
        """Load the tokenizer and the 4-bit (NF4) quantized model.

        Args:
            model_name: Hugging Face model id or local path.
            temperature: Default sampling temperature.
            max_new_tokens: Default cap on generated tokens per turn.
            top_p: Default nucleus-sampling threshold.
            repetition_penalty: Default repetition penalty (1.0 = disabled).
        """
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype="float16",
        )

        self.model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config, device_map="auto")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        # Mistral's tokenizer ships without a pad token; reuse EOS so
        # tokenizer(..., padding=True) calls don't fail.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        self.temperature = temperature
        self.max_new_tokens = max_new_tokens
        self.top_p = top_p
        self.repetition_penalty = repetition_penalty

    def format_prompt(self, message, history):
        """Build the full prompt: fixed few-shot instructions + chat history + new message.

        Args:
            message: Latest user utterance.
            history: Iterable of (user_text, bot_text) pairs from earlier turns.

        Returns:
            Complete prompt string ending with "LLM Response:" so the model
            continues with its reply.
        """
        fixed_prompt = """
You are a smart mood analyser, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the user’s mood. Return a single-word reply from one of the options if you have classified. Suppose you classify a sentence as happy, then just respond with "happy".

Note: Do not write anything else other than the classified mood if classified.

Note: If any question or any user text cannot be classified, follow up with a question to know the user's mood until you classify the mood.

Note: Mood should be classified only from any of these 4 classes {Happy, Sad, Instrumental, Party}, if not any of these 4 then continue with a follow-up question until you classify the mood.

Note: if user asks something like i need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.

Examples
User: What is C programming?
LLM Response: C programming is a programming language. How are you feeling now after knowing the answer?

User: Can I get a coffee?
LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
User: I feel like rocking
LLM Response: Party

User: I'm feeling so energetic today!
LLM Response: Happy

User: I'm feeling down today.
LLM Response: Sad

User: I'm ready to have some fun tonight!
LLM Response: Party

User: I need some background music while I am stuck in traffic.
LLM Response: Instrumental

User: Hi
LLM Response: Hi, how are you doing?

User: Feeling okay only.
LLM Response: Are you having a good day?
User: I don't know
LLM Response: Do you want to listen to some relaxing music?
User: No
LLM Response: How about listening to some rock and roll music?
User: Yes
LLM Response: Party

User: Where do I find an encyclopedia?
LLM Response: You can find it in any of the libraries or on the Internet. Does this answer make you happy?

User: I need a coffee
LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?

User: I just got promoted at work!
LLM Response: Happy

User: Today is my birthday!
LLM Response: Happy

User: I won a prize in the lottery.
LLM Response: Happy

User: I am so excited about my vacation next week!
LLM Response: Happy

User: I aced my exams!
LLM Response: Happy

User: I had a wonderful time with my family today.
LLM Response: Happy

User: I just finished a great workout!
LLM Response: Happy

User: I am feeling really good about myself today.
LLM Response: Happy

User: I finally finished my project and it was a success!
LLM Response: Happy

User: I just heard my favorite song on the radio.
LLM Response: Happy

User: My pet passed away yesterday.
LLM Response: Sad

User: I lost my job today.
LLM Response: Sad

User: I'm feeling really lonely.
LLM Response: Sad

User: I didn't get the results I wanted.
LLM Response: Sad

User: I had a fight with my best friend.
LLM Response: Sad

User: I'm feeling really overwhelmed with everything.
LLM Response: Sad

User: I just got some bad news.
LLM Response: Sad

User: I'm missing my family.
LLM Response: Sad

User: I am feeling really down today.
LLM Response: Sad

User: Nothing seems to be going right.
LLM Response: Sad

User: I need some music while I study.
LLM Response: Instrumental

User: I want to listen to something soothing while I work.
LLM Response: Instrumental

User: Do you have any recommendations for background music?
LLM Response: Instrumental

User: I'm looking for some relaxing tunes.
LLM Response: Instrumental

User: I need some music to focus on my tasks.
LLM Response: Instrumental

User: Can you suggest some ambient music for meditation?
LLM Response: Instrumental

User: What's good for background music during reading?
LLM Response: Instrumental

User: I need some calm music to help me sleep.
LLM Response: Instrumental

User: I prefer instrumental music while cooking.
LLM Response: Instrumental

User: What's the best music to play while doing yoga?
LLM Response: Instrumental

User: Let's have a blast tonight!
LLM Response: Party

User: I'm in the mood to dance!
LLM Response: Party

User: I want to celebrate all night long!
LLM Response: Party

User: Time to hit the club!
LLM Response: Party

User: I feel like partying till dawn.
LLM Response: Party

User: Let's get this party started!
LLM Response: Party

User: I'm ready to party hard tonight.
LLM Response: Party

User: I'm in the mood for some loud music and dancing!
LLM Response: Party

User: Tonight's going to be epic!
LLM Response: Party

User: Lets turn up the music and have some fun!
LLM Response: Party
"""

        # <s> is Mistral's BOS marker; the few-shot instructions follow it.
        prompt = f"<s>{fixed_prompt}"

        # Replay prior turns so the model sees the whole conversation.
        for user_prompt, bot_response in history:
            prompt += f"\nUser: {user_prompt}\nLLM Response: {bot_response}"

        # End on "LLM Response:" so generation continues as the bot's reply.
        prompt += f"\nUser: {message}\nLLM Response:"

        return prompt

    def generate(self, message, history, temperature=None, max_new_tokens=None, top_p=None, repetition_penalty=None):
        """Generate the model's reply to *message* given the conversation *history*.

        Args:
            message: Latest user utterance.
            history: Iterable of (user_text, bot_text) pairs from earlier turns.
            temperature, max_new_tokens, top_p, repetition_penalty:
                Optional per-call overrides; fall back to the instance defaults.

        Returns:
            The newly generated reply text (prompt excluded), stripped.
        """
        if temperature is None:
            temperature = self.temperature
        if max_new_tokens is None:
            max_new_tokens = self.max_new_tokens
        if top_p is None:
            top_p = self.top_p
        if repetition_penalty is None:
            repetition_penalty = self.repetition_penalty

        prompt = self.format_prompt(message, history)
        # FIX: move inputs to the model's own device instead of hard-coding
        # "cuda" — with device_map="auto" the embedding layer may sit on any
        # device, and the hard-coded "cuda" crashes on CPU-only hosts.
        inputs = self.tokenizer(prompt, return_tensors="pt", padding=True).to(self.model.device)
        generate_kwargs = dict(
            temperature=temperature,
            max_new_tokens=max_new_tokens,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            do_sample=True,
            pad_token_id=self.tokenizer.pad_token_id,
        )
        output_ids = self.model.generate(**inputs, **generate_kwargs)
        # FIX: decode only the newly generated token ids. The original sliced
        # the decoded string by len(prompt), but detokenization does not
        # round-trip the prompt byte-for-byte (whitespace/special-token
        # normalization), so a character-offset slice can clip the reply or
        # leak prompt text into it.
        prompt_token_count = inputs["input_ids"].shape[-1]
        new_token_ids = output_ids[0][prompt_token_count:]
        return self.tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()
|
|
|
def classify_mood(input_string):
    """Scan an LLM reply for one of the four mood keywords.

    Args:
        input_string: Reply text from the model.

    Returns:
        A ``(mood, classified)`` tuple: ``(keyword, True)`` with keyword in
        {"happy", "sad", "instrumental", "party"} when one occurs in the
        text (case-insensitive substring match), else ``(None, False)``.
    """
    input_string = input_string.lower()
    # FIX: iterate a fixed-order tuple instead of a set. Set iteration order
    # varies across interpreter runs (hash randomization), so a reply that
    # happened to contain two keywords could return either one
    # nondeterministically. The tuple makes the precedence deterministic.
    mood_words = ("happy", "sad", "instrumental", "party")
    for word in mood_words:
        if word in input_string:
            return word, True
    return None, False
|
|
|
def speech_to_text(speech):
    """Transcribe audio to text with a wav2vec2 ASR pipeline.

    Args:
        speech: Input accepted by the ASR pipeline (here: an audio file path
            supplied by the Gradio microphone component).

    Returns:
        The transcribed text string.
    """
    # PERF FIX: build the ASR pipeline once and memoize it on the function
    # object — the original reloaded the full wav2vec2 model on every call,
    # adding seconds of latency to each chat turn.
    asr = getattr(speech_to_text, "_asr", None)
    if asr is None:
        asr = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
        speech_to_text._asr = asr
    return asr(speech)["text"]
|
|
|
def text_to_speech(text):
    """Synthesize speech audio for *text*.

    Args:
        text: The sentence to speak.

    Returns:
        The value under the pipeline output's "audio" key (raw waveform data).
    """
    # PERF FIX: build the TTS pipeline once and memoize it on the function
    # object instead of reloading the model on every call.
    # NOTE(review): "facebook/fastspeech2-en-ljspeech" is a fairseq
    # checkpoint — confirm it actually loads under the transformers
    # "text-to-speech" pipeline, or swap in a transformers-native TTS model.
    tts = getattr(text_to_speech, "_tts", None)
    if tts is None:
        tts = pipeline("text-to-speech", model="facebook/fastspeech2-en-ljspeech")
        text_to_speech._tts = tts
    return tts(text)["audio"]
|
|
|
if __name__ == "__main__":
    # CLI: only the model id is configurable; everything else uses the
    # llmChatbot defaults.
    parser = argparse.ArgumentParser(description="Start the Mistral chatbot application.")
    parser.add_argument("--model_name", type=str, default="mistralai/Mistral-7B-Instruct-v0.2", help="The name of the model to use.")

    args = parser.parse_args()
    model_name = args.model_name

    # Load the 4-bit quantized chat model once at startup (downloads weights
    # on first run).
    mistral_chatbot = llmChatbot(model_name=model_name)
    # Conversation history as (user_text, bot_text) pairs, closed over and
    # mutated by chatbot_response. NOTE(review): this single list is shared
    # by every Gradio session — concurrent users would interleave turns;
    # confirm single-user use is intended.
    history = []
    print("How are you doing today?")

    def chatbot_response(audio_input):
        # Full voice round-trip: audio -> text -> LLM reply -> mood check
        # -> synthesized audio. Returns (audio, text) for the two outputs.
        text_input = speech_to_text(audio_input)
        result = mistral_chatbot.generate(text_input, history)
        mood, is_classified = classify_mood(result)
        if is_classified:
            # A mood was detected: speak just the single-word classification.
            response_text = mood.capitalize()
        else:
            # No mood yet: relay the model's follow-up question verbatim.
            response_text = result
        audio_output = text_to_speech(response_text)
        history.append((text_input, response_text))
        return audio_output, response_text

    gr.Interface(
        fn=chatbot_response,
        # NOTE(review): `source=` is the Gradio 3.x spelling; Gradio 4+
        # renamed it to `sources=["microphone"]` — confirm pinned version.
        inputs=gr.Audio(source="microphone", type="filepath"),
        # NOTE(review): gr.Audio(type="numpy") expects a (sample_rate, array)
        # tuple — verify text_to_speech's return value matches that shape.
        outputs=[gr.Audio(type="numpy"), "text"],
        live=True
    ).launch()
|
|