Spaces:

syedmudassir16
/

Mood_Based_Music_Recommender

Sleeping

File size: 10,720 Bytes

import argparse
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
import gradio as gr

class llmChatbot:
    """Few-shot mood classifier backed by a 4-bit quantized causal language model.

    The model is prompted with a fixed instruction block plus worked examples
    and is expected to either answer with one of the moods
    (Happy, Sad, Instrumental, Party) or ask a follow-up question.
    """

    def __init__(self, model_name, temperature=0.3, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
        """Load the model and tokenizer and remember the default sampling settings.

        Args:
            model_name: Identifier passed to ``from_pretrained`` for both the
                model and the tokenizer.
            temperature: Default sampling temperature used by ``generate``.
            max_new_tokens: Default cap on the number of generated tokens.
            top_p: Default nucleus-sampling threshold.
            repetition_penalty: Default repetition penalty.
        """
        # Specify how to quantize the model (4-bit NF4 weights, float16 compute).
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype="float16",  # Use the string "float16" instead of torch.float16
        )

        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=quantization_config,
            device_map="auto",
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        # Fall back to the EOS token for padding when the tokenizer has no pad token.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        self.temperature = temperature
        self.max_new_tokens = max_new_tokens
        self.top_p = top_p
        self.repetition_penalty = repetition_penalty

    def format_prompt(self, message, history):
        """Build the full text prompt for one turn.

        Args:
            message: The current user utterance.
            history: Iterable of ``(user_prompt, bot_response)`` pairs from
                earlier turns, oldest first.

        Returns:
            A string made of ``<s>``, the fixed instruction/examples block,
            every past turn, and the current message followed by an open
            ``LLM Response:`` marker for the model to complete.
        """
        fixed_prompt = """
            You are a smart mood analyser, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the user’s mood. Return a single-word reply from one of the options if you have classified. Suppose you classify a sentence as happy, then just respond with "happy".

            Note: Do not write anything else other than the classified mood if classified.

            Note: If any question or any user text cannot be classified, follow up with a question to know the user's mood until you classify the mood.

            Note: Mood should be classified only from any of these 4 classes {Happy, Sad, Instrumental, Party}, if not any of these 4 then continue with a follow-up question until you classify the mood.

            Note: if user asks something like i need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.

            Examples
            User: What is C programming?
            LLM Response: C programming is a programming language. How are you feeling now after knowing the answer?

            User: Can I get a coffee?
            LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
            User: I feel like rocking
            LLM Response: Party

            User: I'm feeling so energetic today!
            LLM Response: Happy

            User: I'm feeling down today.
            LLM Response: Sad

            User: I'm ready to have some fun tonight!
            LLM Response: Party

            User: I need some background music while I am stuck in traffic.
            LLM Response: Instrumental

            User: Hi
            LLM Response: Hi, how are you doing?

            User: Feeling okay only.
            LLM Response: Are you having a good day?
            User: I don't know
            LLM Response: Do you want to listen to some relaxing music?
            User: No
            LLM Response: How about listening to some rock and roll music?
            User: Yes
            LLM Response: Party

            User: Where do I find an encyclopedia?
            LLM Response: You can find it in any of the libraries or on the Internet. Does this answer make you happy?

            User: I need a coffee
            LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?

            User: I just got promoted at work!
            LLM Response: Happy

            User: Today is my birthday!
            LLM Response: Happy

            User: I won a prize in the lottery.
            LLM Response: Happy

            User: I am so excited about my vacation next week!
            LLM Response: Happy

            User: I aced my exams!
            LLM Response: Happy

            User: I had a wonderful time with my family today.
            LLM Response: Happy

            User: I just finished a great workout!
            LLM Response: Happy

            User: I am feeling really good about myself today.
            LLM Response: Happy

            User: I finally finished my project and it was a success!
            LLM Response: Happy

            User: I just heard my favorite song on the radio.
            LLM Response: Happy

            User: My pet passed away yesterday.
            LLM Response: Sad

            User: I lost my job today.
            LLM Response: Sad

            User: I'm feeling really lonely.
            LLM Response: Sad

            User: I didn't get the results I wanted.
            LLM Response: Sad

            User: I had a fight with my best friend.
            LLM Response: Sad

            User: I'm feeling really overwhelmed with everything.
            LLM Response: Sad

            User: I just got some bad news.
            LLM Response: Sad

            User: I'm missing my family.
            LLM Response: Sad

            User: I am feeling really down today.
            LLM Response: Sad

            User: Nothing seems to be going right.
            LLM Response: Sad

            User: I need some music while I study.
            LLM Response: Instrumental

            User: I want to listen to something soothing while I work.
            LLM Response: Instrumental

            User: Do you have any recommendations for background music?
            LLM Response: Instrumental

            User: I'm looking for some relaxing tunes.
            LLM Response: Instrumental

            User: I need some music to focus on my tasks.
            LLM Response: Instrumental

            User: Can you suggest some ambient music for meditation?
            LLM Response: Instrumental

            User: What's good for background music during reading?
            LLM Response: Instrumental

            User: I need some calm music to help me sleep.
            LLM Response: Instrumental

            User: I prefer instrumental music while cooking.
            LLM Response: Instrumental

            User: What's the best music to play while doing yoga?
            LLM Response: Instrumental

            User: Let's have a blast tonight!
            LLM Response: Party

            User: I'm in the mood to dance!
            LLM Response: Party

            User: I want to celebrate all night long!
            LLM Response: Party

            User: Time to hit the club!
            LLM Response: Party

            User: I feel like partying till dawn.
            LLM Response: Party

            User: Let's get this party started!
            LLM Response: Party

            User: I'm ready to party hard tonight.
            LLM Response: Party

            User: I'm in the mood for some loud music and dancing!
            LLM Response: Party

            User: Tonight's going to be epic!
            LLM Response: Party

            User: Lets turn up the music and have some fun!
            LLM Response: Party
            """

        # Past turns are replayed in the same "User / LLM Response" layout as
        # the examples so the model sees one continuous transcript.
        past_turns = "".join(
            f"\nUser: {user_prompt}\nLLM Response: {bot_response}"
            for user_prompt, bot_response in history
        )

        # The trailing open "LLM Response:" is what the model is asked to complete.
        return f"<s>{fixed_prompt}{past_turns}\nUser: {message}\nLLM Response:"

    def generate(self, message, history, temperature=None, max_new_tokens=None, top_p=None, repetition_penalty=None):
        """Generate the model's reply to ``message`` given the conversation ``history``.

        Any sampling argument left as ``None`` falls back to the value stored
        on the instance by ``__init__``.

        Args:
            message: The current user utterance.
            history: Iterable of ``(user_prompt, bot_response)`` pairs.
            temperature: Optional per-call sampling temperature.
            max_new_tokens: Optional per-call cap on generated tokens.
            top_p: Optional per-call nucleus-sampling threshold.
            repetition_penalty: Optional per-call repetition penalty.

        Returns:
            Only the newly generated text (prompt excluded), stripped of
            leading and trailing whitespace.
        """
        if temperature is None:
            temperature = self.temperature
        if max_new_tokens is None:
            max_new_tokens = self.max_new_tokens
        if top_p is None:
            top_p = self.top_p
        if repetition_penalty is None:
            repetition_penalty = self.repetition_penalty

        prompt = self.format_prompt(message, history)

        # Send the inputs to wherever the model actually lives instead of a
        # hard-coded "cuda": the model is loaded with device_map="auto".
        inputs = self.tokenizer(prompt, return_tensors="pt", padding=True).to(self.model.device)
        prompt_token_count = inputs["input_ids"].shape[-1]

        generate_kwargs = dict(
            temperature=temperature,
            max_new_tokens=max_new_tokens,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            do_sample=True,
            pad_token_id=self.tokenizer.pad_token_id,  # Explicitly set the pad_token_id
        )
        output_ids = self.model.generate(**inputs, **generate_kwargs)

        # Drop the prompt at token level. Slicing the decoded string by
        # len(prompt) is unreliable: with skip_special_tokens=True the decoded
        # text is not guaranteed to start with the prompt verbatim (e.g. the
        # literal "<s>" prefix may be removed), which shifts the cut point.
        new_token_ids = output_ids[0][prompt_token_count:]
        return self.tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()

def classify_mood(input_string):
    """Find the first supported mood keyword mentioned in a model reply.

    The search is case-insensitive and substring-based. When several mood
    words occur, the one that appears earliest in the text wins, so the
    result is deterministic (iterating a ``set`` of strings, as before, made
    the winner depend on hash ordering).

    Args:
        input_string: Text to inspect; ``None`` or an empty string is allowed.

    Returns:
        ``(mood, True)`` with ``mood`` in lower case when a keyword is found,
        otherwise ``(None, False)``.
    """
    if not input_string:
        return None, False

    text = input_string.lower()
    mood_words = ("happy", "sad", "instrumental", "party")

    # Map each mood that occurs in the text to the index of its first occurrence.
    first_seen = {}
    for word in mood_words:
        position = text.find(word)
        if position != -1:
            first_seen[word] = position

    if not first_seen:
        return None, False
    return min(first_seen, key=first_seen.get), True

# Lazily created ASR pipeline, reused across calls so the model is loaded once.
_ASR_PIPELINE = None


def speech_to_text(speech):
    """Transcribe speech to text with the wav2vec2 ASR pipeline.

    The pipeline is built on first use and cached in a module-level variable;
    constructing it on every call would reload the model for each request.

    Args:
        speech: Audio input forwarded unchanged to the pipeline (the caller in
            this file passes the file path provided by Gradio).

    Returns:
        The value of the ``"text"`` field of the pipeline's result.
    """
    global _ASR_PIPELINE
    if _ASR_PIPELINE is None:
        _ASR_PIPELINE = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
    return _ASR_PIPELINE(speech)["text"]

# Lazily created TTS pipeline, reused across calls so the model is loaded once.
_TTS_PIPELINE = None


def text_to_speech(text):
    """Synthesize speech for ``text`` with the text-to-speech pipeline.

    The pipeline is built on first use and cached in a module-level variable;
    constructing it on every call would reload the model for each request.

    Args:
        text: The text to speak.

    Returns:
        The value of the ``"audio"`` field of the pipeline's result.
    """
    # NOTE(review): only the "audio" field is returned; if the consumer also
    # needs the sampling rate it is discarded here - confirm against the caller.
    # NOTE(review): verify that this checkpoint is loadable through the
    # transformers text-to-speech pipeline - TODO confirm.
    global _TTS_PIPELINE
    if _TTS_PIPELINE is None:
        _TTS_PIPELINE = pipeline("text-to-speech", model="facebook/fastspeech2-en-ljspeech")
    return _TTS_PIPELINE(text)["audio"]

def main():
    """Parse CLI arguments, load the chatbot and serve the Gradio voice interface."""
    parser = argparse.ArgumentParser(description="Start the Mistral chatbot application.")
    parser.add_argument("--model_name", type=str, default="mistralai/Mistral-7B-Instruct-v0.2", help="The name of the model to use.")
    args = parser.parse_args()

    # Instantiate the chatbot with necessary parameters
    mistral_chatbot = llmChatbot(model_name=args.model_name)

    # Conversation transcript as (user_text, bot_text) pairs; it lives for the
    # whole process and is shared by every request handled by the interface.
    history = []
    print("How are you doing today?")

    def chatbot_response(audio_input):
        """Handle one voice turn: transcribe, query the model, speak the reply.

        Args:
            audio_input: Value delivered by the Gradio audio input, or
                ``None`` when there is no recording.

        Returns:
            ``(audio_output, response_text)`` for the two output components.
        """
        # With live=True the callback may fire without a recording; skip the
        # turn instead of failing inside the speech-recognition pipeline.
        if audio_input is None:
            return None, ""

        text_input = speech_to_text(audio_input)
        result = mistral_chatbot.generate(text_input, history)

        # Collapse the reply to the bare mood label once one is detected;
        # otherwise pass the model's follow-up question through unchanged.
        mood, is_classified = classify_mood(result)
        response_text = mood.capitalize() if is_classified else result

        audio_output = text_to_speech(response_text)
        history.append((text_input, response_text))
        return audio_output, response_text

    # NOTE(review): the `source=` keyword and the shape expected by a numpy
    # audio output depend on the installed Gradio version - TODO confirm.
    gr.Interface(
        fn=chatbot_response,
        inputs=gr.Audio(source="microphone", type="filepath"),
        outputs=[gr.Audio(type="numpy"), "text"],
        live=True
    ).launch()


if __name__ == "__main__":
    main()