Spaces:

syedmudassir16
/

Mood_Based_Music_Recommender

Running

App Files Files Community

syedmudassir16 committed on Jul 23, 2024

Commit

905a08e

verified ·

1 Parent(s): fb18002

dr_infer_voice_to_text

Browse files

Files changed (1) hide show

app.py +102 -57

app.py CHANGED Viewed

@@ -1,10 +1,31 @@
-from huggingface_hub import InferenceClient
 import gradio as gr
-client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
-def format_prompt(message, history):
-    fixed_prompt= """
             You are a smart mood analyser, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the user’s mood. Return a single-word reply from one of the options if you have classified. Suppose you classify a sentence as happy, then just respond with "happy".
             Note: Do not write anything else other than the classified mood if classified.
@@ -174,14 +195,43 @@ def format_prompt(message, history):
             User: Lets turn up the music and have some fun!
             LLM Response: Party
             """
-    prompt = f"<s>{fixed_prompt}"
-    for user_prompt, bot_response in history:
-        prompt += f"\n User:{user_prompt}\n LLM Response:{bot_response}"
-    # Add the current message
-    prompt += f"\nUser: {message}\nLLM Response:"
-#   breakpoint()
-    return prompt
 def classify_mood(input_string):
     input_string = input_string.lower()
     mood_words = {"happy", "sad", "instrumental", "party"}
@@ -190,48 +240,43 @@ def classify_mood(input_string):
             return word, True
     return None, False
-def generate(
-    prompt, history, temperature=0.1, max_new_tokens=2048, top_p=0.8, repetition_penalty=1.0,
-):
-    temperature = float(temperature)
-    if temperature < 1e-2:
-        temperature = 1e-2
-    top_p = float(top_p)
-    generate_kwargs = dict(
-        temperature=temperature,
-        max_new_tokens=max_new_tokens,
-        top_p=top_p,
-        repetition_penalty=repetition_penalty,
-        do_sample=True,
-        seed=42,
-    )
-    formatted_prompt = format_prompt(prompt, history)
-    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-    output = ""
-    for response in stream:
-        output += response.token.text
-        mood, is_classified = classify_mood(output)
-        # Print the chatbot's response
         if is_classified:
-            print("Chatbot:", mood.capitalize())
-            playlist_message = f"Playing {mood.capitalize()} playlist for you!"
-            output=playlist_message
-            return output
-        # yield output
-    return output
-demo = gr.ChatInterface(fn=generate,
-                        title="Mood-Based Music Recommender",
-                        retry_btn=None,
-                        undo_btn=None,
-                        clear_btn=None,
-                        description="<span style='font-size: larger; font-weight: bold;'>Hi! I'm a music recommender app. What kind of music do you want to listen to, or how are you feeling today?</span>",
-                       )
-demo.queue().launch()

+import argparse
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
 import gradio as gr
class llmChatbot:
    """Mood-classifying chatbot backed by a 4-bit quantized causal language model.

    The model and tokenizer are loaded once in ``__init__``; ``generate`` builds
    a few-shot prompt from the conversation so far and returns only the text
    the model produced for the newest user message.
    """

    def __init__(self, model_name, temperature=0.3, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
        """Load ``model_name`` with NF4 4-bit quantization and store sampling defaults.

        Args:
            model_name: Identifier passed to ``from_pretrained`` for both the
                model and the tokenizer.
            temperature: Default sampling temperature for ``generate``.
            max_new_tokens: Default cap on the number of generated tokens.
            top_p: Default nucleus-sampling threshold.
            repetition_penalty: Default repetition penalty.
        """
        # Specify how to quantize the model.
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype="float16",  # the string form is used instead of torch.float16
        )
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=quantization_config,
            device_map="auto",
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        # generate() needs a pad token id; fall back to EOS when none is defined.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        self.temperature = temperature
        self.max_new_tokens = max_new_tokens
        self.top_p = top_p
        self.repetition_penalty = repetition_penalty

    def format_prompt(self, message, history):
        """Return the full prompt: fixed instructions, past turns, then ``message``.

        Args:
            message: The newest user utterance.
            history: Iterable of ``(user_prompt, bot_response)`` pairs, oldest first.

        Returns:
            A string ending in ``"LLM Response:"`` so the model completes the reply.
        """
        fixed_prompt = """
            You are a smart mood analyser, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the user’s mood. Return a single-word reply from one of the options if you have classified. Suppose you classify a sentence as happy, then just respond with "happy".
            Note: Do not write anything else other than the classified mood if classified.
            User: Lets turn up the music and have some fun!
            LLM Response: Party
            """
        # Start with the fixed prompt, then replay the conversation so far.
        parts = [f"<s>{fixed_prompt}"]
        for user_prompt, bot_response in history:
            parts.append(f"\nUser: {user_prompt}\nLLM Response: {bot_response}")
        # The current message goes last, leaving the response slot open.
        parts.append(f"\nUser: {message}\nLLM Response:")
        return "".join(parts)

    def generate(self, message, history, temperature=None, max_new_tokens=None, top_p=None, repetition_penalty=None):
        """Generate the model's reply to ``message`` given ``history``.

        Any sampling argument left as ``None`` falls back to the value stored
        on the instance.

        Returns:
            The newly generated text only (prompt excluded), stripped of
            surrounding whitespace.
        """
        temperature = self.temperature if temperature is None else temperature
        max_new_tokens = self.max_new_tokens if max_new_tokens is None else max_new_tokens
        top_p = self.top_p if top_p is None else top_p
        repetition_penalty = self.repetition_penalty if repetition_penalty is None else repetition_penalty

        prompt = self.format_prompt(message, history)
        # The model is placed with device_map="auto", so follow the model's
        # device instead of assuming "cuda" is where its inputs belong.
        inputs = self.tokenizer(prompt, return_tensors="pt", padding=True).to(self.model.device)
        generate_kwargs = dict(
            temperature=temperature,
            max_new_tokens=max_new_tokens,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            do_sample=True,
            pad_token_id=self.tokenizer.pad_token_id,  # explicitly set the pad_token_id
        )
        output_ids = self.model.generate(**inputs, **generate_kwargs)
        # Strip the prompt at the token level. Slicing the decoded string by
        # len(prompt) is unreliable: decoding with skip_special_tokens=True does
        # not reproduce the prompt text character for character (e.g. the
        # literal "<s>"), which can cut off the start of the reply.
        prompt_token_count = inputs["input_ids"].shape[1]
        new_token_ids = output_ids[0][prompt_token_count:]
        return self.tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()
 def classify_mood(input_string):
     input_string = input_string.lower()
     mood_words = {"happy", "sad", "instrumental", "party"}
             return word, True
     return None, False
# Loaded ASR pipelines keyed by model name, so each model is loaded only once.
_ASR_PIPELINES = {}


def speech_to_text(speech, model="facebook/wav2vec2-base-960h"):
    """Transcribe ``speech`` with an automatic-speech-recognition pipeline.

    Args:
        speech: Audio input forwarded unchanged to the pipeline (the caller in
            this file passes a file path).
        model: Name of the ASR model to use.

    Returns:
        The ``"text"`` field of the pipeline result.
    """
    asr = _ASR_PIPELINES.get(model)
    if asr is None:
        # Building the pipeline loads the model; do it once per model rather
        # than on every request.
        asr = pipeline("automatic-speech-recognition", model=model)
        _ASR_PIPELINES[model] = asr
    return asr(speech)["text"]
# Loaded TTS pipelines keyed by model name, so each model is loaded only once.
_TTS_PIPELINES = {}


def text_to_speech(text, model="facebook/fastspeech2-en-ljspeech"):
    """Synthesize ``text`` with a text-to-speech pipeline.

    Args:
        text: The text to speak.
        model: Name of the TTS model to use.

    Returns:
        The ``"audio"`` field of the pipeline result.
    """
    # NOTE(review): the default checkpoint appears to be a fairseq model;
    # confirm it loads through the transformers "text-to-speech" pipeline.
    tts = _TTS_PIPELINES.get(model)
    if tts is None:
        # Building the pipeline loads the model; do it once per model rather
        # than on every request.
        tts = pipeline("text-to-speech", model=model)
        _TTS_PIPELINES[model] = tts
    # NOTE(review): only the waveform is returned; a consumer that needs the
    # sampling rate as well (e.g. a numpy audio output) cannot get it from
    # here - confirm against the caller.
    return tts(text)["audio"]
if __name__ == "__main__":
    # Script entry point: parse CLI options, load the model, start the voice UI.
    parser = argparse.ArgumentParser(description="Start the Mistral chatbot application.")
    parser.add_argument("--model_name", type=str, default="mistralai/Mistral-7B-Instruct-v0.2", help="The name of the model to use.")
    args = parser.parse_args()
    model_name = args.model_name

    # Instantiate the chatbot with necessary parameters
    mistral_chatbot = llmChatbot(model_name=model_name)
    # NOTE(review): this history list is shared by every call to
    # chatbot_response, so all users of the running app share one conversation.
    history = []
    print("How are you doing today?")

    def chatbot_response(audio_input):
        """Turn one recorded utterance into a spoken and a textual reply.

        Returns a ``(audio_output, response_text)`` pair matching the two
        outputs of the interface below.
        """
        # With live=True the callback also runs when the recording is cleared,
        # in which case there is no audio to transcribe.
        if audio_input is None:
            return None, ""
        text_input = speech_to_text(audio_input)
        # Nothing recognisable was said: skip the LLM/TTS round trip and keep
        # the empty turn out of the conversation history.
        if not text_input or not text_input.strip():
            return None, ""
        result = mistral_chatbot.generate(text_input, history)
        mood, is_classified = classify_mood(result)
        # Once a mood is recognised reply with just the mood; otherwise pass
        # the model's follow-up through unchanged.
        if is_classified:
            response_text = mood.capitalize()
        else:
            response_text = result
        audio_output = text_to_speech(response_text)
        history.append((text_input, response_text))
        return audio_output, response_text

    # NOTE(review): `source="microphone"` is the Gradio 3.x spelling; newer
    # releases use `sources=["microphone"]` - confirm against the pinned version.
    gr.Interface(
        fn=chatbot_response,
        inputs=gr.Audio(source="microphone", type="filepath"),
        outputs=[gr.Audio(type="numpy"), "text"],
        live=True
    ).launch()