|
from huggingface_hub import InferenceClient |
|
from transformers import pipeline |
|
import gradio as gr |
|
import edge_tts |
|
import tempfile |
|
import os |
|
from streaming_stt_nemo import Model |
|
import torch |
|
import random |
|
|
|
|
|
# Text-generation client bound to the Mistral instruct model used for the chat replies.
client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.1")

# Speech-recognition pipeline that turns recorded audio into text.
asr = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
)
|
|
|
def speech_to_text(speech):
    """Transcribe *speech* with the module-level ASR pipeline and return the text."""
    transcription = asr(speech)
    return transcription["text"]
|
|
|
|
|
def classify_mood(input_string):
    """Find the first mood keyword mentioned in *input_string*.

    Matching is a case-insensitive substring search for one of
    ``happy``, ``sad``, ``instrumental`` or ``party``. When several keywords
    are present, the one that appears earliest in the text wins, so the
    result is the same on every run.

    Args:
        input_string: Text to inspect. ``None`` or an empty string is
            treated as "no mood found".

    Returns:
        ``(mood, True)`` with the lowercase keyword when one is found,
        otherwise ``(None, False)``.
    """
    if not input_string:
        return None, False

    text = input_string.lower()
    # A tuple rather than a set: set iteration order for strings varies
    # between interpreter runs, which made multi-keyword results unstable.
    mood_words = ("happy", "sad", "instrumental", "party")

    hits = [(text.find(word), word) for word in mood_words if word in text]
    if not hits:
        return None, False

    # Smallest index == earliest mention in the text.
    _, mood = min(hits)
    return mood, True
|
|
|
def generate(
    prompt,
    history,
    temperature=0.1,
    max_new_tokens=2048,
    top_p=0.8,
    repetition_penalty=1.0,
):
    """Stream a reply from the model, stopping as soon as a mood is recognised.

    The prompt is built with ``format_prompt`` and sent through the
    module-level ``client``. After every streamed token the accumulated text
    is passed to ``classify_mood``; the first time it reports a mood the
    stream is abandoned and a playlist announcement is returned instead of
    the raw model text.

    Args:
        prompt: Latest user message.
        history: Earlier ``(user, bot)`` turns, forwarded to ``format_prompt``.
        temperature: Sampling temperature; values below 0.01 are raised to 0.01.
        max_new_tokens: Passed through to the text-generation call.
        top_p: Nucleus-sampling value, converted to ``float``.
        repetition_penalty: Passed through to the text-generation call.

    Returns:
        ``"Playing <Mood> playlist for you!"`` when a mood keyword shows up
        in the generated text, otherwise the full generated text.
    """
    generate_kwargs = dict(
        temperature=max(float(temperature), 1e-2),
        max_new_tokens=max_new_tokens,
        top_p=float(top_p),
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    formatted_prompt = format_prompt(prompt, history)
    stream = client.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )

    output = ""
    for response in stream:
        token = response.token
        # Skip control tokens (e.g. the end-of-sequence marker): they are not
        # part of the reply and would otherwise be read aloud by the TTS step.
        if getattr(token, "special", False):
            continue

        output += token.text
        # Checked on every token so generation can stop at the first keyword
        # instead of waiting for the model to finish.
        mood, is_classified = classify_mood(output)
        if is_classified:
            print("Chatbot:", mood.capitalize())
            return f"Playing {mood.capitalize()} playlist for you!"

    return output
|
|
|
# Fixed instructions and few-shot examples that open every prompt.
_FIXED_PROMPT = """
You are a smart mood analyser, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the user’s mood. Return a single-word reply from one of the options if you have classified. Suppose you classify a sentence as happy, then just respond with "happy".

Note: Do not write anything else other than the classified mood if classified.

Note: If any question or any user text cannot be classified, follow up with a question to know the user's mood until you classify the mood.

Note: Mood should be classified only from any of these 4 classes {Happy, Sad, Instrumental, Party}, if not any of these 4 then continue with a follow-up question until you classify the mood.

Note: if user asks something like i need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.

Examples
User: What is C programming?
LLM Response: C programming is a programming language. How are you feeling now after knowing the answer?

User: Can I get a coffee?
LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
User: I feel like rocking
LLM Response: Party

User: I'm feeling so energetic today!
LLM Response: Happy

User: I'm feeling down today.
LLM Response: Sad

User: I'm ready to have some fun tonight!
LLM Response: Party

User: I need some background music while I am stuck in traffic.
LLM Response: Instrumental

User: Hi
LLM Response: Hi, how are you doing?

User: Feeling okay only.
LLM Response: Are you having a good day?
User: I don't know
LLM Response: Do you want to listen to some relaxing music?
User: No
LLM Response: How about listening to some rock and roll music?
User: Yes
LLM Response: Party

User: Where do I find an encyclopedia?
LLM Response: You can find it in any of the libraries or on the Internet. Does this answer make you happy?

User: I need a coffee
LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?

User: I just got promoted at work!
LLM Response: Happy

User: Today is my birthday!
LLM Response: Happy

User: I won a prize in the lottery.
LLM Response: Happy

User: I am so excited about my vacation next week!
LLM Response: Happy

User: I aced my exams!
LLM Response: Happy

User: I had a wonderful time with my family today.
LLM Response: Happy

User: I just finished a great workout!
LLM Response: Happy

User: I am feeling really good about myself today.
LLM Response: Happy

User: I finally finished my project and it was a success!
LLM Response: Happy

User: I just heard my favorite song on the radio.
LLM Response: Happy

User: My pet passed away yesterday.
LLM Response: Sad

User: I lost my job today.
LLM Response: Sad

User: I'm feeling really lonely.
LLM Response: Sad

User: I didn't get the results I wanted.
LLM Response: Sad

User: I had a fight with my best friend.
LLM Response: Sad

User: I'm feeling really overwhelmed with everything.
LLM Response: Sad

User: I just got some bad news.
LLM Response: Sad

User: I'm missing my family.
LLM Response: Sad

User: I am feeling really down today.
LLM Response: Sad

User: Nothing seems to be going right.
LLM Response: Sad

User: I need some music while I study.
LLM Response: Instrumental

User: I want to listen to something soothing while I work.
LLM Response: Instrumental

User: Do you have any recommendations for background music?
LLM Response: Instrumental

User: I'm looking for some relaxing tunes.
LLM Response: Instrumental

User: I need some music to focus on my tasks.
LLM Response: Instrumental

User: Can you suggest some ambient music for meditation?
LLM Response: Instrumental

User: What's good for background music during reading?
LLM Response: Instrumental

User: I need some calm music to help me sleep.
LLM Response: Instrumental

User: I prefer instrumental music while cooking.
LLM Response: Instrumental

User: What's the best music to play while doing yoga?
LLM Response: Instrumental

User: Let's have a blast tonight!
LLM Response: Party

User: I'm in the mood to dance!
LLM Response: Party

User: I want to celebrate all night long!
LLM Response: Party

User: Time to hit the club!
LLM Response: Party

User: I feel like partying till dawn.
LLM Response: Party

User: Let's get this party started!
LLM Response: Party

User: I'm ready to party hard tonight.
LLM Response: Party

User: I'm in the mood for some loud music and dancing!
LLM Response: Party

User: Tonight's going to be epic!
LLM Response: Party

User: Lets turn up the music and have some fun!
LLM Response: Party
"""


def format_prompt(message, history):
    """Build the full model prompt: fixed instructions, past turns, new message.

    Args:
        message: Latest user message; placed last, followed by an open
            ``LLM Response:`` cue for the model to complete.
        history: Iterable of ``(user_prompt, bot_response)`` pairs to replay
            before the new message. ``None`` or any empty value means no
            earlier turns.

    Returns:
        The prompt string to send to the text-generation endpoint.
    """
    parts = [_FIXED_PROMPT]
    # `history or ()` lets callers pass None / "" / [] interchangeably.
    parts.extend(
        f"\nUser: {user_prompt}\nLLM Response: {bot_response}"
        for user_prompt, bot_response in (history or ())
    )
    parts.append(f"\nUser: {message}\nLLM Response:")
    return "".join(parts)
|
|
|
async def process_speech(speech_file):
    """Transcribe a recording, get the chatbot reply, and yield it as speech.

    Args:
        speech_file: Path of the recorded audio. A falsy value (no recording)
            ends the generator without producing anything.

    Yields:
        Path of a temporary file containing the synthesized reply. The file
        is created with ``delete=False`` and is not removed by this function.
    """
    if not speech_file:
        # Nothing to transcribe -- presumably the recorder was cleared or
        # never used; avoid handing an empty value to the ASR pipeline.
        return

    text = speech_to_text(speech_file)
    reply = generate(text, history=[])
    communicate = edge_tts.Communicate(reply)

    # Only reserve a unique path here, then close our handle before edge-tts
    # writes to it: keeping the file open while another writer opens the same
    # path fails on platforms that lock open files.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_path = tmp_file.name

    await communicate.save(tmp_path)
    yield tmp_path
|
# Markdown/HTML header rendered at the top of the page.
# Every <center> tag is explicitly closed so the headings do not nest.
DESCRIPTION = """ # <center><b>Mood-Based Music Recommender⚡</b></center>
### <center>Hi! I'm a music recommender app.</center>
### <center>What kind of music do you want to listen to, or how are you feeling today?</center>
"""
|
|
|
# Page layout: the description, then one row holding the microphone recorder,
# the autoplaying reply player, and the interface that wires the recorder to
# process_speech.
with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        # Named mic_input (not `input`) so the builtin input() is not shadowed.
        mic_input = gr.Audio(
            label="User",
            sources="microphone",
            type="filepath",
            waveform_options=False,
        )
        output = gr.Audio(
            label="AI",
            type="filepath",
            interactive=False,
            autoplay=True,
            elem_classes="audio",
        )
        # live=True: process_speech runs whenever the recorder's value changes.
        gr.Interface(
            batch=True,
            max_batch_size=10,
            fn=process_speech,
            inputs=[mic_input],
            outputs=[output],
            live=True,
        )
|
|
|
if __name__ == "__main__":
    # Turn on request queuing (queue size capped at 200), then start the app.
    demo.queue(max_size=200)
    demo.launch()
|
|