Guhanselvam committed on
Commit e59aa7b · verified · 1 Parent(s): e098c22

Update app.py

Files changed (1)
  1. app.py +62 -136
app.py CHANGED
@@ -1,156 +1,82 @@
-import librosa as lb
 import soundfile as sf
 import numpy as np
-import os
-import glob
-import pickle
-import sounddevice as sd
-import time
 import requests
 import webbrowser
-import random
-from sklearn.model_selection import train_test_split
-from sklearn.neural_network import MLPClassifier
-from sklearn.metrics import accuracy_score
-from scipy.io.wavfile import write
-
-# Emotion labels mapping
-emotion_labels = {
-    '01': 'neutral',
-    '02': 'calm',
-    '03': 'happy',
-    '04': 'sad',
-    '05': 'angry',
-    '06': 'fearful',
-    '07': 'disgust',
-    '08': 'surprised'
-}
-
-# Focused emotions for recognition
-focused_emotion_labels = ['happy', 'sad', 'angry']
-
-def audio_features(file_title, mfcc=True, chroma=True, mel=True):
-    with sf.SoundFile(file_title) as audio_recording:
-        audio = audio_recording.read(dtype="float32")
-        sample_rate = audio_recording.samplerate
-        result = np.array([])
-
-        if mfcc:
-            mfccs = np.mean(lb.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40).T, axis=0)
-            result = np.hstack((result, mfccs))
-        if chroma:
-            stft = np.abs(lb.stft(audio))
-            chroma = np.mean(lb.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
-            result = np.hstack((result, chroma))
-        if mel:
-            mel = np.mean(lb.feature.melspectrogram(audio, sr=sample_rate).T, axis=0)
-            result = np.hstack((result, mel))
-        return result
-
-def get_emotion_from_file_name(file_name):
-    return emotion_labels[file_name.split("-")[2]]  # Adjust based on your actual filename structure
-
-def loading_audio_data():
-    x = []  # Input - features
-    y = []  # Output - labels emotions
-
-    # Go through all sound files
-    for file in glob.glob("data/Actor_*/*.wav"):
-        file_name = os.path.basename(file)
-        emotion = get_emotion_from_file_name(file_name)
-
-        # Use only focused emotions
-        if emotion in focused_emotion_labels:
-            try:
-                feature = audio_features(file, mfcc=True, chroma=True, mel=True)
-                x.append(feature)
-                y.append(emotion)
-            except Exception as e:
-                print(f"This file wasn't processed due to an error: {file} - {e}")
-
-    # Split the dataset into training and testing
-    return train_test_split(np.array(x), y, test_size=0.1, random_state=9)
-
-def record_sound():
-    fs = 44100  # Sample rate
-    seconds = 3  # Duration of recording
-
-    print("Recording in 3")
-    time.sleep(1)
-    print("Recording in 2")
-    time.sleep(1)
-    print("Recording in 1")
-    time.sleep(1)
-
-    # Record and save
-    my_recording = sd.rec(int(seconds * fs), samplerate=fs, channels=1)
-    print("Recording: Started")
-    sd.wait()
-    print("Recording: Stopped")
-    write('output.wav', fs, my_recording)
-
-    return 'output.wav'

 def get_playlist(mood):
     url = "https://unsa-unofficial-spotify-api.p.rapidapi.com/search"
-    querystring = {"query": mood, "count": "10", "type": "playlists"}
-    headers = {'x-rapidapi-key': "your-api-key",  # Replace with your actual API key
         'x-rapidapi-host': "unsa-unofficial-spotify-api.p.rapidapi.com"
     }

-    try:
-        response = requests.get(url, headers=headers, params=querystring)
-        response.raise_for_status()  # Raises error for bad responses
-        playlist_id = response.json()["Results"][random.randint(0, 9)]["id"]
-        return playlist_id
-    except requests.exceptions.RequestException as e:
-        print(f"Error fetching playlist data: {e}")
-        return None
-
-def open_playlist_in_browser(playlist_id):
-    webbrowser.open('https://open.spotify.com/playlist/' + str(playlist_id))
-
-def train_model():
-    X_train, X_test, y_train, y_test = loading_audio_data()
-
-    model = MLPClassifier(hidden_layer_sizes=(200,), learning_rate="adaptive", max_iter=400)
-    model.fit(X_train, y_train)
-
-    # Save model
-    with open('emotion_model.pkl', 'wb') as model_file:
-        pickle.dump(model, model_file)
-
-    # Predictions and accuracy evaluation
-    y_pred = model.predict(X_test)
-    accuracy = accuracy_score(y_true=y_test, y_pred=y_pred) * 100
-    print("Accuracy of Recognizer is: %.2f" % accuracy)
-
-    return model, accuracy
-
-def recognize_your_mood(model):
-    while True:
-        my_sound_file = record_sound()
-        feature = audio_features(my_sound_file, mfcc=True, chroma=True, mel=True)
-        mood_prediction = model.predict([feature])[0]
-
-        print(f"Are you feeling {mood_prediction}? Type yes/no")
-        user_input = input().strip().lower()
-        if user_input == "yes":
-            return mood_prediction
-
-def main():
-    # Check if model exists, load it; otherwise, train and save it
-    if os.path.exists('emotion_model.pkl'):
-        with open('emotion_model.pkl', 'rb') as model_file:
-            model = pickle.load(model_file)
-        print("Loaded existing model.")
-    else:
-        model, accuracy = train_model()
-    if accuracy > 60:  # You can adjust this threshold as needed
-        mood = recognize_your_mood(model)
-        playlist_id = get_playlist(mood)
-        if playlist_id:
-            open_playlist_in_browser(playlist_id)

 if __name__ == "__main__":
     main()
+import torch
+from transformers import Wav2Vec2Tokenizer, Wav2Vec2ForCTC
+import sounddevice as sd
 import soundfile as sf
 import numpy as np
 import requests
 import webbrowser
+from sklearn.preprocessing import LabelEncoder
+
+# Load pre-trained model and tokenizer
+model_name = "facebook/wav2vec2-large-xlsr-53"  # Change to the specific model you need for emotion recognition
+tokenizer = Wav2Vec2Tokenizer.from_pretrained(model_name)
+model = Wav2Vec2ForCTC.from_pretrained(model_name)
+
+# Function to record audio
+def record_audio(duration=5, fs=16000):
+    print("Recording...")
+    audio = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='float32')
+    sd.wait()  # Wait until recording is finished
+    print("Recording finished.")
+    return audio.flatten()
+
+# Function to save audio file
+def save_audio(filename, audio, fs=16000):
+    sf.write(filename, audio, fs)
+
+# Function for emotion recognition
+def recognize_emotion(audio):
+    # Convert audio array to input suitable for the model
+    input_values = tokenizer(audio, return_tensors='pt', padding='longest', sampling_rate=16000).input_values
+
+    # Store logits (raw predictions) and apply softmax to get probabilities
+    with torch.no_grad():
+        logits = model(input_values).logits
+        predicted_ids = torch.argmax(logits, dim=-1)
+
+    # Decode the predicted IDs to text
+    transcription = tokenizer.decode(predicted_ids[0])
+
+    return transcription  # Return the detected text
+
+# Function to map emotion text to playlist (customizable)
 def get_playlist(mood):
     url = "https://unsa-unofficial-spotify-api.p.rapidapi.com/search"
+    querystring = {"query": mood, "count": "10", "type": "playlists"}
+    headers = {
+        'x-rapidapi-key': "your-api-key",  # Replace with your actual API key
         'x-rapidapi-host': "unsa-unofficial-spotify-api.p.rapidapi.com"
     }

+    response = requests.get(url, headers=headers, params=querystring)
+    playlist_id = response.json()["Results"][0]["id"]  # Choose the first playlist
+
+    return playlist_id
+
+# Function to open playlist URL
+def open_playlist(playlist_id):
+    webbrowser.open(f'https://open.spotify.com/playlist/{playlist_id}')
+
+# Main function to run the recorder and emotion recognizer
+def main():
+    try:
+        # Record audio
+        audio = record_audio()
+
+        # Save audio to file
+        filename = "output.wav"
+        save_audio(filename, audio)
+
+        # Recognize the mood/emotion from audio
+        emotion_text = recognize_emotion(audio)
+        print(f"Detected Emotion: {emotion_text}")
+
+        # Get playlist based on detected emotion
+        playlist_id = get_playlist(emotion_text)
+        open_playlist(playlist_id)
+
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
 if __name__ == "__main__":
     main()
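
A note on the new recognize_emotion(): facebook/wav2vec2-large-xlsr-53 is a self-supervised pretraining checkpoint, so it does not ship the CTC vocabulary that Wav2Vec2Tokenizer / Wav2Vec2ForCTC expect, and even with an ASR fine-tuned checkpoint the decoded output is a transcription string rather than a mood label that get_playlist can use. If the intent is a label like "happy"/"sad"/"angry", an audio-classification head is the more direct fit. The sketch below is not part of this commit; the checkpoint name is an assumption (any emotion-recognition model with an audio-classification head would do):

import torch
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification

# Hypothetical emotion-classification checkpoint; swap in the model you actually use
clf_name = "superb/wav2vec2-base-superb-er"
feature_extractor = AutoFeatureExtractor.from_pretrained(clf_name)
clf_model = AutoModelForAudioClassification.from_pretrained(clf_name)

def classify_emotion(audio, fs=16000):
    # Turn the raw mono waveform (16 kHz float32, as produced by record_audio) into model inputs
    inputs = feature_extractor(audio, sampling_rate=fs, return_tensors="pt")
    with torch.no_grad():
        logits = clf_model(**inputs).logits
    # Pick the highest-scoring class and map its index back to the label string
    predicted_id = torch.argmax(logits, dim=-1).item()
    return clf_model.config.id2label[predicted_id]

With something like this in place, main() would call classify_emotion(audio) instead of recognize_emotion(audio) and pass the returned label to get_playlist(); the exact label set depends on the checkpoint, so a small mapping to playlist-friendly moods may still be needed.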