import streamlit as st
import pyttsx3
import speech_recognition as sr
from playsound import playsound
import random
import datetime
import webbrowser as wb
import tensorflow as tf
import numpy as np
import librosa
import matplotlib.pyplot as plt
import seaborn as sns
from modules import commands_answers, load_agenda

# Initial settings
sns.set()
commands = commands_answers.commands
answers = commands_answers.answers
my_name = 'Bob'

# Paths for browser
chrome_path = 'open -a /Applications/Google\ Chrome.app %s'  # MacOS
# chrome_path = 'C:/Program Files/Google/Chrome/Application/chrome.exe %s'  # Windows
# chrome_path = '/usr/bin/google-chrome %s'  # Linux

# Load model
MODEL_TYPES = ['EMOTION']
def load_model_by_name(model_type):
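    # Load the pre-trained Keras emotion model together with its list of
    # emotion labels and the sample rate the model expects.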
    if model_type == MODEL_TYPES[0]:
        model = tf.keras.models.load_model('models/speech_emotion_recognition.hdf5')
        model_dict = ['calm', 'happy', 'fear', 'nervous', 'neutral', 'disgust', 'surprise', 'sad']
        SAMPLE_RATE = 48000
    return model, model_dict, SAMPLE_RATE

loaded_model = load_model_by_name('EMOTION')

# Functions
def search(sentence):
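    # Open a Google search for the given sentence in the configured browser.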
    wb.get(chrome_path).open('https://www.google.com/search?q=' + sentence)

def predict_sound(AUDIO, SAMPLE_RATE, plot=True):
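    # Trim silence, split the audio into 1-second frames, extract 40 MFCCs per
    # frame, run the emotion model on each frame and return the most frequent
    # prediction as a [count, label] pair.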
    results = []
    wav_data, sample_rate = librosa.load(AUDIO, sr=SAMPLE_RATE)
    clip, index = librosa.effects.trim(wav_data, top_db=60, frame_length=512, hop_length=64)
    splitted_audio_data = tf.signal.frame(clip, sample_rate, sample_rate, pad_end=True, pad_value=0)
    for i, data in enumerate(splitted_audio_data.numpy()):
        mfccs_features = librosa.feature.mfcc(y=data, sr=sample_rate, n_mfcc=40)
        mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)
        mfccs_scaled_features = mfccs_scaled_features.reshape(1, -1)[:, :, np.newaxis]
        predictions = loaded_model[0].predict(mfccs_scaled_features)
        if plot:
            plt.figure(figsize=(len(splitted_audio_data), 5))
            plt.barh(loaded_model[1], predictions[0])
            plt.tight_layout()
            st.pyplot(plt)

        predictions = predictions.argmax(axis=1)
        predictions = predictions.astype(int).flatten()
        predictions = loaded_model[1][predictions[0]]
        results.append(predictions)

    count_results = [[results.count(x), x] for x in set(results)]
    return max(count_results)

def play_music_youtube(emotion):
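    # Open a YouTube video matched to the detected emotion; returns True if one was opened.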
    play = False
    if emotion == 'sad' or emotion == 'fear':
        wb.get(chrome_path).open('https://www.youtube.com/watch?v=k32IPg4dbz0&ab_channel=Amelhorm%C3%BAsicainstrumental')
        play = True
    if emotion == 'nervous' or emotion == 'surprise':
        wb.get(chrome_path).open('https://www.youtube.com/watch?v=pWjmpSD-ph0&ab_channel=CassioToledo')
        play = True
    return play

def speak(text):
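    # Convert text to speech with pyttsx3 and play it synchronously.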
    engine = pyttsx3.init()
    engine.setProperty('rate', 90)  # speech rate in words per minute
    engine.setProperty('volume', 1)  # min: 0, max: 1
    engine.say(text)
    engine.runAndWait()

def listen_microphone():
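    # Record from the microphone, save the clip to recordings/speech.wav and
    # return the transcript from the Google Web Speech API ('' if not understood).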
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        recognizer.adjust_for_ambient_noise(source, duration=0.8)
        st.write('Listening...')
        audio = recognizer.listen(source)
        with open('recordings/speech.wav', 'wb') as f:
            f.write(audio.get_wav_data())
    try:
        sentence = recognizer.recognize_google(audio, language='en-US')
        st.write('You said: ' + sentence)
    except sr.UnknownValueError:
        sentence = ''
        st.write('Not understood')
    return sentence

def test_models():
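    # Run emotion prediction on the most recently recorded clip.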
    audio_source = 'recordings/speech.wav'
    prediction = predict_sound(audio_source, loaded_model[2], plot=False)
    return prediction

# Streamlit UI
st.title("Virtual Assistant")
st.write("This assistant can perform tasks based on your voice commands.")

if st.button("Activate Assistant"):
    result = listen_microphone()

    if my_name.lower() in result.lower():
        # Strip the wake word (case-insensitive) and keep the remaining command.
        result = result.lower().split(my_name.lower() + ' ', 1)[-1]

        if result in commands[0]:
            speak('I will read my list of functionalities: ' + answers[0])

        elif result in commands[3]:
            speak('It is now ' + datetime.datetime.now().strftime('%H:%M'))

        elif result in commands[4]:
            date = datetime.date.today().strftime('%d/%B/%Y').split('/')
            speak('Today is ' + date[0] + ' of ' + date[1])

        elif result in commands[1]:
            speak('Please, tell me the activity!')
            result = listen_microphone()
            with open('annotation.txt', mode='a+', encoding='utf-8') as annotation:
                annotation.write(result + '\n')
            speak(random.choice(answers[1]))
            speak('Want me to read the notes?')
            result = listen_microphone()
            if result.lower() in ('yes', 'sure'):
                with open('annotation.txt') as file_source:
                    lines = file_source.readlines()
                    for line in lines:
                        speak(line)
            else:
                speak('Ok!')

        elif result in commands[2]:
            speak(random.choice(answers[2]))
            result = listen_microphone()
            search(result)

        elif result in commands[6]:
            agenda = load_agenda.load_agenda()
            if agenda:
                speak('These are the events for today:')
                for i in range(len(agenda[1])):
                    speak(agenda[1][i] + ' ' + agenda[0][i] + ' scheduled for ' + str(agenda[2][i]))
            else:
                speak('There are no events for today considering the current time!')

        elif result in commands[5]:
            st.write('Emotion analysis mode activated!')
            analyse = test_models()
            st.write(f'I heard {analyse[1]} in your voice!')
            play_music_youtube(analyse[1])

        elif result == 'turn off':
            speak(random.choice(answers[4]))
            st.write("Assistant turned off.")