import streamlit as st
import pyttsx3
import speech_recognition as sr
from playsound import playsound
import random
import datetime
import webbrowser as wb
import tensorflow as tf
import numpy as np
import librosa
import matplotlib.pyplot as plt
import seaborn as sns
from modules import commands_answers, load_agenda

# Initial settings
sns.set()
commands = commands_answers.commands
answers = commands_answers.answers
my_name = 'Bob'

# Paths for browser
chrome_path = 'open -a /Applications/Google\ Chrome.app %s'  # MacOS
# chrome_path = 'C:/Program Files/Google/Chrome/Application/chrome.exe %s'  # Windows
# chrome_path = '/usr/bin/google-chrome %s'  # Linux

# Load model
MODEL_TYPES = ['EMOTION']
def load_model_by_name(model_type):
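    # Load the pre-trained Keras emotion model together with its list of
    # emotion labels and the sample rate the model expects.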
    if model_type == MODEL_TYPES[0]:
        model = tf.keras.models.load_model('models/speech_emotion_recognition.hdf5')
        model_dict = ['calm', 'happy', 'fear', 'nervous', 'neutral', 'disgust', 'surprise', 'sad']
        SAMPLE_RATE = 48000
    return model, model_dict, SAMPLE_RATE

loaded_model = load_model_by_name('EMOTION')

# Functions
def search(sentence):
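    # Open a Google search for the given sentence in the configured browser.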
    wb.get(chrome_path).open('https://www.google.com/search?q=' + sentence)

def predict_sound(AUDIO, SAMPLE_RATE, plot=True):
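    # Trim silence, split the audio into 1-second frames, extract 40 MFCCs per
    # frame, run the emotion model on each frame and return the most frequent
    # prediction as a [count, label] pair.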
    results = []
    wav_data, sample_rate = librosa.load(AUDIO, sr=SAMPLE_RATE)
    clip, index = librosa.effects.trim(wav_data, top_db=60, frame_length=512, hop_length=64)
    splitted_audio_data = tf.signal.frame(clip, sample_rate, sample_rate, pad_end=True, pad_value=0)
    for i, data in enumerate(splitted_audio_data.numpy()):
        mfccs_features = librosa.feature.mfcc(y=data, sr=sample_rate, n_mfcc=40)
        mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)
        mfccs_scaled_features = mfccs_scaled_features.reshape(1, -1)[:, :, np.newaxis]
        predictions = loaded_model[0].predict(mfccs_scaled_features)
        if plot:
            plt.figure(figsize=(len(splitted_audio_data), 5))
            plt.barh(loaded_model[1], predictions[0])
            plt.tight_layout()
            st.pyplot(plt)

        predictions = predictions.argmax(axis=1)
        predictions = predictions.astype(int).flatten()
        predictions = loaded_model[1][predictions[0]]
        results.append(predictions)

    count_results = [[results.count(x), x] for x in set(results)]
    return max(count_results)

def play_music_youtube(emotion):
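    # Open a YouTube video matched to the detected emotion; returns True if one was opened.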
    play = False
    if emotion == 'sad' or emotion == 'fear':
        wb.get(chrome_path).open('https://www.youtube.com/watch?v=k32IPg4dbz0&ab_channel=Amelhorm%C3%BAsicainstrumental')
        play = True
    if emotion == 'nervous' or emotion == 'surprise':
        wb.get(chrome_path).open('https://www.youtube.com/watch?v=pWjmpSD-ph0&ab_channel=CassioToledo')
        play = True
    return play

def speak(text):
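    # Convert text to speech with pyttsx3 and play it synchronously.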
    engine = pyttsx3.init()
    engine.setProperty('rate', 90)  # speech rate in words per minute
    engine.setProperty('volume', 1)  # min: 0, max: 1
    engine.say(text)
    engine.runAndWait()

def listen_microphone():
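    # Record from the microphone, save the clip to recordings/speech.wav and
    # return the transcript from the Google Web Speech API ('' if not understood).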
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        recognizer.adjust_for_ambient_noise(source, duration=0.8)
        st.write('Listening...')
        audio = recognizer.listen(source)
        with open('recordings/speech.wav', 'wb') as f:
            f.write(audio.get_wav_data())
    try:
        sentence = recognizer.recognize_google(audio, language='en-US')
        st.write('You said: ' + sentence)
    except sr.UnknownValueError:
        sentence = ''
        st.write('Not understood')
    return sentence

def test_models():
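    # Run emotion prediction on the most recently recorded clip.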
    audio_source = 'recordings/speech.wav'
    prediction = predict_sound(audio_source, loaded_model[2], plot=False)
    return prediction

# Streamlit UI
st.title("Virtual Assistant")
st.write("This assistant can perform tasks based on your voice commands.")

if st.button("Activate Assistant"):
    result = listen_microphone()

    if my_name.lower() in result.lower():
        # Strip the wake word (case-insensitive) and keep the remaining command.
        result = result.lower().split(my_name.lower() + ' ', 1)[-1]

        if result in commands[0]:
            speak('I will read my list of functionalities: ' + answers[0])

        elif result in commands[3]:
            speak('It is now ' + datetime.datetime.now().strftime('%H:%M'))

        elif result in commands[4]:
            date = datetime.date.today().strftime('%d/%B/%Y').split('/')
            speak('Today is ' + date[0] + ' of ' + date[1])

        elif result in commands[1]:
            speak('Please, tell me the activity!')
            result = listen_microphone()
            with open('annotation.txt', mode='a+', encoding='utf-8') as annotation:
                annotation.write(result + '\n')
            speak(random.choice(answers[1]))
            speak('Want me to read the notes?')
            result = listen_microphone()
            if result.lower() in ('yes', 'sure'):
                with open('annotation.txt') as file_source:
                    lines = file_source.readlines()
                    for line in lines:
                        speak(line)
            else:
                speak('Ok!')

        elif result in commands[2]:
            speak(random.choice(answers[2]))
            result = listen_microphone()
            search(result)

        elif result in commands[6]:
            agenda = load_agenda.load_agenda()
            if agenda:
                speak('These are the events for today:')
                for i in range(len(agenda[1])):
                    speak(agenda[1][i] + ' ' + agenda[0][i] + ' scheduled for ' + str(agenda[2][i]))
            else:
                speak('There are no events for today considering the current time!')

        elif result in commands[5]:
            st.write('Emotion analysis mode activated!')
            analyse = test_models()
            st.write(f'I heard {analyse[1]} in your voice!')
            play_music_youtube(analyse[1])

        elif result == 'turn off':
            speak(random.choice(answers[4]))
            st.write("Assistant turned off.")