jcvsalinas committed (verified)
Commit ab766bf · 1 Parent(s): 4c52933

Upload 3 files

Files changed (3):
  1. app.py +20 -6
  2. happy.jpg +0 -0
  3. surprise.jpg +0 -0
app.py CHANGED
@@ -31,6 +31,7 @@ id2label = {
 
 def predict(model, feature_extractor, data, max_length, id2label):
     # Extract features
+    print(datetime.now().strftime('%Y-%m-%d%H:%M:%S'), ":Extracting features...")
     inputs = feature_extractor(data, sampling_rate=16000, max_length=max_length, return_tensors='tf', padding=True, truncation=True)
     torch_inputs = torch.tensor(inputs['input_values'].numpy(), dtype=torch.float32)
     print(datetime.now().strftime('%Y-%m-%d%H:%M:%S'), ":Predicting...")
@@ -130,6 +131,7 @@ def recognize_emotion(audio):
     audio_data = audio_data.astype(np.float32)
     # If you still want to process it with librosa, e.g., to change sample rate:
     if sample_rate != 16000:
+        print(datetime.now().strftime('%Y-%m-%d%H:%M:%S'), ":Resampling audio...")
         audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000)
     emotion, probabilities = predict(model, feature_extractor, audio_data, 48000, id2label) # limit to 3seconds
     print(probabilities)
@@ -155,7 +157,8 @@ def get_emotion_image(emotion):
         "fear": "fear.jpeg",
         "happy": "happy.jpeg",
         "neutral": "neutral.jpeg",
-        "sad": "sad.jpeg"
+        "sad": "sad.jpeg",
+        "surprise": "surprise.jpeg"
         # Add other emotions and their corresponding images
     }
 
@@ -166,6 +169,7 @@ def get_emotion_image(emotion):
 
 demo = gr.Blocks()
 with demo:
+    df_logs = pd.DataFrame(columns=['Timestamp', 'Emotion'])
     theme= gr.themes.Soft(),
     audio_input = gr.Audio(type="numpy",
                            sources=["microphone"],
@@ -175,18 +179,28 @@ with demo:
     text_output = gr.Textbox(label="Recognized Emotion")
     output_df = gr.DataFrame(label="Emotion Probabilities")
     image_output = gr.Image(label="Emotion Image", scale = 1, interactive = False)
-    def process_audio(audio, emotion, image, state, df_probs):
+    df_logs = gr.DataFrame(label="Output Logs", headers = ['Timestamp', 'Emotion'])
+    def process_audio(audio, emotion, image, state, df_probs, df_logs):
+
         current_time = time.time()
-        if state is None or current_time - state >= 10:
+        if state is None or (current_time - state >= 10):
             state = current_time
             emotion, image, df_probs = recognize_emotion(audio)
+            # Sample prediction data
+            timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+
+            # Create a dictionary for the new row
+            new_row = {'Timestamp': timestamp, 'Emotion': emotion}
+
+            # Append the new row to the DataFrame
+            df_logs = pd.concat([df_logs, pd.DataFrame([new_row])], ignore_index=True)
             print(datetime.now().strftime('%Y-%m-%d%H:%M:%S'), "Predicted emotion: ", emotion)
-            return emotion, image, state, df_probs
+            return emotion, image, state, df_probs, df_logs
         else:
             print(datetime.now().strftime('%Y-%m-%d%H:%M:%S'), "Not yet time")
-            return emotion, image, state, df_probs
+            return emotion, image, state, df_probs, df_logs
 
     # Automatically call the recognize_emotion function when audio is recorded
     state = gr.State(None)
-    audio_input.stream(fn=process_audio, inputs=[audio_input, text_output, image_output, state, output_df], outputs=[text_output, image_output, state, output_df])
+    audio_input.stream(fn=process_audio, inputs=[audio_input, text_output, image_output, state, output_df, df_logs], outputs=[text_output, image_output, state, output_df, df_logs])
     demo.launch(share=True)
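
process_audio already throttles the streaming callback to at most one prediction every 10 seconds (the last-run timestamp lives in gr.State); what this commit adds is a running prediction log kept in a pandas DataFrame. A minimal standalone sketch of that pattern, with a hypothetical classify() standing in for recognize_emotion and no Gradio dependency:

import time
from datetime import datetime
import pandas as pd

def classify(chunk):
    # Placeholder for recognize_emotion; returns a fixed label here.
    return "happy"

def process_chunk(chunk, state, df_logs, min_interval=10):
    # Run the classifier at most once every `min_interval` seconds.
    # `state` holds the timestamp of the last accepted run (or None).
    current_time = time.time()
    if state is None or current_time - state >= min_interval:
        state = current_time
        emotion = classify(chunk)
        new_row = {'Timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                   'Emotion': emotion}
        # Append the prediction to the log, mirroring the pd.concat call in the diff.
        df_logs = pd.concat([df_logs, pd.DataFrame([new_row])], ignore_index=True)
    return state, df_logs

logs = pd.DataFrame(columns=['Timestamp', 'Emotion'])
state = None
state, logs = process_chunk(None, state, logs)
print(logs)

Since Gradio event handlers receive and return component values rather than mutating shared objects, the updated state and df_logs have to be returned by process_audio and wired through outputs=[...], which is why the stream() call above lists them in both inputs and outputs.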
happy.jpg ADDED
surprise.jpg ADDED
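
The two new log lines in predict and recognize_emotion bracket the audio preprocessing: microphone audio is resampled to 16 kHz before feature extraction, and max_length=48000 truncates the input to 48000 samples, i.e. 3 seconds at 16 kHz. A minimal sketch of that resampling step, using synthetic audio in place of the microphone stream (the 48 kHz source rate is just an assumed example):

import numpy as np
import librosa

sample_rate = 48000                  # assumed browser microphone rate
duration_s = 5
audio_data = np.random.uniform(-1.0, 1.0, sample_rate * duration_s).astype(np.float32)

if sample_rate != 16000:
    # Same call as in recognize_emotion: bring the clip to the 16 kHz the extractor expects.
    audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000)

print(len(audio_data))    # 80000 samples at 16 kHz
print(48000 / 16000)      # max_length of 48000 samples -> 3.0 seconds kept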