Upload 8 files
- angry.jpeg +0 -0
- app.py +27 -7
- default.jpg +0 -0
- disgust.jpeg +0 -0
- fear.jpeg +0 -0
- happy.jpeg +0 -0
- neutral.jpeg +0 -0
- sad.jpeg +0 -0
angry.jpeg
ADDED
app.py
CHANGED
@@ -1,10 +1,13 @@
 import gradio as gr
 import numpy as np
 import matplotlib.pyplot as plt
+from PIL import Image
+import librosa
 
 HOME_DIR = ""
 local_config_path = 'config.json'
 local_preprocessor_config_path = 'preprocessor_config.json'
+local_weights_path = 'pytorch_model.bin'
 local_training_args_path = 'training_args.bin'
 
 import torch
@@ -115,7 +118,6 @@ model.eval()
 
 
 def recognize_emotion(audio):
-    import librosa
     # Load the audio file using librosa
 
     sample_rate, audio_data = audio
@@ -123,12 +125,28 @@ def recognize_emotion(audio):
     # Ensure audio data is in floating-point format
     if not np.issubdtype(audio_data.dtype, np.floating):
         audio_data = audio_data.astype(np.float32)
-    print(audio_data)
     # If you still want to process it with librosa, e.g., to change sample rate:
     if sample_rate != 16000:
         audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000)
-
-
+    emotion = predict(model, feature_extractor, audio_data, len(audio_data), id2label)
+    return emotion, get_emotion_image(emotion)
+
+def get_emotion_image(emotion):
+    # Here, you would have a dictionary or logic to map emotions to images
+    emotion_to_image = {
+        "angry": "angry.jpeg",
+        "disgust": "disgust.jpeg",
+        "fear": "fear.jpeg",
+        "happy": "happy.jpeg",
+        "neutral": "neutral.jpeg",
+        "sad": "sad.jpeg"
+        # Add other emotions and their corresponding images
+    }
+
+    # Default image if emotion is not found
+    image_path = emotion_to_image.get(emotion, "default.jpg")
+    # Load and return the image
+    return Image.open(image_path)
 
 demo = gr.Blocks()
 with demo:
@@ -138,7 +156,9 @@ with demo:
         show_label=True
     )
     text_output = gr.Textbox(label="Recognized Emotion")
-
-
-
+    image_output = gr.Image(label="Emotion Image")
+
+    # Automatically call the recognize_emotion function when audio is recorded
+    audio_input.stop_recording(fn=recognize_emotion, inputs=audio_input, outputs=[text_output, image_output])
+    print("Emotion: ", text_output)
 demo.launch(share=True)
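A note on the new `predict(...)` call in `recognize_emotion`: the helper itself is defined in an unchanged part of app.py, so it does not appear in the hunks above. As a rough guide, a compatible implementation for a transformers audio-classification checkpoint could look like the sketch below; the signature is inferred from the call site, so every detail here is an assumption, not the Space's actual code.

```python
import torch

def predict(model, feature_extractor, audio_data, max_length, id2label):
    # Turn the raw 16 kHz waveform into padded/truncated model inputs
    inputs = feature_extractor(
        audio_data,
        sampling_rate=16000,
        max_length=max_length,
        truncation=True,
        return_tensors="pt",
    )
    # Run the classifier without tracking gradients
    with torch.no_grad():
        logits = model(**inputs).logits
    # Map the highest-scoring class index back to a label such as "happy"
    predicted_id = int(torch.argmax(logits, dim=-1))
    return id2label[predicted_id]
```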
default.jpg
ADDED
disgust.jpeg
ADDED
fear.jpeg
ADDED
happy.jpeg
ADDED
neutral.jpeg
ADDED
sad.jpeg
ADDED
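With the images above in place, the updated pipeline can be smoke-tested without the microphone by passing `recognize_emotion` the same `(sample_rate, audio_data)` tuple that `gr.Audio` produces. A minimal sketch, assuming the app.py context above is loaded and using `some_clip.wav` as a placeholder path (not a file in this commit):

```python
import librosa

# librosa.load returns (samples, sample_rate); sr=16000 resamples on load
audio_data, sample_rate = librosa.load("some_clip.wav", sr=16000)

# gr.Audio hands the callback a (sample_rate, numpy_array) tuple, so mimic it
emotion, image = recognize_emotion((sample_rate, audio_data))
print(emotion)     # e.g. "happy"
print(image.size)  # PIL image loaded from happy.jpeg (or default.jpg)
```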