Upload 8 files
- angry.jpeg +0 -0
- app.py +27 -7
- default.jpg +0 -0
- disgust.jpeg +0 -0
- fear.jpeg +0 -0
- happy.jpeg +0 -0
- neutral.jpeg +0 -0
- sad.jpeg +0 -0
angry.jpeg
ADDED
app.py
CHANGED
@@ -1,10 +1,13 @@
 import gradio as gr
 import numpy as np
 import matplotlib.pyplot as plt
+from PIL import Image
+import librosa
 
 HOME_DIR = ""
 local_config_path = 'config.json'
 local_preprocessor_config_path = 'preprocessor_config.json'
+local_weights_path = 'pytorch_model.bin'
 local_training_args_path = 'training_args.bin'
 
 import torch
@@ -115,7 +118,6 @@ model.eval()
 
 
 def recognize_emotion(audio):
-    import librosa
     # Load the audio file using librosa
 
     sample_rate, audio_data = audio
@@ -123,12 +125,28 @@ def recognize_emotion(audio):
     # Ensure audio data is in floating-point format
     if not np.issubdtype(audio_data.dtype, np.floating):
         audio_data = audio_data.astype(np.float32)
-    print(audio_data)
     # If you still want to process it with librosa, e.g., to change sample rate:
     if sample_rate != 16000:
         audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000)
-
-
+    emotion = predict(model, feature_extractor, audio_data, len(audio_data), id2label)
+    return emotion, get_emotion_image(emotion)
+
+def get_emotion_image(emotion):
+    # Here, you would have a dictionary or logic to map emotions to images
+    emotion_to_image = {
+        "angry": "angry.jpeg",
+        "disgust": "disgust.jpeg",
+        "fear": "fear.jpeg",
+        "happy": "happy.jpeg",
+        "neutral": "neutral.jpeg",
+        "sad": "sad.jpeg"
+        # Add other emotions and their corresponding images
+    }
+
+    # Default image if emotion is not found
+    image_path = emotion_to_image.get(emotion, "default.jpg")
+    # Load and return the image
+    return Image.open(image_path)
 
 demo = gr.Blocks()
 with demo:
@@ -138,7 +156,9 @@ with demo:
         show_label=True
     )
     text_output = gr.Textbox(label="Recognized Emotion")
-
-
-
+    image_output = gr.Image(label="Emotion Image")
+
+    # Automatically call the recognize_emotion function when audio is recorded
+    audio_input.stop_recording(fn=recognize_emotion, inputs=audio_input, outputs=[text_output, image_output])
+    print("Emotion: ", text_output)
 demo.launch(share=True)
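A note on the new `predict(...)` call in `recognize_emotion`: the helper itself is defined in an unchanged part of app.py, so it does not appear in the hunks above. As a rough guide, a compatible implementation for a transformers audio-classification checkpoint could look like the sketch below; the signature is inferred from the call site, so every detail here is an assumption, not the Space's actual code.

```python
import torch

def predict(model, feature_extractor, audio_data, max_length, id2label):
    # Turn the raw 16 kHz waveform into padded/truncated model inputs
    inputs = feature_extractor(
        audio_data,
        sampling_rate=16000,
        max_length=max_length,
        truncation=True,
        return_tensors="pt",
    )
    # Run the classifier without tracking gradients
    with torch.no_grad():
        logits = model(**inputs).logits
    # Map the highest-scoring class index back to a label such as "happy"
    predicted_id = int(torch.argmax(logits, dim=-1))
    return id2label[predicted_id]
```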
default.jpg
ADDED
disgust.jpeg
ADDED
fear.jpeg
ADDED
happy.jpeg
ADDED
neutral.jpeg
ADDED
sad.jpeg
ADDED
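With the images above in place, the updated pipeline can be smoke-tested without the microphone by passing `recognize_emotion` the same `(sample_rate, audio_data)` tuple that `gr.Audio` produces. A minimal sketch, assuming the app.py context above is loaded and using `some_clip.wav` as a placeholder path (not a file in this commit):

```python
import librosa

# librosa.load returns (samples, sample_rate); sr=16000 resamples on load
audio_data, sample_rate = librosa.load("some_clip.wav", sr=16000)

# gr.Audio hands the callback a (sample_rate, numpy_array) tuple, so mimic it
emotion, image = recognize_emotion((sample_rate, audio_data))
print(emotion)     # e.g. "happy"
print(image.size)  # PIL image loaded from happy.jpeg (or default.jpg)
```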