Update app.py
app.py CHANGED
@@ -13,6 +13,8 @@ from transformers import pipeline
 from huggingface_hub import snapshot_download
 from pydub import AudioSegment
 import noisereduce as nr
+import plotly.graph_objects as go
+import plotly.express as px
 
 # 🎨 Apply Custom Dark Mode CSS
 st.markdown(
@@ -100,12 +102,28 @@ if uploaded_file:
     # Load audio
     y, sr = librosa.load(file_path, sr=16000)
 
-    # 🎵 Display waveform
-    st.markdown("<div class='subheader'>🎼 Audio Waveform:</div>", unsafe_allow_html=True)
-    …
+    # 🎵 Display waveform using Plotly
+    st.markdown("<div class='subheader'>🎼 Interactive Audio Waveform:</div>", unsafe_allow_html=True)
+
+    time_axis = np.linspace(0, len(y) / sr, num=len(y))
+
+    fig_waveform = go.Figure()
+    fig_waveform.add_trace(go.Scatter(
+        x=time_axis,
+        y=y,
+        mode='lines',
+        line=dict(color='cyan'),
+        name="Waveform"
+    ))
+
+    fig_waveform.update_layout(
+        title="Audio Waveform",
+        xaxis_title="Time (seconds)",
+        yaxis_title="Amplitude",
+        template="plotly_dark"
+    )
+
+    st.plotly_chart(fig_waveform)
 
     # ✅ Noise Reduction
     st.markdown("<div class='subheader'>🔇 Applying Noise Reduction...</div>", unsafe_allow_html=True)
@@ -113,6 +131,47 @@ if uploaded_file:
     denoised_path = file_path.replace(".wav", "_denoised.wav")
     sf.write(denoised_path, y_denoised, sr)
 
+    # ✅ Spectrogram using Plotly
+    st.markdown("<div class='subheader'>🎤 Spectrogram (Frequency Analysis):</div>", unsafe_allow_html=True)
+
+    S = librosa.stft(y)
+    S_db = librosa.amplitude_to_db(np.abs(S), ref=np.max)
+
+    fig_spectrogram = px.imshow(
+        S_db,
+        aspect='auto',
+        origin='lower',
+        labels={"x": "Time (frames)", "y": "Frequency (bins)", "color": "Intensity (dB)"},
+        color_continuous_scale="plasma"
+    )
+
+    fig_spectrogram.update_layout(
+        title="Spectrogram",
+        template="plotly_dark"
+    )
+
+    st.plotly_chart(fig_spectrogram)
+
+    # ✅ MFCC using Plotly
+    st.markdown("<div class='subheader'>🎵 MFCC Feature Extraction:</div>", unsafe_allow_html=True)
+
+    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
+
+    fig_mfcc = px.imshow(
+        mfccs,
+        aspect='auto',
+        origin='lower',
+        labels={"x": "Time (frames)", "y": "MFCC Coefficients", "color": "Magnitude"},
+        color_continuous_scale="viridis"
+    )
+
+    fig_mfcc.update_layout(
+        title="Mel-Frequency Cepstral Coefficients (MFCC)",
+        template="plotly_dark"
+    )
+
+    st.plotly_chart(fig_mfcc)
+
     # ✅ Speech-to-Text using Vosk
     def transcribe_audio(audio_path):
         wf = wave.open(audio_path, "rb")
@@ -131,7 +190,7 @@ if uploaded_file:
     st.markdown("<div class='subheader'>📝 Transcribed Text:</div>", unsafe_allow_html=True)
     st.markdown(f"<div class='stMarkdown'>{transcription}</div>", unsafe_allow_html=True)
 
-    # ✅ Emotion Detection
+    # ✅ Emotion Detection
     st.markdown("<div class='subheader'>😊 Emotion Analysis:</div>", unsafe_allow_html=True)
 
     emotion_result = emotion_model(file_path)
@@ -156,8 +215,5 @@ if uploaded_file:
     )
 
     # ✅ Play Original & Denoised Audio
-    st.markdown("<div class='subheader'>🔊 Original Audio:</div>", unsafe_allow_html=True)
-    st.audio(file_path, format="audio/wav", start_time=0)
-
-    st.markdown("<div class='subheader'>🔊 Denoised Audio:</div>", unsafe_allow_html=True)
-    st.audio(denoised_path, format="audio/wav", start_time=0)
+    st.audio(file_path, format="audio/wav")
+    st.audio(denoised_path, format="audio/wav")
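Note: the hunk above writes `y_denoised` to disk, but the call that produces it sits outside this diff. A minimal sketch of that step with noisereduce's spectral gating, assuming default parameters and a hypothetical input path:

```python
import librosa
import noisereduce as nr
import soundfile as sf

# Hypothetical input; the app loads the uploaded file at 16 kHz.
y, sr = librosa.load("input.wav", sr=16000)

# Spectral-gating noise reduction (noisereduce's default behavior).
y_denoised = nr.reduce_noise(y=y, sr=sr)

# Mirrors the two context lines shown in the hunk above.
denoised_path = "input.wav".replace(".wav", "_denoised.wav")
sf.write(denoised_path, y_denoised, sr)
```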
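The new spectrogram plot labels its axes in frames and bins, as the labels themselves state. If physical units are wanted later, librosa can supply the coordinates directly; a sketch (not part of this commit, input path hypothetical):

```python
import numpy as np
import librosa
import plotly.express as px

y, sr = librosa.load("input.wav", sr=16000)
S_db = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)

times = librosa.frames_to_time(np.arange(S_db.shape[1]), sr=sr)  # default hop_length=512
freqs = librosa.fft_frequencies(sr=sr)                           # default n_fft=2048

fig = px.imshow(
    S_db, x=times, y=freqs,
    aspect="auto", origin="lower",
    labels={"x": "Time (s)", "y": "Frequency (Hz)", "color": "Intensity (dB)"},
    color_continuous_scale="plasma",
)
fig.update_layout(title="Spectrogram", template="plotly_dark")
```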
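The diff shows only the first two lines of `transcribe_audio` as context. A minimal completion using Vosk's standard streaming API; the model directory is an assumption (the app fetches its model via `snapshot_download`):

```python
import json
import wave
from vosk import Model, KaldiRecognizer

def transcribe_audio(audio_path, model_dir="vosk-model"):  # model_dir is hypothetical
    wf = wave.open(audio_path, "rb")
    rec = KaldiRecognizer(Model(model_dir), wf.getframerate())
    pieces = []
    while True:
        data = wf.readframes(4000)
        if len(data) == 0:
            break
        # Vosk emits a finalized segment whenever AcceptWaveform returns True.
        if rec.AcceptWaveform(data):
            pieces.append(json.loads(rec.Result()).get("text", ""))
    pieces.append(json.loads(rec.FinalResult()).get("text", ""))
    wf.close()
    return " ".join(p for p in pieces if p)
```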
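Similarly, `emotion_model(file_path)` is called in the last hunk but loaded elsewhere. Given the `from transformers import pipeline` import in the hunk header, it is presumably an audio-classification pipeline; a sketch with a hypothetical checkpoint:

```python
from transformers import pipeline

# Hypothetical checkpoint; the app's actual model is loaded outside this diff.
emotion_model = pipeline(
    "audio-classification",
    model="superb/wav2vec2-base-superb-er",
)

# Returns a list of {"label": ..., "score": ...} dicts, highest score first.
emotion_result = emotion_model("input_denoised.wav")
top = max(emotion_result, key=lambda r: r["score"])
print(top["label"], round(top["score"], 3))
```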