Update app.py
app.py CHANGED
@@ -13,6 +13,8 @@ from transformers import pipeline
 from huggingface_hub import snapshot_download
 from pydub import AudioSegment
 import noisereduce as nr
+import plotly.graph_objects as go
+import plotly.express as px
 
 # 🎨 Apply Custom Dark Mode CSS
 st.markdown(
@@ -100,12 +102,28 @@ if uploaded_file:
     # Load audio
     y, sr = librosa.load(file_path, sr=16000)
 
-    # 🎵 Display waveform
-    st.markdown("<div class='subheader'>🎼 Audio Waveform:</div>", unsafe_allow_html=True)
-    …
+    # 🎵 Display waveform using Plotly
+    st.markdown("<div class='subheader'>🎼 Interactive Audio Waveform:</div>", unsafe_allow_html=True)
+
+    time_axis = np.linspace(0, len(y) / sr, num=len(y))
+
+    fig_waveform = go.Figure()
+    fig_waveform.add_trace(go.Scatter(
+        x=time_axis,
+        y=y,
+        mode='lines',
+        line=dict(color='cyan'),
+        name="Waveform"
+    ))
+
+    fig_waveform.update_layout(
+        title="Audio Waveform",
+        xaxis_title="Time (seconds)",
+        yaxis_title="Amplitude",
+        template="plotly_dark"
+    )
+
+    st.plotly_chart(fig_waveform)
 
     # ✅ Noise Reduction
     st.markdown("<div class='subheader'>🔇 Applying Noise Reduction...</div>", unsafe_allow_html=True)
@@ -113,6 +131,47 @@ if uploaded_file:
     denoised_path = file_path.replace(".wav", "_denoised.wav")
     sf.write(denoised_path, y_denoised, sr)
 
+    # ✅ Spectrogram using Plotly
+    st.markdown("<div class='subheader'>🎤 Spectrogram (Frequency Analysis):</div>", unsafe_allow_html=True)
+
+    S = librosa.stft(y)
+    S_db = librosa.amplitude_to_db(np.abs(S), ref=np.max)
+
+    fig_spectrogram = px.imshow(
+        S_db,
+        aspect='auto',
+        origin='lower',
+        labels={"x": "Time (frames)", "y": "Frequency (bins)", "color": "Intensity (dB)"},
+        color_continuous_scale="plasma"
+    )
+
+    fig_spectrogram.update_layout(
+        title="Spectrogram",
+        template="plotly_dark"
+    )
+
+    st.plotly_chart(fig_spectrogram)
+
+    # ✅ MFCC using Plotly
+    st.markdown("<div class='subheader'>🎵 MFCC Feature Extraction:</div>", unsafe_allow_html=True)
+
+    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
+
+    fig_mfcc = px.imshow(
+        mfccs,
+        aspect='auto',
+        origin='lower',
+        labels={"x": "Time (frames)", "y": "MFCC Coefficients", "color": "Magnitude"},
+        color_continuous_scale="viridis"
+    )
+
+    fig_mfcc.update_layout(
+        title="Mel-Frequency Cepstral Coefficients (MFCC)",
+        template="plotly_dark"
+    )
+
+    st.plotly_chart(fig_mfcc)
+
     # ✅ Speech-to-Text using Vosk
     def transcribe_audio(audio_path):
         wf = wave.open(audio_path, "rb")
@@ -131,7 +190,7 @@ if uploaded_file:
     st.markdown("<div class='subheader'>📝 Transcribed Text:</div>", unsafe_allow_html=True)
     st.markdown(f"<div class='stMarkdown'>{transcription}</div>", unsafe_allow_html=True)
 
-    # ✅ Emotion Detection
+    # ✅ Emotion Detection
     st.markdown("<div class='subheader'>😊 Emotion Analysis:</div>", unsafe_allow_html=True)
 
     emotion_result = emotion_model(file_path)
@@ -156,8 +215,5 @@ if uploaded_file:
     )
 
     # ✅ Play Original & Denoised Audio
-    st.markdown("<div class='subheader'>🔊 Original Audio:</div>", unsafe_allow_html=True)
-    st.audio(file_path, format="audio/wav", start_time=0)
-
-    st.markdown("<div class='subheader'>🔊 Denoised Audio:</div>", unsafe_allow_html=True)
-    st.audio(denoised_path, format="audio/wav", start_time=0)
+    st.audio(file_path, format="audio/wav")
+    st.audio(denoised_path, format="audio/wav")
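Note: the hunk above writes `y_denoised` to disk, but the call that produces it sits outside this diff. A minimal sketch of that step with noisereduce's spectral gating, assuming default parameters and a hypothetical input path:

```python
import librosa
import noisereduce as nr
import soundfile as sf

# Hypothetical input; the app loads the uploaded file at 16 kHz.
y, sr = librosa.load("input.wav", sr=16000)

# Spectral-gating noise reduction (noisereduce's default behavior).
y_denoised = nr.reduce_noise(y=y, sr=sr)

# Mirrors the two context lines shown in the hunk above.
denoised_path = "input.wav".replace(".wav", "_denoised.wav")
sf.write(denoised_path, y_denoised, sr)
```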
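The new spectrogram plot labels its axes in frames and bins, as the labels themselves state. If physical units are wanted later, librosa can supply the coordinates directly; a sketch (not part of this commit, input path hypothetical):

```python
import numpy as np
import librosa
import plotly.express as px

y, sr = librosa.load("input.wav", sr=16000)
S_db = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)

times = librosa.frames_to_time(np.arange(S_db.shape[1]), sr=sr)  # default hop_length=512
freqs = librosa.fft_frequencies(sr=sr)                           # default n_fft=2048

fig = px.imshow(
    S_db, x=times, y=freqs,
    aspect="auto", origin="lower",
    labels={"x": "Time (s)", "y": "Frequency (Hz)", "color": "Intensity (dB)"},
    color_continuous_scale="plasma",
)
fig.update_layout(title="Spectrogram", template="plotly_dark")
```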
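The diff shows only the first two lines of `transcribe_audio` as context. A minimal completion using Vosk's standard streaming API; the model directory is an assumption (the app fetches its model via `snapshot_download`):

```python
import json
import wave
from vosk import Model, KaldiRecognizer

def transcribe_audio(audio_path, model_dir="vosk-model"):  # model_dir is hypothetical
    wf = wave.open(audio_path, "rb")
    rec = KaldiRecognizer(Model(model_dir), wf.getframerate())
    pieces = []
    while True:
        data = wf.readframes(4000)
        if len(data) == 0:
            break
        # Vosk emits a finalized segment whenever AcceptWaveform returns True.
        if rec.AcceptWaveform(data):
            pieces.append(json.loads(rec.Result()).get("text", ""))
    pieces.append(json.loads(rec.FinalResult()).get("text", ""))
    wf.close()
    return " ".join(p for p in pieces if p)
```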
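Similarly, `emotion_model(file_path)` is called in the last hunk but loaded elsewhere. Given the `from transformers import pipeline` import in the hunk header, it is presumably an audio-classification pipeline; a sketch with a hypothetical checkpoint:

```python
from transformers import pipeline

# Hypothetical checkpoint; the app's actual model is loaded outside this diff.
emotion_model = pipeline(
    "audio-classification",
    model="superb/wav2vec2-base-superb-er",
)

# Returns a list of {"label": ..., "score": ...} dicts, highest score first.
emotion_result = emotion_model("input_denoised.wav")
top = max(emotion_result, key=lambda r: r["score"])
print(top["label"], round(top["score"], 3))
```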