Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -10,16 +10,52 @@ import wave
|
|
10 |
import json
|
11 |
from vosk import Model, KaldiRecognizer
|
12 |
from transformers import pipeline
|
13 |
-
from huggingface_hub import snapshot_download
|
14 |
from pydub import AudioSegment
|
15 |
import noisereduce as nr
|
16 |
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
# ✅ Auto-Download Vosk Model (Speech-to-Text)
|
20 |
VOSK_MODEL = "vosk-model-small-en-us-0.15"
|
21 |
if not os.path.exists(VOSK_MODEL):
|
22 |
-
st.write("Downloading Vosk Model...")
|
23 |
subprocess.run(["wget", "-O", "vosk.zip", "https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip"])
|
24 |
subprocess.run(["unzip", "vosk.zip"])
|
25 |
subprocess.run(["rm", "vosk.zip"])
|
@@ -30,15 +66,15 @@ model = Model(VOSK_MODEL)
|
|
30 |
# ✅ Auto-Download Wav2Vec2 Model (Emotion Detection)
|
31 |
WAV2VEC_MODEL = "facebook/wav2vec2-large-xlsr-53"
|
32 |
if not os.path.exists(WAV2VEC_MODEL):
|
33 |
-
st.write(f"Downloading {WAV2VEC_MODEL}...")
|
34 |
snapshot_download(repo_id=WAV2VEC_MODEL, local_dir=WAV2VEC_MODEL)
|
35 |
|
36 |
# Load emotion detection model
|
37 |
emotion_model = pipeline("audio-classification", model=WAV2VEC_MODEL)
|
38 |
|
39 |
# ✅ Streamlit UI
|
40 |
-
st.
|
41 |
-
st.
|
42 |
|
43 |
uploaded_file = st.file_uploader("Upload an MP3/WAV file", type=["mp3", "wav"])
|
44 |
|
@@ -58,12 +94,14 @@ if uploaded_file:
|
|
58 |
# Load audio
|
59 |
y, sr = librosa.load(file_path, sr=16000)
|
60 |
|
61 |
-
# Display waveform
|
|
|
62 |
fig, ax = plt.subplots(figsize=(10, 4))
|
63 |
librosa.display.waveshow(y, sr=sr, ax=ax)
|
64 |
st.pyplot(fig)
|
65 |
|
66 |
# ✅ Noise Reduction
|
|
|
67 |
y_denoised = nr.reduce_noise(y=y, sr=sr)
|
68 |
denoised_path = file_path.replace(".wav", "_denoised.wav")
|
69 |
sf.write(denoised_path, y_denoised, sr)
|
@@ -82,16 +120,18 @@ if uploaded_file:
|
|
82 |
return result["text"]
|
83 |
|
84 |
transcription = transcribe_audio(file_path)
|
85 |
-
|
86 |
-
st.
|
|
|
87 |
|
88 |
# ✅ Emotion Detection
|
|
|
89 |
emotion_result = emotion_model(file_path)
|
90 |
-
|
91 |
-
st.subheader("π Emotion Analysis:")
|
92 |
st.write(emotion_result)
|
93 |
|
94 |
# ✅ Play Original & Denoised Audio
|
|
|
95 |
st.audio(file_path, format="audio/wav", start_time=0)
|
96 |
-
|
|
|
97 |
st.audio(denoised_path, format="audio/wav", start_time=0)
|
|
|
10 |
import json
|
11 |
from vosk import Model, KaldiRecognizer
|
12 |
from transformers import pipeline
|
13 |
+
from huggingface_hub import snapshot_download
|
14 |
from pydub import AudioSegment
|
15 |
import noisereduce as nr
|
16 |
|
17 |
+
# 🎨 Apply Custom CSS Styling
|
18 |
+
st.markdown(
|
19 |
+
"""
|
20 |
+
<style>
|
21 |
+
.stApp {
|
22 |
+
background-color: #f0f2f6;
|
23 |
+
}
|
24 |
+
.title {
|
25 |
+
font-size: 32px;
|
26 |
+
text-align: center;
|
27 |
+
color: #4A90E2;
|
28 |
+
font-weight: bold;
|
29 |
+
}
|
30 |
+
.subheader {
|
31 |
+
font-size: 20px;
|
32 |
+
font-weight: bold;
|
33 |
+
color: #333;
|
34 |
+
}
|
35 |
+
.stButton>button {
|
36 |
+
background-color: #4A90E2 !important;
|
37 |
+
color: white !important;
|
38 |
+
font-size: 18px !important;
|
39 |
+
padding: 10px 24px !important;
|
40 |
+
border-radius: 10px !important;
|
41 |
+
border: none !important;
|
42 |
+
}
|
43 |
+
.stAudio {
|
44 |
+
width: 100% !important;
|
45 |
+
}
|
46 |
+
.stMarkdown {
|
47 |
+
font-size: 16px;
|
48 |
+
color: #333;
|
49 |
+
}
|
50 |
+
</style>
|
51 |
+
""",
|
52 |
+
unsafe_allow_html=True
|
53 |
+
)
|
54 |
|
55 |
# ✅ Auto-Download Vosk Model (Speech-to-Text)
|
56 |
VOSK_MODEL = "vosk-model-small-en-us-0.15"
|
57 |
if not os.path.exists(VOSK_MODEL):
|
58 |
+
st.write("📥 Downloading Vosk Model...")
|
59 |
subprocess.run(["wget", "-O", "vosk.zip", "https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip"])
|
60 |
subprocess.run(["unzip", "vosk.zip"])
|
61 |
subprocess.run(["rm", "vosk.zip"])
|
|
|
66 |
# ✅ Auto-Download Wav2Vec2 Model (Emotion Detection)
|
67 |
WAV2VEC_MODEL = "facebook/wav2vec2-large-xlsr-53"
|
68 |
if not os.path.exists(WAV2VEC_MODEL):
|
69 |
+
st.write(f"📥 Downloading {WAV2VEC_MODEL}...")
|
70 |
snapshot_download(repo_id=WAV2VEC_MODEL, local_dir=WAV2VEC_MODEL)
|
71 |
|
72 |
# Load emotion detection model
|
73 |
emotion_model = pipeline("audio-classification", model=WAV2VEC_MODEL)
|
74 |
|
75 |
# ✅ Streamlit UI
|
76 |
+
st.markdown("<div class='title'>🎙️ Speech Detection System</div>", unsafe_allow_html=True)
|
77 |
+
st.markdown("<div class='subheader'>π Upload an audio file for speech-to-text, noise filtering, and emotion analysis.</div>", unsafe_allow_html=True)
|
78 |
|
79 |
uploaded_file = st.file_uploader("Upload an MP3/WAV file", type=["mp3", "wav"])
|
80 |
|
|
|
94 |
# Load audio
|
95 |
y, sr = librosa.load(file_path, sr=16000)
|
96 |
|
97 |
+
# 🎵 Display waveform
|
98 |
+
st.markdown("<div class='subheader'>🎼 Audio Waveform:</div>", unsafe_allow_html=True)
|
99 |
fig, ax = plt.subplots(figsize=(10, 4))
|
100 |
librosa.display.waveshow(y, sr=sr, ax=ax)
|
101 |
st.pyplot(fig)
|
102 |
|
103 |
# ✅ Noise Reduction
|
104 |
+
st.markdown("<div class='subheader'>π Applying Noise Reduction...</div>", unsafe_allow_html=True)
|
105 |
y_denoised = nr.reduce_noise(y=y, sr=sr)
|
106 |
denoised_path = file_path.replace(".wav", "_denoised.wav")
|
107 |
sf.write(denoised_path, y_denoised, sr)
|
|
|
120 |
return result["text"]
|
121 |
|
122 |
transcription = transcribe_audio(file_path)
|
123 |
+
|
124 |
+
st.markdown("<div class='subheader'>π Transcribed Text:</div>", unsafe_allow_html=True)
|
125 |
+
st.markdown(f"<div class='stMarkdown'>{transcription}</div>", unsafe_allow_html=True)
|
126 |
|
127 |
# ✅ Emotion Detection
|
128 |
+
st.markdown("<div class='subheader'>π Emotion Analysis:</div>", unsafe_allow_html=True)
|
129 |
emotion_result = emotion_model(file_path)
|
|
|
|
|
130 |
st.write(emotion_result)
|
131 |
|
132 |
# ✅ Play Original & Denoised Audio
|
133 |
+
st.markdown("<div class='subheader'>π Play Audio:</div>", unsafe_allow_html=True)
|
134 |
st.audio(file_path, format="audio/wav", start_time=0)
|
135 |
+
|
136 |
+
st.markdown("<div class='subheader'>π Denoised Audio:</div>", unsafe_allow_html=True)
|
137 |
st.audio(denoised_path, format="audio/wav", start_time=0)
|