Knight-coderr committed on
Commit
0f06115
·
verified ·
1 Parent(s): b871032

Upload 6 files

Browse files
app.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import librosa
3
+ import soundfile as sf
4
+ import tempfile
5
+ import os
6
+ from utils.noise_removal import remove_noise
7
+ from utils.vad_segmentation import detect_speech_segments
8
+ from utils.speaker_diarization import diarize_speakers
9
+ from utils.noise_classification import classify_noise
10
+
11
+ st.set_page_config(page_title="Audio Analyzer", layout="wide")
12
+ st.title(" Audio Analysis Pipeline")
13
+
14
+ uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3"])
15
+
16
+ if uploaded_file:
17
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
18
+ tmp.write(uploaded_file.read())
19
+ tmp_path = tmp.name
20
+
21
+ st.audio(tmp_path, format='audio/wav')
22
+
23
+ st.subheader("1️⃣ Noise Removal")
24
+ denoised_path = tmp_path.replace(".wav", "_denoised.wav")
25
+ remove_noise(tmp_path, denoised_path)
26
+ st.audio(denoised_path, format="audio/wav")
27
+
28
+ st.subheader("2️⃣ Speech Segmentation")
29
+ speech_segments = detect_speech_segments(denoised_path)
30
+ st.write(f"Detected {len(speech_segments)} speech segments.")
31
+ for i, (start, end) in enumerate(speech_segments[:5]):
32
+ st.write(f"Segment {i+1}: {start:.2f}s to {end:.2f}s")
33
+
34
+ st.subheader("3️⃣ Speaker Diarization")
35
+ diarization = diarize_speakers(denoised_path)
36
+ st.text("Speakers detected:")
37
+ for turn, _, speaker in diarization.itertracks(yield_label=True):
38
+ st.write(f"{turn.start:.2f}s - {turn.end:.2f}s: {speaker}")
39
+
40
+ st.subheader("4️⃣ Noise Classification")
41
+ noise_predictions = classify_noise(denoised_path)
42
+ st.write("Top predicted noise classes:")
43
+ for cls, prob in noise_predictions:
44
+ st.write(f"{cls}: {prob:.2f}")
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ soundfile
3
+ librosa
4
+ speechbrain
5
+ pyannote.audio
6
+ torchaudio
7
+ scikit-learn
8
+ joblib
utils/noise_classification.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import librosa
3
+ import joblib
4
+
5
+ # Load your trained model + label encoder
6
+ clf = joblib.load("models/noise_classifier.pkl")
7
+ label_encoder = joblib.load("models/label_encoder.pkl")
8
+
9
+ def classify_noise(audio_path):
10
+ y, sr = librosa.load(audio_path, sr=None)
11
+ mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
12
+ feature = np.mean(mfcc.T, axis=0).reshape(1, -1)
13
+ probs = clf.predict_proba(feature)[0]
14
+ top_idx = np.argsort(probs)[::-1][:5]
15
+ return [(label_encoder.inverse_transform([i])[0], probs[i]) for i in top_idx]
utils/noise_removal.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from speechbrain.pretrained import SpectralMaskEnhancement
2
+ import torchaudio
3
+
4
+ model = SpectralMaskEnhancement.from_hparams(source="speechbrain/metricgan-plus-voicebank")
5
+
6
+ def remove_noise(input_path, output_path):
7
+ enhanced = model.enhance_file(input_path)
8
+ torchaudio.save(output_path, enhanced[0], enhanced[1])
utils/speaker_diarization.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from pyannote.audio import Pipeline
2
+
3
+ diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization")
4
+
5
+ def diarize_speakers(audio_path):
6
+ return diarization_pipeline(audio_path)
utils/vad_segmentation.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import torchaudio
2
+ from pyannote.audio import Pipeline
3
+
4
+ pipeline = Pipeline.from_pretrained("pyannote/voice-activity-detection")
5
+
6
+ def detect_speech_segments(audio_path):
7
+ vad_result = pipeline(audio_path)
8
+ return [(segment.start, segment.end) for segment in vad_result.get_timeline().support()]