File size: 2,784 Bytes
fad9b18
 
ae33b95
 
fad9b18
 
 
 
 
 
ae33b95
fad9b18
ae33b95
 
307d0c6
 
ae33b95
307d0c6
 
 
 
 
ae33b95
fad9b18
 
ae33b95
fad9b18
307d0c6
 
fad9b18
 
 
 
 
 
 
 
307d0c6
fad9b18
 
 
 
 
 
 
 
 
 
307d0c6
fad9b18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307d0c6
fad9b18
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import streamlit as st
import tempfile
import os
from pydub import AudioSegment
from utils.noise_removal import remove_noise
from utils.vad_segmentation import vad_segmentation
from utils.speaker_diarization import diarize_speakers
from utils.noise_classification import classify_noise

# Page-level configuration followed by the visible page heading.
st.set_page_config(
    page_title="Audio Analyzer",
    layout="wide",
)
st.title("Audio Analysis Pipeline")

# File extensions the upload widget is restricted to.
_ACCEPTED_EXTENSIONS = ["wav", "mp3", "m4a", "mp4a"]

# The script tests this value for truthiness before running the pipeline.
uploaded_file = st.file_uploader(
    "Upload an audio file",
    type=_ACCEPTED_EXTENSIONS,
)

def prepare_audio(uploaded_file):
    """Write the uploaded audio to a temporary ``.wav`` file and return its path.

    Uploads whose extension is ``wav`` are copied verbatim; any other
    extension is decoded with ``AudioSegment.from_file`` and re-exported as
    WAV.

    Args:
        uploaded_file: Seekable binary file-like object exposing a ``name``
            attribute (such as the object returned by ``st.file_uploader``).

    Returns:
        Path of the temporary WAV file. The file is created with
        ``delete=False``, so the caller is responsible for removing it.

    Raises:
        Exception: Whatever decoding or exporting raises. The temporary file
            is removed before the exception propagates.
    """
    file_ext = uploaded_file.name.split(".")[-1].lower()

    # An earlier reader may have left the cursor at EOF; rewind so the whole
    # upload is copied/decoded instead of an empty remainder.
    uploaded_file.seek(0)

    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as out_wav:
        wav_path = out_wav.name
        if file_ext == "wav":
            out_wav.write(uploaded_file.read())
            return wav_path

    # The handle is closed at this point on purpose: a NamedTemporaryFile
    # cannot be reopened by name on Windows while it is still open, and the
    # export below opens the path again.

    # NOTE(review): "mp4a" is accepted by the uploader but is presumably not
    # a container name the decoder recognises; such files are assumed to be
    # MPEG-4 audio and decoded as "mp4" - confirm against real uploads.
    decode_format = "mp4" if file_ext == "mp4a" else file_ext

    try:
        audio = AudioSegment.from_file(uploaded_file, format=decode_format)
        audio.export(wav_path, format="wav")
    except Exception:
        # Do not leave an empty temp file behind when conversion fails.
        os.remove(wav_path)
        raise
    return wav_path

# Main pipeline: runs once a file has been uploaded. Each stage is wrapped in
# its own try/except so that one failing stage is reported in the UI without
# preventing the remaining stages from running.
if uploaded_file:
    # Preview the upload using its own MIME type rather than always claiming
    # WAV, since mp3/m4a uploads are accepted as well.
    st.audio(uploaded_file, format=uploaded_file.type or "audio/wav")

    try:
        with st.spinner("🔄 Preparing audio..."):
            tmp_path = prepare_audio(uploaded_file)
    except Exception as e:
        # Nothing downstream can run without the prepared WAV file.
        st.error(f"Audio preparation failed: {e}")
        st.stop()

    # Derive the output name from the stem so that only the suffix changes,
    # even if ".wav" happens to occur elsewhere in the temp path.
    denoised_path = os.path.splitext(tmp_path)[0] + "_denoised.wav"
    # Path fed to the analysis stages; switched to the original audio below
    # if noise removal produced no output file.
    analysis_path = denoised_path

    try:
        try:
            st.subheader("1️⃣ Noise Removal")
            with st.spinner("Removing noise..."):
                remove_noise(tmp_path, denoised_path)
            # Hand the bytes (not the path) to the player so the temp file can
            # be deleted safely at the end of this run.
            with open(denoised_path, "rb") as denoised_file:
                st.audio(denoised_file.read(), format="audio/wav")
        except Exception as e:
            st.error(f" Noise removal failed: {e}")
            if not os.path.isfile(denoised_path):
                # Avoid a cascade of missing-file errors in later stages.
                analysis_path = tmp_path
                st.warning("Continuing with the original (un-denoised) audio.")

        try:
            st.subheader("2️⃣ Speech Segmentation")
            with st.spinner("Running Voice Activity Detection..."):
                speech_annotation = vad_segmentation(analysis_path)
                segments = [(seg.start, seg.end) for seg in speech_annotation.itersegments()]
            st.write(f" Detected {len(segments)} speech segments.")
            # Only the first five segments are listed to keep the page short.
            for i, (start, end) in enumerate(segments[:5]):
                st.write(f"Segment {i+1}: {start:.2f}s to {end:.2f}s")
        except Exception as e:
            st.error(f" VAD failed: {e}")

        try:
            st.subheader("3️⃣ Speaker Diarization")
            with st.spinner("Diarizing speakers..."):
                diarization = diarize_speakers(analysis_path)
            st.text(" Speakers detected:")
            for turn, _, speaker in diarization.itertracks(yield_label=True):
                st.write(f"{turn.start:.2f}s - {turn.end:.2f}s: Speaker {speaker}")
        except Exception as e:
            st.error(f"Speaker diarization failed: {e}")

        try:
            st.subheader("4️⃣ Noise Classification")
            with st.spinner("Classifying background noise..."):
                noise_predictions = classify_noise(analysis_path)
            st.write("Top predicted noise classes:")
            for cls, prob in noise_predictions:
                st.write(f"{cls}: {prob:.2f}")
        except Exception as e:
            st.error(f"Noise classification failed: {e}")
    finally:
        # The temp files are created with delete=False, so remove them here;
        # otherwise every rerun of the script leaks two files.
        for stale_path in (tmp_path, denoised_path):
            try:
                os.remove(stale_path)
            except OSError:
                # Already gone or never created; nothing to clean up.
                pass