denoise_and_diarization / main_pipeline.py
agorlanov
denoise_diar_app
156571b
raw
history blame
1.34 kB
import librosa
import torch
import os
from utils.denoise_pipeline import denoise
from utils.diarization_pipeline import diarization
import numpy as np
import pandas as pd
import soundfile as sf
def save_speaker_audios(segments, denoised_audio_path, out_folder='out', out_f=48000):
    """Write one WAV file per speaker label containing that speaker's segments.

    The audio at ``denoised_audio_path`` is loaded as mono at ``out_f`` Hz.
    For every distinct ``label`` in ``segments`` the samples of all of its
    ``[start, end)`` spans are joined back to back (in row order) and written
    to ``{out_folder}/{label}.wav`` as 24-bit PCM.

    Args:
        segments: Anything ``pandas.DataFrame`` accepts that yields the
            columns ``label``, ``start`` and ``end``. ``start`` and ``end``
            are multiplied by the sample rate, i.e. they are treated as
            offsets in seconds.
        denoised_audio_path: Path of the audio file to cut.
        out_folder: Directory for the output files; created if missing.
        out_f: Sample rate used both for loading and for writing.

    Returns:
        List of the written WAV paths, one per label, in order of the
        label's first appearance in ``segments``. Empty if there are no
        segments.
    """
    signal, _ = librosa.load(denoised_audio_path, sr=out_f, mono=True)
    os.makedirs(out_folder, exist_ok=True)

    segments = pd.DataFrame(segments)
    if segments.empty:
        # No segments at all: there is no ``label`` column to iterate over.
        return []

    out_wav_paths = []
    # sort=False keeps labels in order of first appearance, so the returned
    # list is deterministic (iterating a set of labels is not).
    for label, rows in segments.groupby('label', sort=False):
        # Collect each span as its own array. Slice-assigning into a plain
        # list would scatter the samples into scalar elements, which
        # np.concatenate rejects.
        chunks = []
        for start_s, end_s in zip(rows['start'], rows['end']):
            # Clamp at 0 so a slightly negative timestamp cannot wrap around
            # to the end of the signal; slicing already clips past the end.
            start = max(int(start_s * out_f), 0)
            end = max(int(end_s * out_f), 0)
            chunks.append(signal[start:end])

        out_wav_path = f'{out_folder}/{label}.wav'
        sf.write(out_wav_path, np.concatenate(chunks), out_f, 'PCM_24')
        out_wav_paths.append(out_wav_path)

    return out_wav_paths
def main_pipeline(audio_path):
    """Denoise an audio file, diarize it, and export per-speaker audio.

    Args:
        audio_path: Path of the input audio, passed straight to ``denoise``.

    Returns:
        A ``(denoised_audio_path, result_diarization)`` tuple: the value
        returned by ``denoise`` (used below as the path of the denoised
        audio) and the list of per-speaker WAV paths returned by
        ``save_speaker_audios``.
    """
    # Run on the GPU when one is available, otherwise on the CPU.
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    clean_path = denoise(audio_path, device)
    speaker_wavs = save_speaker_audios(diarization(clean_path), clean_path)
    return clean_path, speaker_wavs
if __name__ == '__main__':
    # Script entry point: run the full pipeline on the bundled sample file.
    sample_audio = 'dialog.mp3'
    main_pipeline(sample_audio)