import argparse
import os

import librosa
import numpy as np
import pandas as pd
import soundfile as sf
import torch
from scipy.io.wavfile import write
from tqdm import tqdm

from utils.denoise_pipeline import denoise
from utils.diarization_pipeline import DiarizationPipeline


class CleaningPipeline:
    """Audio cleaning pipeline.

    Runs two stages in order:
      - denoising (``utils.denoise_pipeline.denoise``)
      - diarization (``utils.diarization_pipeline.DiarizationPipeline``)
    """

    # Number of speaker slots the result list is padded up to with ``None``.
    # NOTE(review): presumably matches a fixed number of output widgets in a
    # caller (1 denoised track + 19 speaker tracks) -- confirm before changing.
    SPEAKER_SLOTS = 19

    def __init__(self, device):
        """Build the pipeline stages.

        Args:
            device: Device identifier forwarded to the denoiser and to
                ``DiarizationPipeline`` (the CLI passes a string such as
                ``'cpu'`` or ``'cuda:0'``).
        """
        self.device = device
        self.denoiser = denoise
        self.diarization = DiarizationPipeline(device)

    def __call__(self, input_audio_path: str) -> list:
        """Denoise the audio at ``input_audio_path`` and diarize the result.

        Args:
            input_audio_path: Path to the audio file to clean.

        Returns:
            An empty list when diarization yields an empty result. Otherwise
            a list made of the denoised audio path, followed by the
            diarization output paths, followed by ``None`` padding of length
            ``SPEAKER_SLOTS - count_speakers``.
        """
        denoised_audio_path = self.denoiser(input_audio_path, self.device)
        result_diarization = self.diarization(denoised_audio_path)

        # Guard clause: nothing to report when diarization produced no result.
        if not result_diarization:
            return []

        speaker_paths = result_diarization['output_diar_audio_paths']
        count_speakers = result_diarization['count_speakers']

        # Multiplying a list by a non-positive int yields [], so no padding
        # is added when count_speakers >= SPEAKER_SLOTS.
        padding = [None] * (self.SPEAKER_SLOTS - count_speakers)
        return [denoised_audio_path] + speaker_paths + padding


def main():
    """Command-line entry point: clean a single audio file."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--audio-path', default='dialog.mp3', help='Path to audio')
    parser.add_argument('--device', default='cpu',
                        help="Device to run on, e.g. 'cpu' or 'cuda:0'")
    # NOTE: --out-folder-path is accepted for CLI compatibility but is not
    # consumed by the pipeline in this file.
    parser.add_argument('--out-folder-path', default='out', help='Path to result folder')
    opt = parser.parse_args()

    # Honor the --device flag instead of a hard-coded device.
    cleaning_pipeline = CleaningPipeline(opt.device)
    cleaning_pipeline(input_audio_path=opt.audio_path)


if __name__ == '__main__':
    main()