# denoise_and_diarization / main_pipeline.py
# agorlanov
# train_fix
# 3ff6c9f
# raw
# history blame
# 1.64 kB
import argparse
import librosa
import torch
import os
from scipy.io.wavfile import write
from tqdm import tqdm
from utils.denoise_pipeline import denoise
from utils.diarization_pipeline import DiarizationPipeline
import numpy as np
import pandas as pd
import soundfile as sf
class CleaningPipeline:
    """
    Cleaning audio pipeline. Contains:
        - denoising
        - diarization

    Calling an instance runs the denoiser on the input file and then runs
    diarization on the denoiser's output.
    """

    # Number of per-speaker slots the result list is padded towards.
    # NOTE(review): presumably matches a fixed number of output slots expected
    # by the caller (1 denoised entry + 19 speaker entries) -- confirm.
    MAX_SPEAKER_SLOTS = 19

    def __init__(self, device):
        """
        Build the pipeline stages.

        :param device: value forwarded to the denoiser on every call and to
            the ``DiarizationPipeline`` constructor (e.g. ``'cpu'``, ``'cuda:0'``).
        """
        self.device = device
        self.denoiser = denoise
        self.diarization = DiarizationPipeline(device)

    def __call__(self, input_audio_path: str) -> list:
        """
        Denoise ``input_audio_path`` and diarize the denoised result.

        :param input_audio_path: path of the audio file to process.
        :return: an empty list when diarization yields an empty result;
            otherwise ``[denoised] + per-speaker outputs + None padding``,
            where the padding has ``MAX_SPEAKER_SLOTS - count_speakers``
            entries (none when ``count_speakers`` exceeds the slot count).
        """
        denoised_audio_path = self.denoiser(input_audio_path, self.device)
        result_diarization = self.diarization(denoised_audio_path)

        # Guard clause: an empty (or missing) diarization result has nothing
        # to unpack, so report "no outputs" instead of indexing into it.
        if not result_diarization:
            return []

        output_diar_audio_paths = result_diarization['output_diar_audio_paths']
        count_speakers = result_diarization['count_speakers']

        # Multiplying a list by a negative number yields [], so more speakers
        # than slots simply means no padding is appended.
        padding = [None] * (self.MAX_SPEAKER_SLOTS - count_speakers)
        return [denoised_audio_path] + output_diar_audio_paths + padding
if __name__ == '__main__':
    # Command-line entry point: run the cleaning pipeline once on a single file.
    parser = argparse.ArgumentParser()
    parser.add_argument('--audio-path', default='dialog.mp3', help='Path to audio')
    parser.add_argument('--device', default='cpu',
                        help='Device to run the pipeline on (e.g. cpu, cuda:0)')
    # NOTE(review): this option is parsed but nothing in this script reads it;
    # kept so existing command lines that pass it keep working.
    parser.add_argument('--out-folder-path', default='out', help='Path to result folder')
    opt = parser.parse_args()

    # Honor --device: it used to be parsed and then ignored in favor of a
    # hard-coded 'cuda:0'. Pass --device cuda:0 to get the previous behavior.
    cleaning_pipeline = CleaningPipeline(opt.device)
    cleaning_pipeline(input_audio_path=opt.audio_path)