Spaces:
Runtime error
Runtime error
File size: 2,046 Bytes
a227627 c39b8bf 9872c27 156571b a227627 c39b8bf d6b32ee 2f53d2f d6b32ee c39b8bf 156571b c39b8bf d6b32ee c39b8bf d6b32ee c39b8bf 156571b c39b8bf 4c18976 c39b8bf 156571b c39b8bf ad99144 c39b8bf 87d93bf 9872c27 c39b8bf 87d93bf 6144c99 c39b8bf a227627 2f53d2f d172563 a227627 ad99144 a227627 87d93bf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import argparse
import librosa
import torch
import os
from tqdm import tqdm
from utils.denoise_pipeline import denoise
from utils.diarization_pipeline import diarization
import numpy as np
import pandas as pd
import soundfile as sf
def filter_small_speech(segments, min_fraction=0.015):
    """Drop speakers whose total speech time is a negligible share of the audio.

    Args:
        segments: DataFrame with at least ``start``, ``end`` (seconds) and
            ``label`` (speaker id) columns, one row per speech segment.
        min_fraction: keep only labels whose summed duration exceeds this
            fraction of the total speech duration (default 0.015 = 1.5%,
            matching the original hard-coded threshold).

    Returns:
        A filtered copy of ``segments`` (with an added ``duration`` column)
        containing only rows of the retained speaker labels.
    """
    # Work on a copy so the caller's DataFrame is not mutated in place.
    segments = segments.copy()
    segments['duration'] = segments.end - segments.start
    # Sum only the duration column: groupby(...).sum() over all columns
    # would also sum start/end and fails on non-numeric columns in
    # recent pandas versions.
    per_label = segments.groupby('label')['duration'].sum()
    labels = per_label[per_label / per_label.sum() > min_fraction].index
    return segments[segments.label.isin(labels)]
def save_speaker_audios(segments, denoised_audio_path, out_folder='out', out_f=48000):
    """Write one wav file per retained speaker from a diarized recording.

    Args:
        segments: iterable of dicts / DataFrame rows with ``start``, ``end``
            (seconds) and ``label`` (speaker id) fields.
        denoised_audio_path: path to the (denoised) source audio file.
        out_folder: directory to create (if needed) and write wavs into.
        out_f: target sample rate in Hz; the audio is resampled to it.

    Returns:
        List of paths to the written ``<label>.wav`` files, capped at 10
        speakers.
    """
    # Load mono audio resampled to the output rate so sample indices
    # computed from second-based timestamps line up with the signal.
    signal, sr = librosa.load(denoised_audio_path, sr=out_f, mono=True)
    os.makedirs(out_folder, exist_ok=True)
    out_wav_paths = []
    segments = pd.DataFrame(segments)
    segments = filter_small_speech(segments)
    # Sort labels: iterating a set makes file order (and therefore which
    # speakers survive the [:10] cap) nondeterministic between runs.
    for label in sorted(set(segments.label)):
        temp_df = segments[segments.label == label]
        output_signal = []
        for _, r in temp_df.iterrows():
            # Convert second-based segment bounds to sample indices.
            start = int(r["start"] * out_f)
            end = int(r["end"] * out_f)
            output_signal.append(signal[start:end])
        out_wav_path = os.path.join(out_folder, f'{label}.wav')
        sf.write(out_wav_path, np.concatenate(output_signal), out_f, 'PCM_24')
        out_wav_paths.append(out_wav_path)
    # Cap the number of speaker files at 10 (matches original behavior).
    return out_wav_paths[:10]
def main_pipeline(audio_path, out_folder='out'):
    """Run the full pipeline: denoise, diarize, and split audio by speaker.

    Args:
        audio_path: path to the input audio file.
        out_folder: directory where per-speaker wav files are written.

    Returns:
        Tuple of (path to the denoised audio, list of per-speaker wav paths).
    """
    # Prefer GPU when one is visible to torch; fall back to CPU.
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    cleaned_path = denoise(audio_path, device)
    speaker_segments = diarization(cleaned_path)
    speaker_wavs = save_speaker_audios(speaker_segments, cleaned_path, out_folder)
    return cleaned_path, speaker_wavs
if __name__ == '__main__':
    # CLI entry point: parse the input audio path and output folder,
    # then run the denoise + diarization + speaker-split pipeline.
    parser = argparse.ArgumentParser()
    parser.add_argument('--audio-path', default='dialog.mp3', help='Path to audio')
    parser.add_argument('--out-folder-path', default='out', help='Path to result folder')
    opt = parser.parse_args()
    # NOTE(review): the identical pipeline is executed 10 times on the same
    # input (presumably for timing/benchmarking) — confirm this is intended,
    # since each iteration overwrites the previous run's output files.
    for _ in tqdm(range(10)):
        main_pipeline(audio_path=opt.audio_path, out_folder=opt.out_folder_path)