|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import argparse |
|
import copy |
|
import json |
|
import multiprocessing |
|
import os |
|
import random |
|
|
|
import numpy as np |
|
import soundfile as sf |
|
|
|
from nemo.collections.asr.parts.preprocessing.perturb import NoisePerturbation |
|
from nemo.collections.asr.parts.preprocessing.segment import AudioSegment |
|
|
|
rng = None |
|
att_factor = 0.8 |
|
save_noise = False |
|
sample_rate = 16000 |
|
|
|
|
|
def get_out_dir_name(out_dir, input_name, noise_name, snr): |
|
return os.path.join(out_dir, input_name, noise_name + "_" + str(snr) + "db") |
|
|
|
|
|
def create_manifest(input_manifest, noise_manifest, snrs, out_path, save_noise): |
|
os.makedirs(os.path.join(out_path, "manifests"), exist_ok=True) |
|
for snr in snrs: |
|
out_dir = get_out_dir_name( |
|
out_path, |
|
os.path.splitext(os.path.basename(input_manifest))[0], |
|
os.path.splitext(os.path.basename(noise_manifest))[0], |
|
snr, |
|
) |
|
out_mfst = os.path.join( |
|
os.path.join(out_path, "manifests"), |
|
os.path.splitext(os.path.basename(input_manifest))[0] |
|
+ "_" |
|
+ os.path.splitext(os.path.basename(noise_manifest))[0] |
|
+ "_" |
|
+ str(snr) |
|
+ "db" |
|
+ ".json", |
|
) |
|
with open(input_manifest, "r") as inf, open(out_mfst, "w") as outf: |
|
for line in inf: |
|
row = json.loads(line.strip()) |
|
row['audio_filepath'] = os.path.join(out_dir, os.path.basename(row['audio_filepath'])) |
|
if save_noise: |
|
file_ext = os.path.splitext(row['audio_filepath'])[1] |
|
noise_filename = os.path.basename(row['audio_filepath']).replace(file_ext, "_noise" + file_ext) |
|
row['noise_filepath'] = os.path.join(out_dir, noise_filename) |
|
outf.write(json.dumps(row) + "\n") |
|
|
|
|
|
def process_row(row): |
|
audio_file = row['audio_filepath'] |
|
global sample_rate |
|
data_orig = AudioSegment.from_file(audio_file, target_sr=sample_rate, offset=0) |
|
for snr in row['snrs']: |
|
min_snr_db = snr |
|
max_snr_db = snr |
|
global att_factor |
|
perturber = NoisePerturbation( |
|
manifest_path=row['noise_manifest'], min_snr_db=min_snr_db, max_snr_db=max_snr_db, rng=rng |
|
) |
|
out_dir = get_out_dir_name( |
|
row['out_dir'], |
|
os.path.splitext(os.path.basename(row['input_manifest']))[0], |
|
os.path.splitext(os.path.basename(row['noise_manifest']))[0], |
|
snr, |
|
) |
|
os.makedirs(out_dir, exist_ok=True) |
|
out_f = os.path.join(out_dir, os.path.basename(audio_file)) |
|
if os.path.exists(out_f): |
|
continue |
|
data = copy.deepcopy(data_orig) |
|
perturber.perturb(data) |
|
|
|
max_level = np.max(np.abs(data.samples)) |
|
|
|
norm_factor = att_factor / max_level |
|
new_samples = norm_factor * data.samples |
|
sf.write(out_f, new_samples.transpose(), sample_rate) |
|
|
|
global save_noise |
|
if save_noise: |
|
noise_samples = new_samples - norm_factor * data_orig.samples |
|
out_f_ext = os.path.splitext(out_f)[1] |
|
out_f_noise = out_f.replace(out_f_ext, "_noise" + out_f_ext) |
|
sf.write(out_f_noise, noise_samples.transpose(), sample_rate) |
|
|
|
|
|
def add_noise(infile, snrs, noise_manifest, out_dir, num_workers=1): |
|
allrows = [] |
|
|
|
with open(infile, "r") as inf: |
|
for line in inf: |
|
row = json.loads(line.strip()) |
|
row['snrs'] = snrs |
|
row['out_dir'] = out_dir |
|
row['noise_manifest'] = noise_manifest |
|
row['input_manifest'] = infile |
|
allrows.append(row) |
|
pool = multiprocessing.Pool(num_workers) |
|
pool.map(process_row, allrows) |
|
pool.close() |
|
print('Done!') |
|
|
|
|
|
def main(): |
|
parser = argparse.ArgumentParser() |
|
parser.add_argument( |
|
"--input_manifest", type=str, required=True, help="clean test set", |
|
) |
|
parser.add_argument("--noise_manifest", type=str, required=True, help="path to noise manifest file") |
|
parser.add_argument("--out_dir", type=str, required=True, help="destination directory for audio and manifests") |
|
parser.add_argument("--snrs", type=int, nargs="+", default=[0, 10, 20, 30]) |
|
parser.add_argument("--seed", type=int, default=42) |
|
parser.add_argument("--num_workers", default=1, type=int) |
|
parser.add_argument("--sample_rate", default=16000, type=int) |
|
parser.add_argument( |
|
"--attenuation_factor", |
|
default=0.8, |
|
type=float, |
|
help="Attenuation factor applied on the normalized noise-added samples before writing to wave", |
|
) |
|
parser.add_argument( |
|
"--save_noise", default=False, action="store_true", help="save the noise added to the input signal" |
|
) |
|
|
|
args = parser.parse_args() |
|
global sample_rate |
|
sample_rate = args.sample_rate |
|
global att_factor |
|
att_factor = args.attenuation_factor |
|
global save_noise |
|
save_noise = args.save_noise |
|
global rng |
|
rng = args.seed |
|
num_workers = args.num_workers |
|
|
|
add_noise(args.input_manifest, args.snrs, args.noise_manifest, args.out_dir, num_workers=num_workers) |
|
create_manifest(args.input_manifest, args.noise_manifest, args.snrs, args.out_dir, args.save_noise) |
|
|
|
|
|
if __name__ == '__main__': |
|
main() |
|
|