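# NOTE: the next three lines appear to be web-page residue captured with the file;
# they are not part of the script and are kept only as comments.
# crystal-technologies's picture
# Upload 1287 files
# 2d8da09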
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
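# USAGE: python add_noise.py --input_manifest=<manifest file of original "clean" dataset>
# --noise_manifest=<manifest file pointing to noise data>
# --out_dir=<destination directory for noisy audio and manifests>
# --snrs=<list of SNRs (in dB) at which noise should be added to the audio>
# --seed=<seed for random number generator>
# --num_workers=<number of parallel workers>
# --sample_rate=<sample rate used to load and write the audio (default: 16000)>
# --attenuation_factor=<peak level of the written noisy audio (default: 0.8)>
# --save_noise (optional flag: also save the noise that was added to each file)
# To be able to reproduce the same noisy dataset, use a fixed seed and num_workers=1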
import argparse
import copy
import json
import multiprocessing
import os
import random
import numpy as np
import soundfile as sf
from nemo.collections.asr.parts.preprocessing.perturb import NoisePerturbation
from nemo.collections.asr.parts.preprocessing.segment import AudioSegment
# Module-level settings read by process_row(); main() overwrites them from the
# command-line arguments before any audio is processed.
# Value forwarded as the `rng` argument of NoisePerturbation (set from --seed).
rng = None
# Peak absolute level the noisy signal is scaled to before it is written out.
att_factor = 0.8
# When True, the noise that was added is also written next to each noisy file.
save_noise = False
# Sample rate used both to load the input audio and to write the output files.
sample_rate = 16000
def get_out_dir_name(out_dir, input_name, noise_name, snr):
    """Return the directory that holds the noisy audio for one SNR level.

    The resulting layout is ``<out_dir>/<input_name>/<noise_name>_<snr>db``.
    """
    leaf = "_".join((noise_name, str(snr))) + "db"
    return os.path.join(out_dir, input_name, leaf)
def create_manifest(input_manifest, noise_manifest, snrs, out_path, save_noise):
    """Write one manifest per SNR that points at the generated noisy audio.

    For every value in ``snrs`` a file named ``<input>_<noise>_<snr>db.json``
    is created under ``<out_path>/manifests`` (``<input>`` and ``<noise>`` are
    the manifest base names without extension).  Each row of ``input_manifest``
    is copied with ``audio_filepath`` redirected to the directory returned by
    ``get_out_dir_name`` for that SNR.

    Args:
        input_manifest: path to the JSON-lines manifest of the clean dataset.
        noise_manifest: path to the noise manifest (only its name is used here).
        snrs: iterable of SNR values, one output manifest is written for each.
        out_path: root output directory.
        save_noise: when true, every row also gets a ``noise_filepath`` entry
            pointing at ``<stem>_noise<ext>`` in the same directory.
    """
    manifest_dir = os.path.join(out_path, "manifests")
    os.makedirs(manifest_dir, exist_ok=True)

    # These names do not depend on the SNR, so compute them once.
    input_name = os.path.splitext(os.path.basename(input_manifest))[0]
    noise_name = os.path.splitext(os.path.basename(noise_manifest))[0]

    for snr in snrs:
        out_dir = get_out_dir_name(out_path, input_name, noise_name, snr)
        out_mfst = os.path.join(
            manifest_dir, "_".join((input_name, noise_name, str(snr))) + "db" + ".json"
        )
        with open(input_manifest, "r", encoding="utf-8") as inf, open(
            out_mfst, "w", encoding="utf-8"
        ) as outf:
            for line in inf:
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. a trailing newline at end of file).
                    continue
                row = json.loads(line)
                audio_name = os.path.basename(row['audio_filepath'])
                row['audio_filepath'] = os.path.join(out_dir, audio_name)
                if save_noise:
                    # Split off the extension instead of using str.replace():
                    # replace() touches every occurrence of the extension text
                    # and, for a file without extension, inserts "_noise"
                    # between all characters of the name.
                    stem, file_ext = os.path.splitext(audio_name)
                    row['noise_filepath'] = os.path.join(out_dir, stem + "_noise" + file_ext)
                outf.write(json.dumps(row) + "\n")
def process_row(row):
    """Create the noisy version(s) of one utterance, one output file per SNR.

    The clean audio is loaded once at the module-level ``sample_rate``.  For
    each SNR in ``row['snrs']`` a ``NoisePerturbation`` restricted to exactly
    that SNR is applied to a copy of the clean audio, the result is scaled so
    that its peak absolute value equals the module-level ``att_factor`` and is
    written to the directory given by ``get_out_dir_name``.  Outputs that
    already exist are left untouched.  When the module-level ``save_noise`` is
    true, the difference between the scaled noisy signal and the equally scaled
    clean signal is written alongside as ``<stem>_noise<ext>``.

    Args:
        row: manifest entry (dict) carrying ``audio_filepath`` plus the keys
            added by ``add_noise``: ``snrs``, ``out_dir``, ``noise_manifest``
            and ``input_manifest``.
    """
    audio_file = row['audio_filepath']
    data_orig = AudioSegment.from_file(audio_file, target_sr=sample_rate, offset=0)

    # Independent of the SNR, so resolved once per row.
    input_name = os.path.splitext(os.path.basename(row['input_manifest']))[0]
    noise_name = os.path.splitext(os.path.basename(row['noise_manifest']))[0]

    for snr in row['snrs']:
        out_dir = get_out_dir_name(row['out_dir'], input_name, noise_name, snr)
        os.makedirs(out_dir, exist_ok=True)
        out_f = os.path.join(out_dir, os.path.basename(audio_file))
        if os.path.exists(out_f):
            # Already generated (e.g. by an earlier, interrupted run).
            continue

        # Built only when there is work to do; min == max pins the SNR.
        perturber = NoisePerturbation(
            manifest_path=row['noise_manifest'], min_snr_db=snr, max_snr_db=snr, rng=rng
        )
        data = copy.deepcopy(data_orig)
        perturber.perturb(data)

        max_level = np.max(np.abs(data.samples))
        # An all-zero mix would otherwise divide by zero and write NaN samples.
        norm_factor = att_factor / max_level if max_level > 0 else 1.0
        new_samples = norm_factor * data.samples
        sf.write(out_f, new_samples.transpose(), sample_rate)

        if save_noise:
            noise_samples = new_samples - norm_factor * data_orig.samples
            # splitext only touches the real extension; str.replace() on the
            # whole path would also rewrite matching text in directory names
            # and mangles paths that have no extension at all.
            out_f_stem, out_f_ext = os.path.splitext(out_f)
            out_f_noise = out_f_stem + "_noise" + out_f_ext
            sf.write(out_f_noise, noise_samples.transpose(), sample_rate)
def _init_worker(worker_sample_rate, worker_att_factor, worker_save_noise, worker_rng):
    """Install the parent's run-time settings in a newly started worker process."""
    global sample_rate, att_factor, save_noise, rng
    sample_rate = worker_sample_rate
    att_factor = worker_att_factor
    save_noise = worker_save_noise
    rng = worker_rng


def add_noise(infile, snrs, noise_manifest, out_dir, num_workers=1):
    """Generate noisy audio for every entry of a manifest using a worker pool.

    Each row of ``infile`` is extended with the information ``process_row``
    needs (``snrs``, ``out_dir``, ``noise_manifest``, ``input_manifest``) and
    the rows are then processed by ``num_workers`` processes.

    Args:
        infile: path to the JSON-lines manifest of the clean dataset.
        snrs: list of SNR values to generate for every utterance.
        noise_manifest: path to the manifest describing the noise data.
        out_dir: root directory for the generated audio.
        num_workers: number of worker processes.
    """
    allrows = []
    with open(infile, "r", encoding="utf-8") as inf:
        for line in inf:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            row = json.loads(line)
            row['snrs'] = snrs
            row['out_dir'] = out_dir
            row['noise_manifest'] = noise_manifest
            row['input_manifest'] = infile
            allrows.append(row)

    # Workers started with a non-fork start method (e.g. "spawn") re-import
    # this module and would see the default module-level settings instead of
    # the values main() assigned, so hand the current values over explicitly.
    # The context manager releases the pool even if a worker raises.
    with multiprocessing.Pool(
        num_workers,
        initializer=_init_worker,
        initargs=(sample_rate, att_factor, save_noise, rng),
    ) as pool:
        pool.map(process_row, allrows)
    print('Done!')
def _build_arg_parser():
    """Assemble the command-line parser for this script."""
    cli = argparse.ArgumentParser()
    cli.add_argument("--input_manifest", required=True, type=str, help="clean test set")
    cli.add_argument("--noise_manifest", required=True, type=str, help="path to noise manifest file")
    cli.add_argument(
        "--out_dir", required=True, type=str, help="destination directory for audio and manifests"
    )
    cli.add_argument("--snrs", nargs="+", type=int, default=[0, 10, 20, 30])
    cli.add_argument("--seed", type=int, default=42)
    cli.add_argument("--num_workers", type=int, default=1)
    cli.add_argument("--sample_rate", type=int, default=16000)
    cli.add_argument(
        "--attenuation_factor",
        type=float,
        default=0.8,
        help="Attenuation factor applied on the normalized noise-added samples before writing to wave",
    )
    cli.add_argument(
        "--save_noise", action="store_true", default=False, help="save the noise added to the input signal"
    )
    return cli


def main():
    """Script entry point.

    Parses the command line, stores the run settings in the module-level
    variables read by the processing code, generates the noisy audio and
    finally writes the matching manifests.
    """
    global sample_rate, att_factor, save_noise, rng

    args = _build_arg_parser().parse_args()

    # Publish the settings before any processing starts.
    sample_rate = args.sample_rate
    att_factor = args.attenuation_factor
    save_noise = args.save_noise
    rng = args.seed

    add_noise(
        args.input_manifest,
        args.snrs,
        args.noise_manifest,
        args.out_dir,
        num_workers=args.num_workers,
    )
    create_manifest(
        args.input_manifest,
        args.noise_manifest,
        args.snrs,
        args.out_dir,
        args.save_noise,
    )


if __name__ == '__main__':
    main()