File size: 6,356 Bytes
2d8da09 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# USAGE: python add_noise.py --input_manifest=<manifest file of original "clean" dataset>
# --noise_manifest=<manifest file pointing to noise data>
# --out_dir=<destination directory for noisy audio and manifests>
# --snrs=<list of snrs at which noise should be added to the audio>
# --seed=<seed for random number generator>
# --num_workers=<number of parallel workers>
# To be able to reproduce the same noisy dataset, use a fixed seed and num_workers=1
import argparse
import copy
import json
import multiprocessing
import os
import random
import numpy as np
import soundfile as sf
from nemo.collections.asr.parts.preprocessing.perturb import NoisePerturbation
from nemo.collections.asr.parts.preprocessing.segment import AudioSegment
# Run-time settings shared by the functions below. The values here are only
# defaults: main() overwrites all four from the command-line arguments.
sample_rate = 16000  # passed as target_sr when loading audio and as the rate when writing it
att_factor = 0.8  # numerator of the peak-normalisation gain computed in process_row()
save_noise = False  # when true, process_row() also writes the isolated noise signal
rng = None  # forwarded as the rng argument of NoisePerturbation; main() stores --seed here
def get_out_dir_name(out_dir, input_name, noise_name, snr):
    """Return the directory that holds the noisy audio for one SNR value.

    The result has the form ``<out_dir>/<input_name>/<noise_name>_<snr>db``.
    """
    leaf = "_".join([noise_name, str(snr) + "db"])
    return os.path.join(out_dir, input_name, leaf)
def create_manifest(input_manifest, noise_manifest, snrs, out_path, save_noise):
    """Write one output manifest per SNR value that points at the noisy audio.

    For every value in ``snrs`` the rows of ``input_manifest`` are copied to
    ``<out_path>/manifests/<input>_<noise>_<snr>db.json``, where ``<input>``
    and ``<noise>`` are the base names of the two manifest files without
    their extension. In each copied row ``audio_filepath`` is redirected to
    the file of the same base name inside the directory returned by
    ``get_out_dir_name`` for that SNR.

    Args:
        input_manifest: path to the JSON-lines manifest of the clean dataset.
        noise_manifest: path to the noise manifest; only its base name is
            used here.
        snrs: iterable of SNR values; one manifest is written per value.
        out_path: root output directory.
        save_noise: when true, each row also gets a ``noise_filepath`` entry
            named ``<stem>_noise<ext>`` next to the noisy audio file.
    """
    manifest_dir = os.path.join(out_path, "manifests")
    os.makedirs(manifest_dir, exist_ok=True)

    # The manifest base names do not depend on the SNR, so compute them once.
    input_name = os.path.splitext(os.path.basename(input_manifest))[0]
    noise_name = os.path.splitext(os.path.basename(noise_manifest))[0]

    for snr in snrs:
        out_dir = get_out_dir_name(out_path, input_name, noise_name, snr)
        out_mfst = os.path.join(manifest_dir, f"{input_name}_{noise_name}_{snr}db.json")
        with open(input_manifest, "r", encoding="utf-8") as inf, open(out_mfst, "w", encoding="utf-8") as outf:
            for line in inf:
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. a trailing newline) instead of
                    # failing inside json.loads.
                    continue
                row = json.loads(line)
                audio_name = os.path.basename(row['audio_filepath'])
                row['audio_filepath'] = os.path.join(out_dir, audio_name)
                if save_noise:
                    # Insert "_noise" before the extension. str.replace() on
                    # the extension would touch every occurrence of it in the
                    # name and, for an empty extension, every character gap.
                    stem, file_ext = os.path.splitext(audio_name)
                    row['noise_filepath'] = os.path.join(out_dir, stem + "_noise" + file_ext)
                outf.write(json.dumps(row) + "\n")
def process_row(row):
    """Create the noisy versions of one clean utterance, one per requested SNR.

    ``row`` is a manifest entry that ``add_noise`` has extended with the keys
    ``snrs``, ``out_dir``, ``noise_manifest`` and ``input_manifest``. For each
    SNR the clean audio is perturbed with ``NoisePerturbation`` (min and max
    SNR both set to that value), scaled so that its peak absolute value equals
    ``att_factor``, and written to the directory given by
    ``get_out_dir_name``. Outputs that already exist are left untouched, so an
    interrupted run can be resumed.

    The module-level ``sample_rate``, ``att_factor``, ``save_noise`` and
    ``rng`` are only read here, so no ``global`` declarations are needed.
    """
    audio_file = row['audio_filepath']

    # The manifest base names do not depend on the SNR, so compute them once.
    input_name = os.path.splitext(os.path.basename(row['input_manifest']))[0]
    noise_name = os.path.splitext(os.path.basename(row['noise_manifest']))[0]

    data_orig = AudioSegment.from_file(audio_file, target_sr=sample_rate, offset=0)
    for snr in row['snrs']:
        # min == max pins the perturbation to exactly this SNR.
        perturber = NoisePerturbation(
            manifest_path=row['noise_manifest'], min_snr_db=snr, max_snr_db=snr, rng=rng
        )
        out_dir = get_out_dir_name(row['out_dir'], input_name, noise_name, snr)
        os.makedirs(out_dir, exist_ok=True)
        out_f = os.path.join(out_dir, os.path.basename(audio_file))
        if os.path.exists(out_f):
            continue

        # perturb() is applied to a copy so the clean samples stay available
        # for the next SNR and for the noise computation below.
        data = copy.deepcopy(data_orig)
        perturber.perturb(data)

        max_level = np.max(np.abs(data.samples))
        # An all-zero signal has no peak to normalise; dividing by zero would
        # produce inf/NaN samples, so leave such a signal unscaled.
        norm_factor = att_factor / max_level if max_level > 0 else 1.0
        new_samples = norm_factor * data.samples
        sf.write(out_f, new_samples.transpose(), sample_rate)

        if save_noise:
            # The added noise is the scaled noisy signal minus the equally
            # scaled clean signal.
            noise_samples = new_samples - norm_factor * data_orig.samples
            # Insert "_noise" before the extension of the file name only;
            # str.replace() on the whole path would also rewrite directory
            # names that happen to contain the extension.
            out_f_stem, out_f_ext = os.path.splitext(out_f)
            out_f_noise = out_f_stem + "_noise" + out_f_ext
            sf.write(out_f_noise, noise_samples.transpose(), sample_rate)
def _init_worker(worker_sample_rate, worker_att_factor, worker_save_noise, worker_rng):
    """Copy the parent's run-time settings into a newly started worker process."""
    global sample_rate, att_factor, save_noise, rng
    sample_rate = worker_sample_rate
    att_factor = worker_att_factor
    save_noise = worker_save_noise
    rng = worker_rng


def add_noise(infile, snrs, noise_manifest, out_dir, num_workers=1):
    """Generate noisy copies of every utterance listed in ``infile``.

    Each manifest row is extended with the job parameters and handed to
    ``process_row`` through a pool of worker processes.

    Args:
        infile: path to the JSON-lines manifest of the clean dataset.
        snrs: SNR values at which noise is added to every utterance.
        noise_manifest: path to the manifest describing the noise data.
        out_dir: root output directory for the noisy audio.
        num_workers: number of worker processes in the pool.
    """
    allrows = []
    with open(infile, "r", encoding="utf-8") as inf:
        for line in inf:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline) instead of
                # failing inside json.loads.
                continue
            row = json.loads(line)
            row['snrs'] = snrs
            row['out_dir'] = out_dir
            row['noise_manifest'] = noise_manifest
            row['input_manifest'] = infile
            allrows.append(row)

    # process_row reads its settings from module globals. Workers started with
    # the "spawn" or "forkserver" methods re-import this module and would see
    # only the defaults, so the current values are passed in explicitly.
    # The context manager releases the pool even if a worker raises.
    with multiprocessing.Pool(
        num_workers,
        initializer=_init_worker,
        initargs=(sample_rate, att_factor, save_noise, rng),
    ) as pool:
        pool.map(process_row, allrows)
    print('Done!')
def main():
    """Parse the command line, then create the noisy audio and its manifests."""
    global sample_rate, att_factor, save_noise, rng

    cli = argparse.ArgumentParser()
    cli.add_argument("--input_manifest", required=True, type=str, help="clean test set")
    cli.add_argument("--noise_manifest", required=True, type=str, help="path to noise manifest file")
    cli.add_argument("--out_dir", required=True, type=str, help="destination directory for audio and manifests")
    cli.add_argument("--snrs", nargs="+", type=int, default=[0, 10, 20, 30])
    cli.add_argument("--seed", type=int, default=42)
    cli.add_argument("--num_workers", type=int, default=1)
    cli.add_argument("--sample_rate", type=int, default=16000)
    cli.add_argument(
        "--attenuation_factor",
        type=float,
        default=0.8,
        help="Attenuation factor applied on the normalized noise-added samples before writing to wave",
    )
    cli.add_argument(
        "--save_noise", action="store_true", default=False, help="save the noise added to the input signal"
    )
    opts = cli.parse_args()

    # Publish the options that process_row() reads as module-level settings.
    sample_rate = opts.sample_rate
    att_factor = opts.attenuation_factor
    save_noise = opts.save_noise
    rng = opts.seed

    # Write the audio first, then the manifests that reference it.
    add_noise(
        opts.input_manifest,
        opts.snrs,
        opts.noise_manifest,
        opts.out_dir,
        num_workers=opts.num_workers,
    )
    create_manifest(opts.input_manifest, opts.noise_manifest, opts.snrs, opts.out_dir, opts.save_noise)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
|