|
import os
|
|
import torch
|
|
import torchaudio
|
|
import numpy as np
|
|
from utils import load_audio, preprocess_audio, extract_features
|
|
|
|
def preprocess_dataset(input_dir, output_dir, target_sample_rate=16000, target_length=16000, n_mfcc=13):
    """
    Preprocess every ``.wav`` file in a directory and save the extracted features.

    Each matching file is loaded with ``load_audio``, passed through
    ``preprocess_audio`` and ``extract_features``, and the result is written
    with ``torch.save`` into ``output_dir`` under the same base name with a
    ``.pt`` extension. ``output_dir`` is created if it does not exist.

    Args:
        input_dir (str): Directory containing the raw audio files. Only
            regular files whose names end in ``.wav`` are processed;
            sub-directories are not descended into.
        output_dir (str): Directory where the processed files will be saved.
        target_sample_rate (int): Desired sample rate for the audio.
        target_length (int): Desired length of the audio in samples.
        n_mfcc (int): Number of MFCC features to extract.

    Returns:
        None. Results are written to disk and a progress line is printed
        for every processed file.
    """
    os.makedirs(output_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting makes the
    # processing order (and therefore the log output) reproducible.
    for file_name in sorted(os.listdir(input_dir)):
        if not file_name.endswith('.wav'):
            continue

        file_path = os.path.join(input_dir, file_name)
        if not os.path.isfile(file_path):
            # A directory that merely happens to be named "*.wav" is not audio.
            continue

        waveform, sample_rate = load_audio(file_path)
        waveform = preprocess_audio(waveform, sample_rate, target_sample_rate, target_length)
        features = extract_features(waveform, target_sample_rate, n_mfcc)

        # Swap only the trailing extension. str.replace('.wav', '.pt') would
        # also rewrite any ".wav" appearing earlier in the file name.
        stem, _ = os.path.splitext(file_name)
        output_path = os.path.join(output_dir, stem + '.pt')
        torch.save(features, output_path)
        print(f"Processed and saved {file_name} to {output_path}")
|
|
|
|
def preprocess_single_file(file_path, target_sample_rate=16000, target_length=16000, n_mfcc=13):
    """
    Run one audio file through the load -> preprocess -> feature-extraction chain.

    Nothing is written to disk; the extracted features are handed back to
    the caller.

    Args:
        file_path (str): Path to the audio file.
        target_sample_rate (int): Desired sample rate for the audio.
        target_length (int): Desired length of the audio in samples.
        n_mfcc (int): Number of MFCC features to extract.

    Returns:
        features (Tensor): The value produced by ``extract_features`` for
            the preprocessed waveform.
    """
    raw_audio, native_rate = load_audio(file_path)
    conditioned = preprocess_audio(raw_audio, native_rate, target_sample_rate, target_length)
    # Feature extraction is told the *target* rate, since the waveform has
    # already been through preprocess_audio with that rate.
    return extract_features(conditioned, target_sample_rate, n_mfcc)
|
|
|
|
|
|
if __name__ == "__main__":
    # Placeholder locations: point these at real directories before running
    # this module as a script.
    raw_audio_dir, processed_audio_dir = "path/to/raw/audio", "path/to/processed/audio"

    # Uses the default sample rate, length and MFCC count of preprocess_dataset.
    preprocess_dataset(raw_audio_dir, processed_audio_dir)
|
|
|
|
|
|
|
|
|
|
|