Update inference/mastering_transfer.py
inference/mastering_transfer.py
CHANGED
@@ -21,6 +21,7 @@ sys.path.append(os.path.join(os.path.dirname(currentdir), "mixing_style_transfer"))
 from networks import FXencoder, TCNModel
 from data_loader import *
 import librosa
+import pyloudnorm



@@ -83,7 +84,7 @@ class Mastering_Style_Transfer_Inference:

     # Inference whole song
     def inference(self, input_track_path, reference_track_path):
-        print("\n======= Start to inference music
+        print("\n======= Start to inference music mastering style transfer =======")
         # normalized input
         output_name_tag = 'output' if self.args.normalize_input else 'output_notnormed'

@@ -92,6 +93,16 @@ class Mastering_Style_Transfer_Inference:
         input_aud = torch.FloatTensor(input_aud).to(self.device)
         reference_aud = torch.FloatTensor(reference_aud).to(self.device)

+        # loudness normalization for stability
+        meter = pyloudnorm.Meter(44100)
+        loudness_in = meter.integrated_loudness(input_aud.transpose())
+        loudness_ref = meter.integrated_loudness(reference_aud.transpose())
+
+        input_aud = pyloudnorm.normalize.loudness(input_aud, loudness_in, -12)
+        input_aud = np.clip(input_aud, -1., 1.)
+        reference_aud = pyloudnorm.normalize.loudness(reference_aud, loudness_ref, -12)
+        reference_aud = np.clip(reference_aud, -1., 1.)
+
         cur_out_dir = './yt_dir/0/'
         os.makedirs(cur_out_dir, exist_ok=True)
         ''' segmentize whole songs into batch '''
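The block added above measures the integrated loudness of both songs and gains them to a common -12 LUFS before segmentation, so the models see a consistent input level. A minimal standalone sketch of that step, assuming plain numpy channels-last audio with stand-in data (pyloudnorm's meter and normalizer operate on numpy arrays, not torch tensors, so in practice the measurement runs before the data is moved to the device):

import numpy as np
import pyloudnorm

rate = 44100
audio = 0.1 * np.random.randn(rate * 10, 2)   # stand-in 10 s stereo track, channels-last

meter = pyloudnorm.Meter(rate)                # ITU-R BS.1770 integrated loudness meter
loudness = meter.integrated_loudness(audio)   # measured loudness in LUFS

# apply a linear gain so the track sits at the -12 LUFS target, then clip
# to the legal [-1, 1] sample range to guard against overshoot
audio = pyloudnorm.normalize.loudness(audio, loudness, -12.0)
audio = np.clip(audio, -1.0, 1.0)

The matching adjustment back to the reference's loudness appears at the end of this diff.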
@@ -120,7 +131,7 @@ class Mastering_Style_Transfer_Inference:
             self.models["effects_encoder"].eval()
             reference_feature = self.models["effects_encoder"](cur_ref_data)
             infered_ref_data_list.append(reference_feature)
-        # compute average value from the extracted
+        # compute average value from the extracted embeddings
         infered_ref_data = torch.stack(infered_ref_data_list)
         infered_ref_data_avg = torch.mean(infered_ref_data.reshape(infered_ref_data.shape[0]*infered_ref_data.shape[1], infered_ref_data.shape[2]), axis=0)

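The comment fixed above documents the stack/reshape/mean sequence: each encoder pass over a batch of reference segments yields a (batch, dim) feature tensor, and averaging across every segment produces one style embedding for the whole reference track. A shape-level sketch with illustrative sizes (segment count, batch size, and embedding width here are assumptions, not the FXencoder's actual values):

import torch

# four stand-in encoder outputs, each for a batch of 8 reference segments
infered_ref_data_list = [torch.randn(8, 2048) for _ in range(4)]

stacked = torch.stack(infered_ref_data_list)    # (4, 8, 2048)
flat = stacked.reshape(-1, stacked.shape[-1])   # (32, 2048): one row per segment
ref_embedding_avg = flat.mean(dim=0)            # (2048,): average style embedding

The explicit reshape in the diff computes the same (num_batches * batch, dim) flattening as the reshape(-1, ...) shorthand here.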
@@ -140,6 +151,11 @@ class Mastering_Style_Transfer_Inference:
         # final output of current instrument
         fin_data_out_mastered = fin_data_out[:, :input_aud.shape[-1]].numpy()

+        # adjust to reference's loudness
+        loudness_out = meter.integrated_loudness(fin_data_out_mastered.transpose())
+        fin_data_out_mastered = pyloudnorm.normalize.loudness(fin_data_out_mastered, loudness_out, loudness_ref)
+        fin_data_out_mastered = np.clip(fin_data_out_mastered, -1., 1.)
+
         # remix
         fin_output_path_mastering = os.path.join(cur_out_dir, f"remastered_output.wav")
         sf.write(fin_output_path_mastering, fin_data_out_mastered.transpose(-1, -2), self.args.sample_rate, 'PCM_16')
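The final added block gains the mastered render from its own integrated loudness to the reference's, clips, and writes the result as 16-bit PCM. A self-contained sketch of the same sequence with stand-in data and a stand-in reference loudness (the array is channels-first as in the diff, hence the transposes: pyloudnorm and soundfile both expect channels-last):

import numpy as np
import pyloudnorm
import soundfile as sf

rate = 44100
output = 0.1 * np.random.randn(2, rate * 10)  # stand-in (channels, samples) render
loudness_ref = -9.0                           # stand-in reference loudness in LUFS

# measure the render, then gain it to match the reference's loudness
meter = pyloudnorm.Meter(rate)
loudness_out = meter.integrated_loudness(output.transpose())
output = pyloudnorm.normalize.loudness(output, loudness_out, loudness_ref)
output = np.clip(output, -1.0, 1.0)

# channels-last for soundfile; 'PCM_16' is the subtype, as in the diff
sf.write("remastered_output.wav", output.transpose(), rate, "PCM_16")

Matching the output to the reference's integrated loudness, rather than leaving it at the -12 LUFS working level, restores the perceived level of the mastering target.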