jhtonyKoo committed
Commit 45ea23f
1 parent: cb6d216

Update inference/mastering_transfer.py

Files changed (1): inference/mastering_transfer.py (+18 -2)
inference/mastering_transfer.py CHANGED

@@ -21,6 +21,7 @@ sys.path.append(os.path.join(os.path.dirname(currentdir), "mixing_style_transfer
 from networks import FXencoder, TCNModel
 from data_loader import *
 import librosa
+import pyloudnorm
 
 
 
@@ -83,7 +84,7 @@ class Mastering_Style_Transfer_Inference:
 
     # Inference whole song
     def inference(self, input_track_path, reference_track_path):
-        print("\n======= Start to inference music mixing style transfer =======")
+        print("\n======= Start to inference music mastering style transfer =======")
         # normalized input
         output_name_tag = 'output' if self.args.normalize_input else 'output_notnormed'
 
@@ -92,6 +93,16 @@
         input_aud = torch.FloatTensor(input_aud).to(self.device)
         reference_aud = torch.FloatTensor(reference_aud).to(self.device)
 
+        # loudness normalization for stability
+        meter = pyloudnorm.Meter(44100)
+        loudness_in = meter.integrated_loudness(input_aud.transpose())
+        loudness_ref = meter.integrated_loudness(reference_aud.transpose())
+
+        input_aud = pyloudnorm.normalize.loudness(input_aud, loudness_in, -12)
+        input_aud = np.clip(input_aud, -1., 1.)
+        reference_aud = pyloudnorm.normalize.loudness(reference_aud, loudness_ref, -12)
+        reference_aud = np.clip(reference_aud, -1., 1.)
+
         cur_out_dir = './yt_dir/0/'
         os.makedirs(cur_out_dir, exist_ok=True)
         ''' segmentize whole songs into batch '''
@@ -120,7 +131,7 @@
             self.models["effects_encoder"].eval()
             reference_feature = self.models["effects_encoder"](cur_ref_data)
             infered_ref_data_list.append(reference_feature)
-        # compute average value from the extracted exbeddings
+        # compute average value from the extracted embeddings
         infered_ref_data = torch.stack(infered_ref_data_list)
         infered_ref_data_avg = torch.mean(infered_ref_data.reshape(infered_ref_data.shape[0]*infered_ref_data.shape[1], infered_ref_data.shape[2]), axis=0)
 
@@ -140,6 +151,11 @@
         # final output of current instrument
         fin_data_out_mastered = fin_data_out[:, :input_aud.shape[-1]].numpy()
 
+        # adjust to reference's loudness
+        loudness_out = meter.integrated_loudness(fin_data_out_mastered.transpose())
+        fin_data_out_mastered = pyloudnorm.normalize.loudness(fin_data_out_mastered, loudness_out, loudness_ref)
+        fin_data_out_mastered = np.clip(fin_data_out_mastered, -1., 1.)
+
         # remix
         fin_output_path_mastering = os.path.join(cur_out_dir, f"remastered_output.wav")
         sf.write(fin_output_path_mastering, fin_data_out_mastered.transpose(-1, -2), self.args.sample_rate, 'PCM_16')
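The substance of the change is the pyloudnorm pre-normalization: both the input and the reference are metered and gained to -12 LUFS before segmentation, so the model always sees material at a consistent working level. Below is a minimal sketch of that pattern, assuming plain numpy audio shaped (samples, channels); the -12 LUFS target and the final clip mirror the commit, while the helper itself is hypothetical. pyloudnorm operates on numpy arrays in (samples, channels) order, which is presumably why the commit transposes its channels-first buffers before metering.

    import numpy as np
    import pyloudnorm

    def normalize_to_lufs(audio: np.ndarray, sample_rate: int, target_lufs: float = -12.0) -> np.ndarray:
        # Measure integrated loudness (ITU-R BS.1770) of the whole clip.
        meter = pyloudnorm.Meter(sample_rate)
        loudness = meter.integrated_loudness(audio)
        # Apply the constant gain that moves the clip to the target loudness.
        gained = pyloudnorm.normalize.loudness(audio, loudness, target_lufs)
        # The gain can push peaks past full scale, hence the clip.
        return np.clip(gained, -1.0, 1.0)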
 
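For context on the hunk at new line 131: the reference track is split into segments, each batch is pushed through the FXencoder, and the per-batch embeddings are averaged into a single style vector. A toy sketch of that reduction with placeholder shapes (the batch size and embedding width here are made up; the real width comes from FXencoder):

    import torch

    # Stand-ins for per-batch FXencoder outputs, each shaped (batch, emb_dim).
    embeddings = [torch.randn(4, 2048) for _ in range(3)]

    stacked = torch.stack(embeddings)            # (num_batches, batch, emb_dim)
    # Collapse the batch dimensions and average all segment embeddings into
    # one reference feature, as the reshape + mean in the diff does.
    reference_feature_avg = stacked.reshape(-1, stacked.shape[-1]).mean(dim=0)   # (emb_dim,)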
 
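The last hunk undoes the -12 LUFS working level on the way out: the mastered audio is re-metered and gained so its integrated loudness matches the reference's, then clipped and written as 16-bit PCM. A sketch of that matching step, under the same hypothetical numpy conventions as above:

    import numpy as np
    import pyloudnorm

    def match_reference_loudness(output: np.ndarray, loudness_ref: float, sample_rate: int) -> np.ndarray:
        # Re-meter the processed audio, then gain it to the reference's LUFS.
        meter = pyloudnorm.Meter(sample_rate)
        loudness_out = meter.integrated_loudness(output)
        matched = pyloudnorm.normalize.loudness(output, loudness_out, loudness_ref)
        return np.clip(matched, -1.0, 1.0)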