Update inference/mastering_transfer.py
inference/mastering_transfer.py
CHANGED
@@ -21,6 +21,7 @@ sys.path.append(os.path.join(os.path.dirname(currentdir), "mixing_style_transfer"))
 from networks import FXencoder, TCNModel
 from data_loader import *
 import librosa
+import pyloudnorm



@@ -83,7 +84,7 @@ class Mastering_Style_Transfer_Inference:

     # Inference whole song
     def inference(self, input_track_path, reference_track_path):
-        print("\n======= Start to inference music
+        print("\n======= Start to inference music mastering style transfer =======")
         # normalized input
         output_name_tag = 'output' if self.args.normalize_input else 'output_notnormed'

@@ -92,6 +93,16 @@ class Mastering_Style_Transfer_Inference:
         input_aud = torch.FloatTensor(input_aud).to(self.device)
         reference_aud = torch.FloatTensor(reference_aud).to(self.device)

+        # loudness normalization for stability
+        meter = pyloudnorm.Meter(44100)
+        loudness_in = meter.integrated_loudness(input_aud.transpose())
+        loudness_ref = meter.integrated_loudness(reference_aud.transpose())
+
+        input_aud = pyloudnorm.normalize.loudness(input_aud, loudness_in, -12)
+        input_aud = np.clip(input_aud, -1., 1.)
+        reference_aud = pyloudnorm.normalize.loudness(reference_aud, loudness_ref, -12)
+        reference_aud = np.clip(reference_aud, -1., 1.)
+
         cur_out_dir = './yt_dir/0/'
         os.makedirs(cur_out_dir, exist_ok=True)
         ''' segmentize whole songs into batch '''
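The block added above measures the integrated loudness of both songs and gains them to a common -12 LUFS before segmentation, so the models see a consistent input level. A minimal standalone sketch of that step, assuming plain numpy channels-last audio with stand-in data (pyloudnorm's meter and normalizer operate on numpy arrays, not torch tensors, so in practice the measurement runs before the data is moved to the device):

import numpy as np
import pyloudnorm

rate = 44100
audio = 0.1 * np.random.randn(rate * 10, 2)   # stand-in 10 s stereo track, channels-last

meter = pyloudnorm.Meter(rate)                # ITU-R BS.1770 integrated loudness meter
loudness = meter.integrated_loudness(audio)   # measured loudness in LUFS

# apply a linear gain so the track sits at the -12 LUFS target, then clip
# to the legal [-1, 1] sample range to guard against overshoot
audio = pyloudnorm.normalize.loudness(audio, loudness, -12.0)
audio = np.clip(audio, -1.0, 1.0)

The matching adjustment back to the reference's loudness appears at the end of this diff.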
@@ -120,7 +131,7 @@ class Mastering_Style_Transfer_Inference:
             self.models["effects_encoder"].eval()
             reference_feature = self.models["effects_encoder"](cur_ref_data)
             infered_ref_data_list.append(reference_feature)
-        # compute average value from the extracted
+        # compute average value from the extracted embeddings
         infered_ref_data = torch.stack(infered_ref_data_list)
         infered_ref_data_avg = torch.mean(infered_ref_data.reshape(infered_ref_data.shape[0]*infered_ref_data.shape[1], infered_ref_data.shape[2]), axis=0)

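The comment fixed above documents the stack/reshape/mean sequence: each encoder pass over a batch of reference segments yields a (batch, dim) feature tensor, and averaging across every segment produces one style embedding for the whole reference track. A shape-level sketch with illustrative sizes (segment count, batch size, and embedding width here are assumptions, not the FXencoder's actual values):

import torch

# four stand-in encoder outputs, each for a batch of 8 reference segments
infered_ref_data_list = [torch.randn(8, 2048) for _ in range(4)]

stacked = torch.stack(infered_ref_data_list)    # (4, 8, 2048)
flat = stacked.reshape(-1, stacked.shape[-1])   # (32, 2048): one row per segment
ref_embedding_avg = flat.mean(dim=0)            # (2048,): average style embedding

The explicit reshape in the diff computes the same (num_batches * batch, dim) flattening as the reshape(-1, ...) shorthand here.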
@@ -140,6 +151,11 @@ class Mastering_Style_Transfer_Inference:
         # final output of current instrument
         fin_data_out_mastered = fin_data_out[:, :input_aud.shape[-1]].numpy()

+        # adjust to reference's loudness
+        loudness_out = meter.integrated_loudness(fin_data_out_mastered.transpose())
+        fin_data_out_mastered = pyloudnorm.normalize.loudness(fin_data_out_mastered, loudness_out, loudness_ref)
+        fin_data_out_mastered = np.clip(fin_data_out_mastered, -1., 1.)
+
         # remix
         fin_output_path_mastering = os.path.join(cur_out_dir, f"remastered_output.wav")
         sf.write(fin_output_path_mastering, fin_data_out_mastered.transpose(-1, -2), self.args.sample_rate, 'PCM_16')
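The final added block gains the mastered render from its own integrated loudness to the reference's, clips, and writes the result as 16-bit PCM. A self-contained sketch of the same sequence with stand-in data and a stand-in reference loudness (the array is channels-first as in the diff, hence the transposes: pyloudnorm and soundfile both expect channels-last):

import numpy as np
import pyloudnorm
import soundfile as sf

rate = 44100
output = 0.1 * np.random.randn(2, rate * 10)  # stand-in (channels, samples) render
loudness_ref = -9.0                           # stand-in reference loudness in LUFS

# measure the render, then gain it to match the reference's loudness
meter = pyloudnorm.Meter(rate)
loudness_out = meter.integrated_loudness(output.transpose())
output = pyloudnorm.normalize.loudness(output, loudness_out, loudness_ref)
output = np.clip(output, -1.0, 1.0)

# channels-last for soundfile; 'PCM_16' is the subtype, as in the diff
sf.write("remastered_output.wav", output.transpose(), rate, "PCM_16")

Matching the output to the reference's integrated loudness, rather than leaving it at the -12 LUFS working level, restores the perceived level of the mastering target.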