Spaces:
Runtime error
Runtime error
File size: 5,039 Bytes
7b18186 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import os
import cog
import tempfile
import zipfile
from pathlib import Path
import argparse
import data.utils
import model.utils as model_utils
from test import predict_song
from model.waveunet import Waveunet
class waveunetPredictor(cog.Predictor):
    """Cog predictor that separates an audio mixture into per-instrument tracks.

    ``setup`` builds a ``Waveunet`` model from a fixed set of default
    hyper-parameters and restores its weights from a checkpoint; ``predict``
    runs the separation on one input file and returns a zip archive holding
    one ``.wav`` file per predicted instrument.
    """

    @staticmethod
    def _build_arg_parser():
        """Return the parser that defines the model/inference hyper-parameters.

        The parser is only ever used to materialise its defaults (see
        ``setup``); it is kept as an ``argparse`` parser so the resulting
        namespace has the attribute layout ``predict_song`` is handed.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument(
            "--instruments",
            type=str,
            nargs="+",
            default=["bass", "drums", "other", "vocals"],
            help='List of instruments to separate (default: "bass drums other vocals")',
        )
        parser.add_argument(
            "--cuda", action="store_true", help="Use CUDA (default: False)"
        )
        parser.add_argument(
            "--features",
            type=int,
            default=32,
            help="Number of feature channels per layer",
        )
        parser.add_argument(
            "--load_model",
            type=str,
            default="checkpoints/waveunet/model",
            help="Reload a previously trained model",
        )
        parser.add_argument("--batch_size", type=int, default=4, help="Batch size")
        parser.add_argument(
            "--levels", type=int, default=6, help="Number of DS/US blocks"
        )
        parser.add_argument(
            "--depth", type=int, default=1, help="Number of convs per block"
        )
        parser.add_argument("--sr", type=int, default=44100, help="Sampling rate")
        parser.add_argument(
            "--channels", type=int, default=2, help="Number of input audio channels"
        )
        parser.add_argument(
            "--kernel_size",
            type=int,
            default=5,
            help="Filter width of kernels. Has to be an odd number",
        )
        parser.add_argument(
            "--output_size", type=float, default=2.0, help="Output duration"
        )
        parser.add_argument(
            "--strides", type=int, default=4, help="Strides in Waveunet"
        )
        parser.add_argument(
            "--conv_type",
            type=str,
            default="gn",
            help="Type of convolution (normal, BN-normalised, GN-normalised): normal/bn/gn",
        )
        parser.add_argument(
            "--res",
            type=str,
            default="fixed",
            help="Resampling strategy: fixed sinc-based lowpass filtering or learned conv layer: fixed/learned",
        )
        parser.add_argument(
            "--separate",
            type=int,
            default=1,
            help="Train separate model for each source (1) or only one (0)",
        )
        parser.add_argument(
            "--feature_growth",
            type=str,
            default="double",
            help="How the features in each layer should grow, either (add) the initial number of features each time, or multiply by 2 (double)",
        )
        # No --input/--output options here: the input path is passed to
        # predict() per call and outputs go to fresh temporary directories.
        return parser

    def setup(self):
        """Init wave u net model"""
        # Parse an empty argv so that only the defaults above are used and the
        # hosting process' real command line is never consulted.
        args = self._build_arg_parser().parse_args([])
        self.args = args

        # Feature channels per level: either grow linearly ("add") or double
        # at every level (any other value of feature_growth).
        if args.feature_growth == "add":
            num_features = [args.features * i for i in range(1, args.levels + 1)]
        else:
            num_features = [args.features * 2 ** i for i in range(0, args.levels)]

        # Output duration multiplied by the sampling rate, truncated to int.
        target_outputs = int(args.output_size * args.sr)

        self.model = Waveunet(
            args.channels,
            num_features,
            args.channels,
            args.instruments,
            kernel_size=args.kernel_size,
            target_output_size=target_outputs,
            depth=args.depth,
            strides=args.strides,
            conv_type=args.conv_type,
            res=args.res,
            separate=args.separate,
        )

        if args.cuda:
            # Wrap the model just built; the previous code referenced an
            # undefined local ``model`` here and raised NameError.
            self.model = model_utils.DataParallel(self.model)
            print("move model to gpu")
            self.model.cuda()

        print("Loading model from checkpoint " + str(args.load_model))
        # NOTE(review): the second argument is presumably the optimizer slot,
        # left as None for inference -- confirm against model_utils.load_model.
        state = model_utils.load_model(self.model, None, args.load_model, args.cuda)
        print("Step", state["step"])

    @cog.input("input", type=Path, help="audio mixture path")
    def predict(self, input):
        """Separate tracks from input mixture audio

        Args:
            input: Path of the audio mixture to separate.

        Returns:
            Path to ``output.zip`` in a fresh temporary directory. The archive
            contains one ``<input file name>_<instrument>.wav`` entry per key
            returned by ``predict_song``, stored at the archive root.
        """
        out_path = Path(tempfile.mkdtemp())
        zip_path = Path(tempfile.mkdtemp()) / "output.zip"

        preds = predict_song(self.args, input, self.model)

        # Write each separated track next to each other in the temp folder.
        mixture_name = os.path.basename(str(input))
        out_names = []
        for inst, audio in preds.items():
            wav_path = os.path.join(str(out_path), mixture_name + "_" + inst + ".wav")
            data.utils.write_wav(wav_path, audio, self.args.sr)
            out_names.append(wav_path)

        with zipfile.ZipFile(str(zip_path), "w") as zf:
            for wav_path in out_names:
                # Store only the file name; without arcname the archive would
                # embed the random temporary directory path of this machine.
                zf.write(wav_path, arcname=os.path.basename(wav_path))

        return zip_path
|