Spaces:
Running
on
Zero
Running
on
Zero
File size: 2,777 Bytes
91d712c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
# Env - chords_extraction on devfair
import pickle
import argparse
from chord_extractor.extractors import Chordino # type: ignore
from chord_extractor import clear_conversion_cache, LabelledChordSequence # type: ignore
import os
from tqdm import tqdm
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options for the chord extraction script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so existing
            callers that pass nothing behave exactly as before.

    Returns:
        The parsed namespace with three attributes:
        ``src_jsonl_file`` (str, required), ``target_output_dir`` (str,
        required) and ``override`` (bool, ``False`` unless ``--override`` is
        given).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--src_jsonl_file', type=str, required=True,
        help='abs path to .jsonl file containing list of absolute file paths seperated by new line',
    )
    parser.add_argument(
        '--target_output_dir', type=str, required=True,
        help='target directory to save parsed chord files to, individual files will be saved inside',
    )
    # Flag only: presence turns it on, there is no value to parse.
    parser.add_argument("--override", action="store_true")
    return parser.parse_args(argv)
def save_to_db_cb(tgt_dir: str):
    """Build a callback that pickles one file's extracted chords into ``tgt_dir``.

    The returned callable is meant to be invoked once per processed file with
    an object exposing ``id`` (the path of the source file) and ``sequence``
    (items carrying ``chord`` and ``timestamp`` attributes). For each call it
    writes ``<tgt_dir>/<stem>.chords``, where ``<stem>`` is the source file's
    base name without its extension, containing a pickled list of
    ``(chord, timestamp)`` tuples.

    Args:
        tgt_dir: Directory the ``.chords`` files are written to. It must
            already exist; this function does not create it.

    Returns:
        The per-file callback. It returns ``None`` and reports problems by
        printing rather than raising, so one bad file does not stop a batch.
    """
    def inner(results: "LabelledChordSequence"):
        # NOTE(review): chord_extractor appears to hand over sequence=None for
        # a file whose extraction failed when stop_on_error is False - confirm
        # against the library. Iterating None would raise inside the callback.
        if results.sequence is None:
            print(f"No chord sequence for {results.id}; nothing saved")
            return

        # Derive the output name from the path components instead of splitting
        # on the ".wav" substring, which broke on paths that contain ".wav"
        # elsewhere (e.g. a directory name) and on any other extension.
        stem = os.path.splitext(os.path.basename(results.id))[0]
        if not stem:
            print(f"Cannot derive an output file name from {results.id!r}; nothing saved")
            return

        sequence = [(item.chord, item.timestamp) for item in results.sequence]
        with open(os.path.join(tgt_dir, f"{stem}.chords"), "wb") as f:
            # dump the object to the file
            pickle.dump(sequence, f)

    return inner
if __name__ == "__main__":
    # This script extracts chord data from a list of audio files using the
    # Chordino extractor, and saves the extracted chords to individual files
    # in a target directory.
    args = parse_args()
    print("parsed args")

    # The callback writes into this directory; create it up front so a missing
    # directory is not discovered only after extraction has already run.
    os.makedirs(args.target_output_dir, exist_ok=True)

    files_to_extract_from = []
    with open(args.src_jsonl_file, "r") as json_file:
        for line in tqdm(json_file.readlines()):
            # Each line is one plain-text path (not a JSON record).
            fpath = line.rstrip("\r\n")
            if not fpath:
                # Blank lines (e.g. a trailing newline) are not paths.
                continue
            if not args.override:
                # Skip inputs whose output already exists. The name is built
                # from the base name minus its extension so that ".wav"
                # appearing elsewhere in the name is left alone.
                stem = os.path.splitext(os.path.basename(fpath))[0]
                if os.path.exists(os.path.join(args.target_output_dir, f"{stem}.chords")):
                    continue
            files_to_extract_from.append(fpath)
    print(f"num files to parse: {len(files_to_extract_from)}")

    chordino = Chordino()
    # Optionally clear cache of file conversions (e.g. wav files that have been converted from midi)
    clear_conversion_cache()
    # Run bulk extraction; per-file results are persisted by the callback, so
    # the returned value is not kept.
    chordino.extract_many(
        files_to_extract_from,
        callback=save_to_db_cb(args.target_output_dir),
        num_extractors=80,
        num_preprocessors=80,
        max_files_in_cache=400,
        stop_on_error=False,
    )