tkell
/

tracklist-artist-to-vec

Model card Files Files and versions Community

tracklist-artist-to-vec / parse_tracklists.py

Thor Kell

add python code

628e563 over 1 year ago

history blame contribute delete

1.85 kB

	import csv
	import re


	def load_lines(filename):
	    """Read a text file and return its lines, stripped of surrounding whitespace.

	    The file is decoded as UTF-8 explicitly so that non-ASCII characters are
	    read the same way on every platform, instead of depending on the locale's
	    default encoding.

	    Args:
	        filename: Path of the text file to read.

	    Returns:
	        A list with one string per line of the file, in file order. Blank
	        lines are kept as empty strings.
	    """
	    with open(filename, encoding="utf-8") as f:
	        return [line.strip() for line in f]


	def remove_titles_and_bad_tracks(lines):
	    """Keep only the lines that look like identified tracks.

	    A line is kept when it begins with a digit and does not contain the
	    placeholder "???"; every other line is discarded.

	    Args:
	        lines: Iterable of strings to filter.

	    Returns:
	        A new list with the surviving lines in their original order.
	    """
	    starts_with_digit = re.compile(r"^\d.*")
	    return [
	        entry
	        for entry in lines
	        if "???" not in entry and starts_with_digit.match(entry)
	    ]


	def group_by_set(lines):
	    """Split a flat list of tracklist lines into one list of tracks per set.

	    A line ending in ":" is treated as a set title and closes the set that is
	    currently being collected (if it has any tracks). A line that starts with
	    a digit, contains a ":" and does not contain the placeholder "???" is
	    collected as a track. Blank lines and anything else are ignored.

	    Args:
	        lines: Iterable of strings, titles and tracks interleaved.

	    Returns:
	        A list of non-empty lists; each inner list holds the track lines of
	        one set, in input order. Sets without any usable track are omitted.
	    """
	    is_set_title = re.compile(r".*:$")
	    is_track = re.compile(r"^\d.*:")
	    grouped_lines = []

	    current_set = []
	    for line in lines:
	        if not line.strip():
	            continue
	        if is_set_title.match(line) and current_set:
	            grouped_lines.append(current_set)
	            current_set = []
	        elif is_track.match(line) and "???" not in line:
	            current_set.append(line)

	    # No title follows the last set, so it has to be flushed explicitly;
	    # otherwise the final set of the input would be lost.
	    if current_set:
	        grouped_lines.append(current_set)

	    return grouped_lines


	def get_grouped_artists(grouped_lines):
	    """Extract the artist name from every track line, set by set.

	    A track line is expected to look like "<number>: <artist> - <title>".
	    The text between the "<number>: " prefix and the first " - " is taken as
	    the artist, then stripped and lower-cased. Lines that do not fit this
	    shape are skipped.

	    Args:
	        grouped_lines: Iterable of sets, each an iterable of track lines.

	    Returns:
	        A list with one list of artist names per input set, in input order.
	        A set with no matching line yields an empty list.
	    """
	    track_pattern = re.compile(r"\d+\: (.+?) - .+?")

	    def artists_of(track_lines):
	        # Match every line first, then keep only the successful matches.
	        found = (track_pattern.match(entry) for entry in track_lines)
	        return [hit.group(1).strip().lower() for hit in found if hit]

	    return [artists_of(track_lines) for track_lines in grouped_lines]


	def write_to_csv(filename, rows=None):
	    """Write one CSV row per set of artist names.

	    The file is written to the path given by ``filename``. Earlier versions
	    ignored this argument and wrote to the module-level ``output_filename``
	    instead.

	    Args:
	        filename: Path of the CSV file to create or overwrite.
	        rows: Iterable of rows, each an iterable of artist names. When
	            omitted, the module-level ``artist_names`` is used, so existing
	            single-argument calls keep working.

	    Raises:
	        NameError: If ``rows`` is omitted and no module-level
	            ``artist_names`` exists.
	    """
	    if rows is None:
	        # Backward compatibility: fall back to the global that the script
	        # entry point populates before calling this function.
	        rows = artist_names
	    # newline="" lets the csv module control line endings itself.
	    with open(filename, "w", newline="", encoding="utf-8") as csvfile:
	        csv.writer(csvfile).writerows(rows)

	if __name__ == "__main__":
	    # Input tracklist dump and the CSV it is converted into.
	    filename, output_filename = (
	        "data/radio-original.txt",
	        "data/artist-names-per-row.csv",
	    )

	    # Pipeline: raw lines -> tracks grouped per set -> artist names per set.
	    lines = load_lines(filename)
	    grouped_lines = group_by_set(lines)
	    # Keep this as a module-level name: write_to_csv picks up
	    # `artist_names` from module scope when called with only a path.
	    artist_names = get_grouped_artists(grouped_lines)

	    write_to_csv(output_filename)