|
import csv |
|
import re |
|
|
|
|
|
def load_lines(filename):
    """Return every line of *filename* with surrounding whitespace removed.

    The file is opened in text mode with the platform default encoding.
    Blank lines are kept and come back as empty strings.
    """
    with open(filename) as handle:
        return [raw_line.strip() for raw_line in handle]
|
|
|
|
|
def remove_titles_and_bad_tracks(lines):
    """Return only the lines that look like identified tracks.

    A line is kept when it starts with a digit and does not contain the
    ``???`` placeholder; every other line is discarded. Input order is
    preserved.
    """
    starts_with_digit = re.compile(r"^\d.*")

    def is_known_track(candidate):
        if "???" in candidate:
            return False
        return starts_with_digit.match(candidate) is not None

    return [candidate for candidate in lines if is_known_track(candidate)]
|
|
|
|
|
def group_by_set(lines):
    """Group track lines into one list per DJ set.

    A line ending in ``:`` is treated as a set title and closes the set
    collected so far. Any other line that starts with a digit and contains
    a ``:`` is treated as a track and is kept unless it contains the
    ``???`` placeholder. Blank lines and all remaining lines are ignored.

    Args:
        lines: Iterable of text lines.

    Returns:
        A list of non-empty lists of track lines, in input order. Sets
        that end up with no usable tracks are omitted.
    """
    is_set_title = re.compile(r".*:$")
    is_track = re.compile(r"^\d.*:")

    grouped_lines = []
    current_set = []
    for line in lines:
        if not line.strip():
            continue
        if is_set_title.match(line):
            # Titles are checked first so that a title starting with a
            # digit (e.g. "2020 Mix:") is never mistaken for a track.
            if current_set:
                grouped_lines.append(current_set)
                current_set = []
        elif is_track.match(line) and "???" not in line:
            current_set.append(line)

    # No title follows the final set, so it has to be flushed explicitly;
    # otherwise the last set in the input is silently dropped.
    if current_set:
        grouped_lines.append(current_set)

    return grouped_lines
|
|
|
|
|
def get_grouped_artists(grouped_lines):
    """Extract the artist name from each track line, set by set.

    Each inner list of *grouped_lines* yields one inner list in the result.
    The artist is the text between the leading ``<number>: `` and the first
    `` - `` separator, stripped of surrounding whitespace and lower-cased.
    Lines that do not fit that shape are skipped.
    """
    track_pattern = re.compile(r"\d+\: (.+?) - .+?")

    def artists_in(track_lines):
        hits = (track_pattern.match(entry) for entry in track_lines)
        return [hit.group(1).strip().lower() for hit in hits if hit]

    return [artists_in(track_lines) for track_lines in grouped_lines]
|
|
|
|
|
def write_to_csv(filename, rows=None):
    """Write one CSV row per DJ set to *filename*.

    Args:
        filename: Path of the CSV file to create or overwrite.
        rows: Iterable of rows, each an iterable of artist names. When
            omitted, the module-level ``artist_names`` is used so that
            existing one-argument calls keep working.

    Raises:
        NameError: If *rows* is omitted and no module-level
            ``artist_names`` exists.
    """
    if rows is None:
        # Backward-compatible fallback: the script entry point stores the
        # data in a module-level global and calls this with the path only.
        rows = artist_names  # noqa: F821

    # Write to the path that was passed in, not to a global output path.
    with open(filename, "w", newline="") as csvfile:
        csv.writer(csvfile).writerows(rows)
|
|
|
|
|
if __name__ == "__main__":
    # Where the artist rows are written, and where the raw listing is read.
    output_filename = "data/artist-names-per-row.csv"
    filename = "data/radio-original.txt"

    # Pipeline: raw lines -> track lines per set -> artist names per set.
    # These names stay at module level because write_to_csv reads
    # artist_names and output_filename as globals.
    lines = load_lines(filename)
    grouped_lines = group_by_set(lines)
    artist_names = get_grouped_artists(grouped_lines)

    write_to_csv(output_filename)
|
|