TIMBOVILL committed on
Commit
2542bcb
·
verified ·
1 Parent(s): 3ed071b

Upload 4 files

Browse files
src/modules/console_colors.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Colors for the console"""
2
+
3
# Prefix for console messages: the text "[UltraSinger]" wrapped in the
# ANSI escape codes 92 (bright green foreground) and 0 (reset).
ULTRASINGER_HEAD = "\033[92m[UltraSinger]\033[0m"
4
+
5
+
6
def blue_highlighted(text: str) -> str:
    """Wrap ``text`` in the blue escape code followed by the reset code."""
    return "{}{}{}".format(Bcolors.blue, text, Bcolors.endc)
9
+
10
+
11
def gold_highlighted(text: str) -> str:
    """Wrap ``text`` in the gold escape code followed by the reset code."""
    return "{}{}{}".format(Bcolors.gold, text, Bcolors.endc)
14
+
15
+
16
def light_blue_highlighted(text: str) -> str:
    """Wrap ``text`` in the light blue escape code followed by the reset code."""
    return "{}{}{}".format(Bcolors.light_blue, text, Bcolors.endc)
19
+
20
+
21
def underlined(text: str) -> str:
    """Wrap ``text`` in the underline escape code followed by the reset code."""
    return "{}{}{}".format(Bcolors.underline, text, Bcolors.endc)
24
+
25
+
26
def red_highlighted(text: str) -> str:
    """Wrap ``text`` in the red escape code followed by the reset code."""
    return "{}{}{}".format(Bcolors.red, text, Bcolors.endc)
29
+
30
+
31
def cyan_highlighted(text: str) -> str:
    """Wrap ``text`` in the cyan escape code followed by the reset code."""
    return "{}{}{}".format(Bcolors.cyan, text, Bcolors.endc)
34
+
35
+
36
class Bcolors:
    """ANSI escape sequences used to style console output."""

    # Foreground colors
    red = "\033[91m"
    gold = "\033[93m"
    blue = "\033[94m"
    light_blue = "\033[96m"
    cyan = "\033[36m"

    # Text attributes
    underline = "\033[4m"

    # Resets all styling; the highlight helpers of this module append it
    # after the text they wrap.
    endc = "\033[0m"
src/modules/csv_handler.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """CSV export module"""
2
+
3
+ import csv
4
+
5
+ from modules.console_colors import ULTRASINGER_HEAD
6
+ from modules.Speech_Recognition.TranscribedData import TranscribedData
7
+
8
+
9
def export_transcribed_data_to_csv(transcribed_data: list[TranscribedData], filename: str) -> None:
    """Write the transcribed words to ``filename`` as CSV.

    The file starts with the header ``word,start,end,confidence`` followed by
    one row per entry of ``transcribed_data``, built from the entry's
    ``word``, ``start``, ``end`` and ``conf`` attributes. The file is opened
    in write mode, so existing content is replaced.
    """
    print(f"{ULTRASINGER_HEAD} Exporting transcribed data to CSV")

    # newline="" leaves the line endings to the csv module.
    with open(filename, "w", encoding="utf-8", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["word", "start", "end", "confidence"])
        writer.writerows(
            (data.word, data.start, data.end, data.conf)
            for data in transcribed_data
        )
26
+
27
+
28
def write_lists_to_csv(times, frequencies, confidences, filename: str) -> None:
    """Write three parallel sequences to ``filename`` as CSV.

    The file starts with the header ``time,frequency,confidence`` followed by
    one row per element of ``times``; the row at position ``i`` holds
    ``times[i]``, ``frequencies[i]`` and ``confidences[i]``. The file is
    opened in write mode, so existing content is replaced.

    Raises:
        IndexError: if ``frequencies`` or ``confidences`` has fewer elements
            than ``times``.
    """
    # newline="" leaves the line endings to the csv module.
    with open(filename, "w", encoding="utf-8", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["time", "frequency", "confidence"])
        for pos, time in enumerate(times):
            writer.writerow([time, frequencies[pos], confidences[pos]])
37
+
38
+
39
def read_data_from_csv(filename: str) -> list[list[str]]:
    """Read a CSV file and return its rows without the header row.

    Every row is a list of strings. The first row of the file is treated as
    the header and dropped; an empty file yields an empty list.
    """
    # newline="" is required by the csv module so that line breaks inside
    # quoted fields are passed through unchanged instead of being translated.
    with open(filename, "r", encoding="utf-8", newline="") as csv_file:
        csv_reader = csv.reader(csv_file)
        # Skip the header; the default keeps an empty file from raising.
        next(csv_reader, None)
        return list(csv_reader)
src/modules/plot.py ADDED
@@ -0,0 +1,303 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Plot transcribed data"""
2
+ import os
3
+ from dataclasses import dataclass
4
+ from re import sub
5
+
6
+ import librosa
7
+ import numpy
8
+ from matplotlib import pyplot as plt
9
+ from matplotlib.patches import Rectangle
10
+
11
+ from modules.Ultrastar.ultrastar_txt import UltrastarTxtValue
12
+ from modules.console_colors import ULTRASINGER_HEAD
13
+ from modules.Pitcher.pitched_data import PitchedData
14
+ from modules.Pitcher.pitcher import get_pitched_data_with_high_confidence
15
+ from modules.Speech_Recognition.TranscribedData import TranscribedData
16
+
17
+
18
@dataclass
class PlottedNote:
    """A musical note that can be shown as a labelled tick on the y axis."""

    # Note name including the octave number, e.g. "C4"
    note: str
    # Frequency of the note in Hz
    frequency: float
    # Base-10 logarithm of ``frequency``; the plot's y coordinate
    frequency_log_10: float
    # Octave number the note belongs to
    octave: int
26
+
27
+
28
# Names of the twelve semitones of one octave, in ascending order
NOTES = [
    "C", "C#", "D", "D#", "E", "F",
    "F#", "G", "G#", "A", "A#", "B",
]
# Octave numbers for which notes are generated for the y axis
OCTAVES = list(range(9))
# Distance between two regular ticks on the x axis
X_TICK_SIZE = 5
31
+
32
+
33
def get_frequency_range(midi_note: str) -> float:
    """Return the distance in Hz between ``midi_note`` and the next semitone.

    ``midi_note`` is a note name that ``librosa.note_to_midi`` understands.
    The result is the frequency of the MIDI number one above the note minus
    the frequency of the note itself.
    """
    midi_number = librosa.note_to_midi(midi_note)
    lower_hz = librosa.midi_to_hz(midi_number)
    upper_hz = librosa.midi_to_hz(midi_number + 1)
    return upper_hz - lower_hz
38
+
39
+
40
def create_plot_notes(notes: list[str], octaves: list[int]) -> list[PlottedNote]:
    """Build one ``PlottedNote`` per combination of note name and octave.

    The result is ordered octave by octave; within an octave the notes keep
    the order of ``notes``. Each entry carries the note name with the octave
    appended, its frequency and the base-10 logarithm of that frequency.
    """
    labelled = [
        (name + str(octave), octave) for octave in octaves for name in notes
    ]

    result = []
    for label, octave in labelled:
        hertz = librosa.note_to_hz(label)
        result.append(
            PlottedNote(
                note=label,
                frequency=hertz,
                frequency_log_10=numpy.log10([hertz])[0],
                octave=octave,
            )
        )
    return result


# All notes of all configured octaves, computed once when the module loads
PLOTTED_NOTES = create_plot_notes(NOTES, OCTAVES)
56
+
57
+
58
def plot(
    pitched_data: PitchedData,
    output_path: str,
    transcribed_data: list[TranscribedData] = None,
    ultrastar_class: UltrastarTxtValue = None,
    midi_notes: list[str] = None,
    title: str = None,
) -> None:
    """Plot the pitched data and save the figure as SVG in ``output_path``.

    The plot shows the log10 of the frequencies returned by
    ``get_pitched_data_with_high_confidence`` over time, as a thin line plus
    a scatter plot shaded by confidence, with a horizontal line and a y tick
    for every note in range. Words are drawn by ``plot_words`` from
    ``transcribed_data`` or, if that is None, from ``ultrastar_class``,
    using ``midi_notes``.

    The file is named ``plot.svg``, or ``plot_<snake-cased title>.svg`` when
    ``title`` is given. If there are fewer than two datapoints (before or
    after the confidence filter) a message is printed and nothing is written.
    """
    # The step size is derived from the first two timestamps, so make sure
    # they exist before indexing.
    if len(pitched_data.times) < 2:
        print(f"{ULTRASINGER_HEAD} Plot can't be created; too few datapoints")
        return

    # determine time between two datapoints if there is no gap (this is the step size crepe ran with)
    step_size = pitched_data.times[1] - pitched_data.times[0]
    pitched_data = get_pitched_data_with_high_confidence(pitched_data)

    if len(pitched_data.frequencies) < 2:
        print(f"{ULTRASINGER_HEAD} Plot can't be created; too few datapoints")
        return

    print(
        f"{ULTRASINGER_HEAD} Creating plot{': ' + title if title is not None else ''}"
    )

    # map each frequency to logarithm with base 10 for a linear progression of values between the musical notes
    # see http://www.phon.ox.ac.uk/jcoleman/LOGARITH.htm
    frequencies_log_10 = numpy.log10(pitched_data.frequencies)

    # add 'nan' where there are gaps for frequency values so the graph is only continuous where it should be
    pitched_data_with_gaps = create_gaps(pitched_data, step_size)
    frequencies_log_10_with_gaps = numpy.log10(pitched_data_with_gaps.frequencies)

    # dynamically set the minimum and maximum values for x and y axes based on data
    y_lower_bound, y_upper_bound = determine_bounds(frequencies_log_10)
    ymin = max(0, y_lower_bound - 0.05)
    ymax = y_upper_bound + 0.05
    plt.ylim(ymin, ymax)
    xmin = min(pitched_data.times)
    xmax = max(pitched_data.times)
    plt.xlim(xmin, xmax)

    plt.xlabel("Time (s)")
    plt.ylabel("log10 of Frequency (Hz)")

    notes_within_range = set_axes_ticks_and_labels(pitched_data.times, ymin, ymax)

    # draw horizontal lines for each note; every C is red, all other notes are blue
    for note in notes_within_range:
        color = "b"
        if note.note.startswith("C") and not note.note.startswith("C#"):
            color = "r"
        plt.axhline(y=note.frequency_log_10, color=color, linestyle="-", linewidth=0.2)

    # create line and scatter plot of pitched data
    plt.plot(pitched_data_with_gaps.times, frequencies_log_10_with_gaps, linewidth=0.1)
    scatter_path_collection = plt.scatter(
        pitched_data_with_gaps.times,
        frequencies_log_10_with_gaps,
        s=5,
        c=pitched_data_with_gaps.confidence,
        cmap=plt.colormaps.get_cmap("gray").reversed(),
        vmin=0,
        vmax=1,
    )
    plt.figure(1).colorbar(scatter_path_collection, label="confidence")

    set_figure_dimensions(xmax - xmin, y_upper_bound - y_lower_bound)

    plot_words(transcribed_data, ultrastar_class, midi_notes)

    if title is not None:
        plt.title(label=title)

    plt.figure(1).tight_layout(h_pad=1.4)

    dpi = 200
    plt.savefig(
        os.path.join(
            output_path, f"plot{'' if title is None else '_' + snake(title)}.svg"
        ),
        dpi=dpi,
    )
    # clear the shared pyplot figure and axes so later plots start empty
    plt.clf()
    plt.cla()
140
+
141
+
142
def set_axes_ticks_and_labels(confidence, ymin, ymax):
    """Configure tick positions and labels of both axes.

    The y axis gets one tick per entry of ``PLOTTED_NOTES`` whose log10
    frequency lies within ``[ymin, ymax]``, labelled with the note name.
    The x axis gets ticks at the multiples of ``X_TICK_SIZE`` that lie
    between the smallest and the largest value of ``confidence``, plus ticks
    at those two values themselves.

    NOTE(review): despite its name, ``confidence`` is used as the sequence of
    x values; ``plot`` passes the time stamps here.

    Returns the notes that lie within the y range.
    """
    visible_notes = [
        candidate
        for candidate in PLOTTED_NOTES
        if ymin <= candidate.frequency_log_10 <= ymax
    ]
    tick_positions = [entry.frequency_log_10 for entry in visible_notes]
    tick_labels = [entry.note for entry in visible_notes]
    plt.yticks(tick_positions, tick_labels)

    first_time = min(confidence)
    last_time = max(confidence)

    # first multiple of X_TICK_SIZE after first_time
    min_tick = first_time // X_TICK_SIZE * X_TICK_SIZE + X_TICK_SIZE
    # arange excludes its stop value; the + 0.1 keeps the last multiple in
    max_tick = last_time // X_TICK_SIZE * X_TICK_SIZE + 0.1
    ticks = numpy.arange(min_tick, max_tick, X_TICK_SIZE, dtype=int).tolist()

    if not ticks or ticks[0] != first_time:
        ticks.insert(0, first_time)

    if len(ticks) == 1 or ticks[-1] != last_time:
        ticks.append(last_time)

    plt.xticks(ticks, [str(tick) for tick in ticks])
    return visible_notes
167
+
168
+
169
def determine_bounds(frequency_log_10: list[float]) -> tuple[float, float]:
    """Return the 1st and the 99th percentile of the values as ``(lower, upper)``."""
    lower, upper = numpy.percentile(numpy.array(frequency_log_10), [1, 99])
    return lower, upper
175
+
176
+
177
def set_figure_dimensions(time_range, frequency_log_10_range):
    """Scale figure 1 according to the extent of the plotted data.

    The width is half of ``time_range`` but at least 6.4; the height is
    ``frequency_log_10_range / 0.06`` but at least 4.
    """
    figure = plt.figure(1)
    figure.set_figwidth(max(6.4, time_range / 2))
    figure.set_figheight(max(4, frequency_log_10_range / 0.06))
184
+
185
+
186
def create_gaps(pitched_data: PitchedData, step_size: float) -> PitchedData:
    """
    Add 'nan' where there are no high confidence frequency values.
    This way the graph is only continuous where it should be.

    A datapoint counts as following a gap when its time lies more than
    ``step_size`` after the previous datapoint's time. In that case an extra
    entry with the same time and confidence but a 'nan' frequency is inserted
    in front of it. Returns a new ``PitchedData``; the input is not modified.
    """
    pitched_data_with_gaps = PitchedData([], [], [])

    previous_time = 0
    for i, time in enumerate(pitched_data.times):
        distance = time - previous_time
        # Differences of floating point timestamps are inexact (for example
        # 0.07 - 0.06 is slightly larger than 0.01), so a distance that equals
        # the step size up to rounding noise must not be treated as a gap.
        comes_right_after_previous = distance <= step_size or bool(
            numpy.isclose(distance, step_size)
        )
        previous_frequency_is_not_gap = len(
            pitched_data_with_gaps.frequencies
        ) > 0 and not numpy.isnan(pitched_data_with_gaps.frequencies[-1])

        if previous_frequency_is_not_gap and not comes_right_after_previous:
            # 'nan' entry that interrupts the line in front of this datapoint
            pitched_data_with_gaps.times.append(time)
            pitched_data_with_gaps.frequencies.append(float("nan"))
            pitched_data_with_gaps.confidence.append(pitched_data.confidence[i])

        pitched_data_with_gaps.times.append(time)
        pitched_data_with_gaps.frequencies.append(pitched_data.frequencies[i])
        pitched_data_with_gaps.confidence.append(pitched_data.confidence[i])

        previous_time = time

    return pitched_data_with_gaps
213
+
214
+
215
def plot_word(midi_note: str, start, end, word):
    """Draw a semi-transparent red rectangle for one word and label it.

    Horizontally the rectangle reaches from ``start`` to ``end``. Vertically
    it covers the note's frequency plus and minus half of
    ``get_frequency_range(midi_note)``, converted to log10. ``word`` is
    written rotated by 90 degrees, a quarter of the width after ``start``,
    at the rectangle's upper edge.
    """
    note_frequency = librosa.note_to_hz(midi_note)
    half_range = get_frequency_range(midi_note) / 2

    bottom = numpy.log10([note_frequency - half_range])[0]
    top = numpy.log10([note_frequency + half_range])[0]
    duration = end - start

    plt.gca().add_patch(
        Rectangle(
            (start, bottom),
            duration,
            top - bottom,
            edgecolor="none",
            facecolor="red",
            alpha=0.5,
        )
    )
    plt.text(start + duration / 4, top, word, rotation=90)
239
+
240
+
241
def plot_words(transcribed_data: list[TranscribedData], ultrastar_class: UltrastarTxtValue, midi_notes: list[str]):
    """Draw rectangles for each word

    Words are taken from ``transcribed_data`` if it is not None, otherwise
    from ``ultrastar_class`` if that is not None. The word at position ``i``
    is drawn by ``plot_word`` at the pitch ``midi_notes[i]``. Nothing is
    drawn when ``midi_notes`` is None.

    Raises:
        IndexError: if ``midi_notes`` has fewer entries than there are words.
    """
    if midi_notes is None:
        # Without notes there is no pitch to place the words at; callers such
        # as ``plot`` default this argument to None.
        return

    if transcribed_data is not None:
        for i, data in enumerate(transcribed_data):
            plot_word(midi_notes[i], data.start, data.end, data.word)

    elif ultrastar_class is not None:
        for i, word in enumerate(ultrastar_class.words):
            plot_word(
                midi_notes[i],
                ultrastar_class.startTimes[i],
                ultrastar_class.endTimes[i],
                word,
            )
251
+
252
+
253
def snake(s):
    """Convert an arbitrary string to snake case.

    Hyphens count as word separators. A space is inserted in front of every
    run of capital letters and in front of every capitalized word; the
    resulting words are joined with underscores and lower-cased.
    """
    spaced = s.replace("-", " ")
    spaced = sub("([A-Z]+)", r" \1", spaced)
    spaced = sub("([A-Z][a-z]+)", r" \1", spaced)
    words = spaced.split()
    return "_".join(words).lower()
260
+
261
+
262
def plot_spectrogram(audio_seperation_path: str,
                     output_path: str,
                     title: str = "Spectrogram",
                     ) -> None:
    """Plot the spectrogram of an audio file and save it as SVG.

    The audio is loaded from ``audio_seperation_path`` with its native
    sampling rate and drawn with ``plt.specgram``. The figure is saved in
    ``output_path`` as ``plot_<snake-cased title>.svg``, or as ``plot.svg``
    when ``title`` is None.
    """
    print(
        f"{ULTRASINGER_HEAD} Creating plot{': ' + title if title is not None else ''}"
    )

    audio, sr = librosa.load(audio_seperation_path, sr=None)
    # only the frequency and time bins are needed, for the axis limits
    _, frequencies_found, times, _ = plt.specgram(audio, Fs=sr)
    plt.colorbar()

    if title is not None:
        plt.title(label=title)

    plt.xlabel("Time (s)")
    plt.ylabel("Frequency (Hz)")

    ymin = 0
    ymax = max(frequencies_found) + 0.05
    plt.ylim(ymin, ymax)
    xmin = 0
    xmax = max(times)
    plt.xlim(xmin, xmax)

    plt.figure(1).set_figwidth(max(6.4, xmax))
    plt.figure(1).set_figheight(4)

    plt.figure(1).tight_layout(h_pad=1.4)

    dpi = 200
    plt.savefig(
        os.path.join(
            output_path, f"plot{'' if title is None else '_' + snake(title)}.svg"
        ),
        dpi=dpi,
    )
    # clear the shared pyplot figure and axes so later plots start empty
    plt.clf()
    plt.cla()
src/modules/timer.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import atexit
2
+ from functools import reduce
3
+ from time import process_time
4
+
5
+ from modules.console_colors import ULTRASINGER_HEAD
6
+
7
+
8
def seconds_to_str(t):
    """Format a number of seconds as ``H:MM:SS.mmm``."""
    seconds, milliseconds = divmod(t * 1000, 1000)
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return "%d:%02d:%02d.%03d" % (hours, minutes, seconds, milliseconds)
13
+
14
+
15
def log(s):
    """Print ``s`` prefixed with the UltraSinger tag and a time stamp.

    The time stamp is the formatted value of ``process_time()``, i.e. the
    CPU time of the current process rather than wall-clock time.
    """
    elapsed = seconds_to_str(process_time())
    print(f"{ULTRASINGER_HEAD} {elapsed} - {s}")
18
+
19
+
20
# atexit.register returns the function it is given, so it can be used as a
# decorator; end_log is registered as soon as it is defined.
@atexit.register
def end_log():
    """Write the final log line; runs when the interpreter exits."""
    log("End Program")


# Importing this module logs the start message right away.
log("Initialized...")