File size: 9,742 Bytes
2542bcb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
"""Plot transcribed data"""
import os
from dataclasses import dataclass
from re import sub

import librosa
import numpy
from matplotlib import pyplot as plt
from matplotlib.patches import Rectangle

from modules.Ultrastar.ultrastar_txt import UltrastarTxtValue
from modules.console_colors import ULTRASINGER_HEAD
from modules.Pitcher.pitched_data import PitchedData
from modules.Pitcher.pitcher import get_pitched_data_with_high_confidence
from modules.Speech_Recognition.TranscribedData import TranscribedData


@dataclass
class PlottedNote:
    """A musical note prepared for use as a y-axis tick of the pitch plot"""

    # note name including its octave, e.g. "C#4" (built as note + str(octave))
    note: str
    # frequency of the note as returned by librosa.note_to_hz
    frequency: float
    # base-10 logarithm of `frequency`; the pitch plot's y axis uses this scale
    frequency_log_10: float
    # octave number the note belongs to
    octave: int


# The twelve note names of one octave (sharps only), in ascending order starting at C.
NOTES = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
# Octave numbers that are combined with NOTES to build the list of plottable notes.
OCTAVES = [0, 1, 2, 3, 4, 5, 6, 7, 8]
# Distance between two regular x-axis ticks (the x axis is labelled "Time (s)").
X_TICK_SIZE = 5


def get_frequency_range(midi_note: str) -> float:
    """Return the frequency distance between a note and the next higher midi note

    `midi_note` is a note name that librosa.note_to_midi can convert (e.g. "A4").
    """
    midi_number = librosa.note_to_midi(midi_note)
    next_note_hz = librosa.midi_to_hz(midi_number + 1)
    this_note_hz = librosa.midi_to_hz(midi_number)
    return next_note_hz - this_note_hz


def create_plot_notes(notes: list[str], octaves: list[int]) -> list[PlottedNote]:
    """Build one PlottedNote per note/octave combination for the plot's y axis

    The result is ordered octave by octave, with the notes of each octave in
    the order given by `notes`.
    """

    def build(name: str, octave: int) -> PlottedNote:
        # the log10 value is stored next to the frequency because the y axis is logarithmic
        hertz = librosa.note_to_hz(name)
        return PlottedNote(name, hertz, numpy.log10([hertz])[0], octave)

    return [
        build(note + str(octave), octave)
        for octave in octaves
        for note in notes
    ]


# Every note of every octave in OCTAVES, built once at import time; used to pick the y-axis ticks.
PLOTTED_NOTES = create_plot_notes(NOTES, OCTAVES)


def plot(
        pitched_data: PitchedData,
        output_path: str,
        transcribed_data: list[TranscribedData] = None,
        ultrastar_class: UltrastarTxtValue = None,
        midi_notes: list[str] = None,
        title: str = None,
) -> None:
    """Plot pitched data and, optionally, the words that belong to it.

    The pitch curve is drawn on a log10 frequency axis with one horizontal
    line per musical note. If `transcribed_data` or `ultrastar_class` is
    given, a rectangle is drawn for every word at the pitch of the matching
    entry in `midi_notes`.

    The figure is saved to `output_path` as "plot.svg", or as
    "plot_<snake case title>.svg" when `title` is given. If there are fewer
    than two datapoints (before or after filtering with
    get_pitched_data_with_high_confidence) a message is printed and nothing
    is saved.
    """

    # The step size has to be read from the unfiltered data. Make sure a second
    # datapoint exists instead of failing with an IndexError on `times[1]`.
    if len(pitched_data.times) < 2:
        print(f"{ULTRASINGER_HEAD} Plot can't be created; too few datapoints")
        return

    # determine time between two datapoints if there is no gap (this is the step size crepe ran with)
    step_size = pitched_data.times[1]
    pitched_data = get_pitched_data_with_high_confidence(pitched_data)

    if len(pitched_data.frequencies) < 2:
        print(f"{ULTRASINGER_HEAD} Plot can't be created; too few datapoints")
        return

    print(
        f"{ULTRASINGER_HEAD} Creating plot{': ' + title if title is not None else ''}"
    )

    # map each frequency to logarithm with base 10 for a linear progression of values between the musical notes
    # see http://www.phon.ox.ac.uk/jcoleman/LOGARITH.htm
    frequencies_log_10 = numpy.log10(pitched_data.frequencies)

    # add 'nan' where there are gaps for frequency values so the graph is only continuous where it should be
    pitched_data_with_gaps = create_gaps(pitched_data, step_size)
    frequencies_log_10_with_gaps = numpy.log10(pitched_data_with_gaps.frequencies)

    # dynamically set the minimum and maximum values for x and y axes based on data
    y_lower_bound, y_upper_bound = determine_bounds(frequencies_log_10)
    ymin = max(0, y_lower_bound - 0.05)
    ymax = y_upper_bound + 0.05
    plt.ylim(ymin, ymax)
    xmin = min(pitched_data.times)
    xmax = max(pitched_data.times)
    plt.xlim(xmin, xmax)

    plt.xlabel("Time (s)")
    plt.ylabel("log10 of Frequency (Hz)")

    notes_within_range = set_axes_ticks_and_labels(pitched_data.times, ymin, ymax)

    # draw horizontal lines for each note; every C (but not C#) is red to mark the start of an octave
    for note in notes_within_range:
        color = "b"
        if note.note.startswith("C") and not note.note.startswith("C#"):
            color = "r"
        plt.axhline(y=note.frequency_log_10, color=color, linestyle="-", linewidth=0.2)

    # create line and scatter plot of pitched data
    plt.plot(pitched_data_with_gaps.times, frequencies_log_10_with_gaps, linewidth=0.1)
    scatter_path_collection = plt.scatter(
        pitched_data_with_gaps.times,
        frequencies_log_10_with_gaps,
        s=5,
        c=pitched_data_with_gaps.confidence,
        cmap=plt.colormaps.get_cmap("gray").reversed(),
        vmin=0,
        vmax=1,
    )
    plt.figure(1).colorbar(scatter_path_collection, label="confidence")

    set_figure_dimensions(xmax - xmin, y_upper_bound - y_lower_bound)

    plot_words(transcribed_data, ultrastar_class, midi_notes)

    if title is not None:
        plt.title(label=title)

    plt.figure(1).tight_layout(h_pad=1.4)

    dpi = 200
    plt.savefig(
        os.path.join(
            output_path, f"plot{'' if title is None else '_' + snake(title)}.svg"
        ),
        dpi=dpi,
    )
    # reset the shared pyplot figure so the next plot starts empty
    plt.clf()
    plt.cla()


def set_axes_ticks_and_labels(confidence, ymin, ymax):
    """Configure the ticks of both axes and return the notes shown on the y axis

    `confidence` holds the x values whose minimum and maximum become the
    first and last x tick (the caller in this file passes the time values,
    despite the parameter's name). `ymin` and `ymax` are the log10 frequency
    limits of the y axis.
    """
    visible_notes = [
        plotted for plotted in PLOTTED_NOTES
        if ymin <= plotted.frequency_log_10 <= ymax
    ]
    y_positions = [plotted.frequency_log_10 for plotted in visible_notes]
    y_labels = [plotted.note for plotted in visible_notes]
    plt.yticks(y_positions, y_labels)

    earliest = min(confidence)
    latest = max(confidence)

    # regular ticks sit on multiples of X_TICK_SIZE strictly after the first value;
    # the 0.1 keeps the last multiple inside numpy.arange's half-open range
    first_regular_tick = earliest // X_TICK_SIZE * X_TICK_SIZE + X_TICK_SIZE
    tick_limit = latest // X_TICK_SIZE * X_TICK_SIZE + 0.1
    x_ticks = numpy.arange(
        first_regular_tick, tick_limit, X_TICK_SIZE, dtype=int
    ).tolist()

    # always label the exact start and end of the data
    if not x_ticks or x_ticks[0] != earliest:
        x_ticks.insert(0, earliest)
    if len(x_ticks) == 1 or x_ticks[-1] != latest:
        x_ticks.append(latest)

    plt.xticks(x_ticks, [str(tick) for tick in x_ticks])
    return visible_notes


def determine_bounds(frequency_log_10: list[float]) -> tuple[float, float]:
    """Return the 1st and 99th percentile of the data as (lower, upper) bounds"""
    values = numpy.array(frequency_log_10)
    return numpy.percentile(values, 1), numpy.percentile(values, 99)


def set_figure_dimensions(time_range, frequency_log_10_range):
    """Scale figure 1 with the time span and the log10 frequency span of the data

    The width is half the time range and the height is the frequency range
    divided by 0.06, but never less than 6.4 wide and 4 high.
    """
    wanted_height = frequency_log_10_range / 0.06
    wanted_width = time_range / 2

    figure = plt.figure(1)
    figure.set_figwidth(max(6.4, wanted_width))
    figure.set_figheight(max(4, wanted_height))


def create_gaps(pitched_data: PitchedData, step_size: float) -> PitchedData:
    """
    Return a copy of the pitched data with a 'nan' frequency inserted in front of
    every datapoint that lies more than `step_size` after its predecessor.
    Plotting the result gives a line that is interrupted at those positions.

    """
    gapped = PitchedData([], [], [])

    def add_point(time, frequency, confidence):
        gapped.times.append(time)
        gapped.frequencies.append(frequency)
        gapped.confidence.append(confidence)

    last_time = 0
    for index, current_time in enumerate(pitched_data.times):
        is_adjacent = current_time - last_time <= step_size
        # only open a gap after a real value, never at the very start or after another 'nan'
        follows_real_value = (
            len(gapped.frequencies) > 0
            and str(gapped.frequencies[-1]) != "nan"
        )
        if follows_real_value and not is_adjacent:
            # the marker shares time and confidence with the datapoint it precedes
            add_point(current_time, float("nan"), pitched_data.confidence[index])

        add_point(
            current_time,
            pitched_data.frequencies[index],
            pitched_data.confidence[index],
        )
        last_time = current_time

    return gapped


def plot_word(midi_note: str, start, end, word):
    """Draw a half-transparent red rectangle for one word and label it

    The rectangle spans `start` to `end` on the x axis. On the (log10) y axis
    it reaches from half a frequency range below to half a frequency range
    above the frequency of `midi_note`. The word is written vertically,
    starting at the top edge, a quarter of the width into the rectangle.
    """
    center_hz = librosa.note_to_hz(midi_note)
    half_band_hz = get_frequency_range(midi_note) / 2

    top = numpy.log10([center_hz + half_band_hz])[0]
    bottom = numpy.log10([center_hz - half_band_hz])[0]
    duration = end - start

    word_box = Rectangle(
        (start, bottom),
        duration,
        top - bottom,
        edgecolor="none",
        facecolor="red",
        alpha=0.5,
    )
    plt.gca().add_patch(word_box)
    plt.text(start + duration / 4, top, word, rotation=90)


def plot_words(transcribed_data: list[TranscribedData], ultrastar_class: UltrastarTxtValue, midi_notes: list[str]):
    """Draw rectangles for each word

    Words are taken from `transcribed_data` if it is given, otherwise from
    `ultrastar_class`; if both are None nothing is drawn. The i-th word is
    drawn at the pitch of `midi_notes[i]`.

    Nothing is drawn when `midi_notes` is None, because the words cannot be
    placed on the frequency axis without their notes.
    """
    # plot() passes midi_notes through with a default of None; indexing it would raise a TypeError
    if midi_notes is None:
        return

    if transcribed_data is not None:
        for i, data in enumerate(transcribed_data):
            plot_word(midi_notes[i], data.start, data.end, data.word)

    elif ultrastar_class is not None:
        for i, word in enumerate(ultrastar_class.words):
            plot_word(midi_notes[i], ultrastar_class.startTimes[i], ultrastar_class.endTimes[i],
                      word)


def snake(s):
    """Convert a string to snake case

    Hyphens and whitespace separate words, and a new word also starts in front
    of a run of upper case letters. The words are joined with underscores and
    lower cased.
    """
    spaced = s.replace("-", " ")
    # put a space in front of every run of capitals ...
    spaced = sub("([A-Z]+)", r" \1", spaced)
    # ... and in front of every capitalized word, which splits e.g. "HTTPServer" into "HTTP Server"
    spaced = sub("([A-Z][a-z]+)", r" \1", spaced)
    words = spaced.split()
    return "_".join(words).lower()


def plot_spectrogram(audio_seperation_path: str,
                     output_path: str,
                     title: str = "Spectrogram",
                     ) -> None:
    """Plot spectrogram of data

    Loads the audio file at `audio_seperation_path` with librosa (sr=None),
    draws its spectrogram with a colorbar and saves the figure to
    `output_path` as "plot_<snake case title>.svg".

    `title` may be None. In that case the plot gets no title and is saved as
    "plot.svg", the same naming rule that plot() uses.
    """

    # title is optional everywhere below; concatenating None would raise a TypeError
    print(
        f"{ULTRASINGER_HEAD} Creating plot{': ' + title if title is not None else ''}"
    )

    audio, sr = librosa.load(audio_seperation_path, sr=None)
    # only the frequency and time bins are needed to size the axes
    _, frequencies_found, times, _ = plt.specgram(audio, Fs=sr)
    plt.colorbar()

    if title is not None:
        plt.title(label=title)

    plt.xlabel("Time (s)")
    plt.ylabel("Frequency (Hz)")

    ymin = 0
    ymax = max(frequencies_found) + 0.05
    plt.ylim(ymin, ymax)
    xmin = 0
    xmax = max(times)
    plt.xlim(xmin, xmax)

    # widen the figure with the duration of the audio, but never below 6.4
    plt.figure(1).set_figwidth(max(6.4, xmax))
    plt.figure(1).set_figheight(4)

    plt.figure(1).tight_layout(h_pad=1.4)

    dpi = 200
    plt.savefig(
        os.path.join(
            output_path, f"plot{'' if title is None else '_' + snake(title)}.svg"
        ),
        dpi=dpi,
    )
    # reset the shared pyplot figure so the next plot starts empty
    plt.clf()
    plt.cla()