Spaces:
Sleeping
Sleeping
"""Plot transcribed data""" | |
import os | |
from dataclasses import dataclass | |
from re import sub | |
import librosa | |
import numpy | |
from matplotlib import pyplot as plt | |
from matplotlib.patches import Rectangle | |
from modules.Ultrastar.ultrastar_txt import UltrastarTxtValue | |
from modules.console_colors import ULTRASINGER_HEAD | |
from modules.Pitcher.pitched_data import PitchedData | |
from modules.Pitcher.pitcher import get_pitched_data_with_high_confidence | |
from modules.Speech_Recognition.TranscribedData import TranscribedData | |
@dataclass
class PlottedNote:
    """A musical note prepared for use as a tick on the plot's y axis.

    The class is a dataclass so that it can be constructed positionally as
    ``PlottedNote(note, frequency, frequency_log_10, octave)``.
    """

    # Note name including its octave, e.g. "C4"
    note: str
    # Frequency of the note in Hz
    frequency: float
    # log10 of the frequency; this is the scale used on the plot's y axis
    frequency_log_10: float
    # Octave number the note belongs to
    octave: int
# Names of the twelve semitones of one octave, in ascending order
NOTES = "C C# D D# E F F# G G# A A# B".split()
# Octave numbers for which y-axis notes are generated
OCTAVES = list(range(9))
# Distance between two regular ticks on the x axis (the x axis is in seconds)
X_TICK_SIZE = 5
def get_frequency_range(midi_note: str) -> float:
    """Return the distance in Hz between ``midi_note`` and the note one MIDI number above it."""
    midi_number = librosa.note_to_midi(midi_note)
    lower_hz = librosa.midi_to_hz(midi_number)
    upper_hz = librosa.midi_to_hz(midi_number + 1)
    return upper_hz - lower_hz
def create_plot_notes(notes: list[str], octaves: list[int]) -> list[PlottedNote]:
    """Build one PlottedNote per note/octave combination for the plot's y axis.

    The result is ordered by octave first and, within an octave, by the order of ``notes``.
    """

    def to_plotted_note(name: str, octave: int) -> PlottedNote:
        # note name with its octave appended, e.g. "C" and 4 become "C4"
        label = name + str(octave)
        hertz = librosa.note_to_hz(label)
        return PlottedNote(label, hertz, numpy.log10([hertz])[0], octave)

    return [to_plotted_note(name, octave) for octave in octaves for name in notes]
# All notes that can appear as y-axis ticks, created once when the module is loaded
PLOTTED_NOTES = create_plot_notes(notes=NOTES, octaves=OCTAVES)
def plot(
    pitched_data: PitchedData,
    output_path: str,
    transcribed_data: list[TranscribedData] = None,
    ultrastar_class: UltrastarTxtValue = None,
    midi_notes: list[str] = None,
    title: str = None,
) -> None:
    """Plot the pitched data and save the figure as an SVG file in ``output_path``.

    The y axis shows log10 of the frequency and is labelled with note names. Words are
    drawn as rectangles, taken from ``transcribed_data`` or, if that is None, from
    ``ultrastar_class``; ``midi_notes`` supplies the note of each word. If ``title`` is
    given it becomes the figure title and, in snake case, the suffix of the file name
    (``plot_<title>.svg``); otherwise the file is named ``plot.svg``.

    No plot is created when there are fewer than two datapoints, either in the data as
    passed in or after get_pitched_data_with_high_confidence() has been applied.
    """
    # the step size is read from the unfiltered data, so at least two datapoints are
    # needed; checking first avoids an IndexError on (almost) empty input
    if len(pitched_data.times) < 2:
        print(f"{ULTRASINGER_HEAD} Plot can't be created; too few datapoints")
        return

    # determine time between two datapoints if there is no gap (this is the step size crepe ran with)
    step_size = pitched_data.times[1] - pitched_data.times[0]
    pitched_data = get_pitched_data_with_high_confidence(pitched_data)

    if len(pitched_data.frequencies) < 2:
        print(f"{ULTRASINGER_HEAD} Plot can't be created; too few datapoints")
        return

    print(
        f"{ULTRASINGER_HEAD} Creating plot{': ' + title if title is not None else ''}"
    )

    # map each frequency to logarithm with base 10 for a linear progression of values between the musical notes
    # see http://www.phon.ox.ac.uk/jcoleman/LOGARITH.htm
    frequencies_log_10 = numpy.log10(pitched_data.frequencies)

    # add 'nan' where there are gaps for frequency values so the graph is only continuous where it should be
    pitched_data_with_gaps = create_gaps(pitched_data, step_size)
    frequencies_log_10_with_gaps = numpy.log10(pitched_data_with_gaps.frequencies)

    # dynamically set the minimum and maximum values for x and y axes based on data
    y_lower_bound, y_upper_bound = determine_bounds(frequencies_log_10)
    ymin = max(0, y_lower_bound - 0.05)
    ymax = y_upper_bound + 0.05
    plt.ylim(ymin, ymax)
    xmin = min(pitched_data.times)
    xmax = max(pitched_data.times)
    plt.xlim(xmin, xmax)

    plt.xlabel("Time (s)")
    plt.ylabel("log10 of Frequency (Hz)")

    notes_within_range = set_axes_ticks_and_labels(pitched_data.times, ymin, ymax)

    # draw horizontal lines for each note; every natural C is red, all other notes are blue
    for note in notes_within_range:
        color = "b"
        if note.note.startswith("C") and not note.note.startswith("C#"):
            color = "r"
        plt.axhline(y=note.frequency_log_10, color=color, linestyle="-", linewidth=0.2)

    # create line and scatter plot of pitched data
    plt.plot(pitched_data_with_gaps.times, frequencies_log_10_with_gaps, linewidth=0.1)
    # the darker a point, the higher its confidence (reversed gray colormap, range 0..1)
    scatter_path_collection = plt.scatter(
        pitched_data_with_gaps.times,
        frequencies_log_10_with_gaps,
        s=5,
        c=pitched_data_with_gaps.confidence,
        cmap=plt.colormaps.get_cmap("gray").reversed(),
        vmin=0,
        vmax=1,
    )
    plt.figure(1).colorbar(scatter_path_collection, label="confidence")

    set_figure_dimensions(xmax - xmin, y_upper_bound - y_lower_bound)

    plot_words(transcribed_data, ultrastar_class, midi_notes)

    if title is not None:
        plt.title(label=title)

    plt.figure(1).tight_layout(h_pad=1.4)

    dpi = 200
    plt.savefig(
        os.path.join(
            output_path, f"plot{'' if title is None else '_' + snake(title)}.svg"
        ),
        dpi=dpi,
    )

    # reset the figure so that a following plot starts from a clean state
    plt.clf()
    plt.cla()
def set_axes_ticks_and_labels(confidence, ymin, ymax):
    """Set the ticks and tick labels of both axes and return the notes shown on the y axis.

    Despite its name, ``confidence`` holds the x-axis values; plot() passes the times here.
    The y axis gets one tick per note of PLOTTED_NOTES whose log10 frequency lies within
    [ymin, ymax]. The x axis gets a tick every X_TICK_SIZE plus one at the smallest and
    one at the largest x value.
    """
    visible_notes = []
    for candidate in PLOTTED_NOTES:
        if ymin <= candidate.frequency_log_10 <= ymax:
            visible_notes.append(candidate)

    y_positions = [entry.frequency_log_10 for entry in visible_notes]
    y_labels = [entry.note for entry in visible_notes]
    plt.yticks(y_positions, y_labels)

    start = min(confidence)
    end = max(confidence)

    # first multiple of X_TICK_SIZE that is greater than the start value
    first_regular_tick = start // X_TICK_SIZE * X_TICK_SIZE + X_TICK_SIZE
    # the small offset makes arange include the last multiple of X_TICK_SIZE before the end
    arange_stop = end // X_TICK_SIZE * X_TICK_SIZE + 0.1

    x_ticks = numpy.arange(
        first_regular_tick, arange_stop, X_TICK_SIZE, dtype=int
    ).tolist()

    # make sure the axis starts and ends with a tick at the exact data boundaries
    if not x_ticks or x_ticks[0] != start:
        x_ticks.insert(0, start)
    if len(x_ticks) == 1 or x_ticks[-1] != end:
        x_ticks.append(end)

    plt.xticks(x_ticks, [str(tick) for tick in x_ticks])

    return visible_notes
def determine_bounds(frequency_log_10: list[float]) -> tuple[float, float]:
    """Return the 1st and the 99th percentile of the given values as (lower, upper)."""
    values = numpy.array(frequency_log_10)
    lower, upper = numpy.percentile(values, [1, 99])
    return lower, upper
def set_figure_dimensions(time_range, frequency_log_10_range):
    """Scale figure 1 with the duration and the frequency span of the plotted data.

    The width grows with ``time_range`` and the height with ``frequency_log_10_range``;
    neither gets smaller than 6.4 (width) or 4 (height).
    """
    figure = plt.figure(1)
    figure.set_figwidth(max(6.4, time_range / 2))
    figure.set_figheight(max(4, frequency_log_10_range / 0.06))
def create_gaps(pitched_data: PitchedData, step_size: float) -> PitchedData:
    """
    Add 'nan' where there are no high confidence frequency values.
    This way the graph is only continuous where it should be.

    A new PitchedData is returned; ``pitched_data`` itself is not modified. Whenever two
    consecutive datapoints are further apart than ``step_size``, an extra datapoint with
    frequency 'nan' is inserted in front of the second one. The extra datapoint uses the
    time and confidence of that second datapoint.
    """
    # The time difference of two neighbouring datapoints can deviate from step_size by a
    # floating point rounding error (e.g. 0.07 - 0.06 > 0.01), which must not count as a
    # gap. Assumes the times lie on a grid of step_size, so that a real gap spans at
    # least two steps and 1.5 steps separates both cases.
    max_distance_without_gap = step_size * 1.5

    pitched_data_with_gaps = PitchedData([], [], [])

    previous_time = 0
    for i, time in enumerate(pitched_data.times):
        comes_right_after_previous = time - previous_time <= max_distance_without_gap
        # never insert a gap in front of the very first datapoint or right after a 'nan'
        previous_frequency_is_not_gap = len(
            pitched_data_with_gaps.frequencies
        ) > 0 and not numpy.isnan(pitched_data_with_gaps.frequencies[-1])
        if previous_frequency_is_not_gap and not comes_right_after_previous:
            pitched_data_with_gaps.times.append(time)
            pitched_data_with_gaps.frequencies.append(float("nan"))
            pitched_data_with_gaps.confidence.append(pitched_data.confidence[i])

        pitched_data_with_gaps.times.append(time)
        pitched_data_with_gaps.frequencies.append(pitched_data.frequencies[i])
        pitched_data_with_gaps.confidence.append(pitched_data.confidence[i])

        previous_time = time

    return pitched_data_with_gaps
def plot_word(midi_note: str, start, end, word):
    """Draw a half-transparent red rectangle for one word and write the word next to it.

    The rectangle spans from ``start`` to ``end`` on the x axis. On the y axis (log10 of
    the frequency) it is placed around the frequency of ``midi_note``, reaching half of
    get_frequency_range(midi_note) below and above it.
    """
    center_hz = librosa.note_to_hz(midi_note)
    half_band_hz = get_frequency_range(midi_note) / 2

    bottom = numpy.log10([center_hz - half_band_hz])[0]
    top = numpy.log10([center_hz + half_band_hz])[0]
    duration = end - start

    word_box = Rectangle(
        (start, bottom),
        duration,
        top - bottom,
        edgecolor="none",
        facecolor="red",
        alpha=0.5,
    )
    plt.gca().add_patch(word_box)

    # the label is rotated by 90 degrees and starts at the top edge of the rectangle
    plt.text(start + duration / 4, top, word, rotation=90)
def plot_words(transcribed_data: list[TranscribedData], ultrastar_class: UltrastarTxtValue, midi_notes: list[str]):
    """Draw rectangles for each word

    The words are taken from ``transcribed_data`` or, if that is None, from
    ``ultrastar_class``; if both are None nothing is drawn. ``midi_notes[i]`` is the
    note of the i-th word. Without ``midi_notes`` the words cannot be placed on the
    y axis, so nothing is drawn in that case either.
    """
    # every argument is optional for the caller; without notes there is nothing to draw
    if midi_notes is None:
        return

    if transcribed_data is not None:
        for i, data in enumerate(transcribed_data):
            plot_word(midi_notes[i], data.start, data.end, data.word)

    elif ultrastar_class is not None:
        for i, word in enumerate(ultrastar_class.words):
            plot_word(
                midi_notes[i],
                ultrastar_class.startTimes[i],
                ultrastar_class.endTimes[i],
                word,
            )
def snake(s):
    """Convert a string to snake case: lower case words joined by underscores.

    Hyphens and whitespace separate words; in addition a new word starts in front of
    every run of upper case letters and in front of every capitalized word.
    """
    spaced = s.replace("-", " ")
    spaced = sub("([A-Z]+)", r" \1", spaced)
    spaced = sub("([A-Z][a-z]+)", r" \1", spaced)
    words = spaced.split()
    return "_".join(words).lower()
def plot_spectrogram(audio_seperation_path: str,
                     output_path: str,
                     title: str = "Spectrogram",
                     ) -> None:
    """Plot the spectrogram of an audio file and save it as an SVG file in ``output_path``.

    The audio is loaded from ``audio_seperation_path`` with its original sample rate.
    ``title`` is used as figure title and, in snake case, as suffix of the file name
    (``plot_<title>.svg``). If ``title`` is None the figure gets no title and the file
    is named ``plot_spectrogram.svg``.
    """
    print(
        f"{ULTRASINGER_HEAD} Creating plot{': ' + title if title is not None else ''}"
    )

    audio, sr = librosa.load(audio_seperation_path, sr=None)
    # only the frequency and time axes of the result are needed for the axis limits
    _, frequencies_found, times, _ = plt.specgram(audio, Fs=sr)
    plt.colorbar()

    if title is not None:
        plt.title(label=title)

    plt.xlabel("Time (s)")
    plt.ylabel("Frequency (Hz)")

    ymin = 0
    ymax = max(frequencies_found) + 0.05
    plt.ylim(ymin, ymax)
    xmin = 0
    xmax = max(times)
    plt.xlim(xmin, xmax)

    # the figure gets wider for longer audio but is never narrower than 6.4
    plt.figure(1).set_figwidth(max(6.4, xmax))
    plt.figure(1).set_figheight(4)

    plt.figure(1).tight_layout(h_pad=1.4)

    # fall back to a fixed suffix so that a missing title neither crashes nor
    # produces the file name used by plot() without title
    file_suffix = snake(title) if title is not None else "spectrogram"

    dpi = 200
    plt.savefig(
        os.path.join(
            output_path, f"plot{'_' + file_suffix}.svg"
        ),
        dpi=dpi,
    )

    # reset the figure so that a following plot starts from a clean state
    plt.clf()
    plt.cla()