# yamnet_test/python/util/plt_util.py
# Luis
# add mp4_to_mp3.py
# c0a2456
import librosa.display
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.style as ms
from PIL import Image
import matplotlib.ticker as ticker
from librosa.feature import melspectrogram
from python.util.time_util import int_to_min_sec
import librosa
import librosa.display
import numpy as np
from io import BytesIO
# Select the non-interactive Agg backend so figures can be rendered without
# starting a GUI outside of the main thread. See:
# https://stackoverflow.com/questions/69924881/userwarning-starting-a-matplotlib-gui-outside-of-the-main-thread-will-likely-fa
matplotlib.use('agg')
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# 3.8 removed the old names. Prefer the new name when this matplotlib ships
# it and fall back to the legacy name on older releases.
ms.use('seaborn-v0_8-muted' if 'seaborn-v0_8-muted' in ms.available else 'seaborn-muted')
def update_ticks(x, pos, sample_rate=16000):
    """Format a sample-index tick value as a time label.

    Keeps the ``(x, pos)`` calling convention used by
    ``matplotlib.ticker.FuncFormatter``; ``sample_rate`` is an optional
    extra so callers with audio at another rate can bind it.

    Args:
        x: Tick value, interpreted as a sample index.
        pos: Tick position supplied by matplotlib; unused.
        sample_rate: Samples per second used to convert ``x`` to seconds.
            Defaults to 16000, the value that used to be hard-coded.

    Returns:
        The result of ``int_to_min_sec`` for the tick's offset in seconds
        (presumably a "min:sec" label -- confirm against time_util).
    """
    which_second = x / sample_rate
    return int_to_min_sec(which_second)
def plt_line(y_points, sample_rate=16000):
    """Render a waveform as a line plot and return it as a PIL image.

    Args:
        y_points: Sequence of sample values to plot; the x axis is the
            sample index.
        sample_rate: Samples per second, used to label the x axis in time
            units instead of sample indices.

    Returns:
        A ``PIL.Image.Image`` opened on an in-memory PNG of the plot.
    """
    fig, ax = plt.subplots()
    try:
        # Convert sample-index ticks to time labels using the caller's rate
        # (previously a fixed 16000 was used regardless of ``sample_rate``).
        ax.xaxis.set_major_formatter(
            ticker.FuncFormatter(lambda x, pos: int_to_min_sec(x / sample_rate))
        )
        ax.plot(y_points)
        ax.set_title('Waveform')
        # Render the figure into memory rather than to a file.
        buffer = BytesIO()
        fig.savefig(buffer, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed; release it so
        # repeated calls do not accumulate figures.
        plt.close(fig)
    buffer.seek(0)
    return Image.open(buffer)
# Plot a log-mel spectrogram, https://www.cnblogs.com/LXP-Never/p/10918590.html
def plt_mfcc(single_channel, sample_rate):
    """Render a log-scaled mel spectrogram and return it as a PIL image.

    Note: despite the function name and the 'MFCC' plot title, the plotted
    data is the dB-scaled mel spectrogram, not cepstral coefficients.

    Args:
        single_channel: Audio samples passed to ``melspectrogram`` as ``y``
            (assumed to be a mono 1-D array -- confirm against callers).
        sample_rate: Sampling rate of ``single_channel`` in Hz.

    Returns:
        A ``PIL.Image.Image`` opened on an in-memory PNG of the plot.
    """
    n_fft = 1024
    hop_length = 512
    mel_spec = melspectrogram(
        y=single_channel, sr=sample_rate, n_fft=n_fft, hop_length=hop_length, n_mels=128
    )
    log_mel_spec = librosa.power_to_db(mel_spec)

    fig, ax = plt.subplots()
    try:
        # hop_length is passed explicitly so the time axis matches the
        # spectrogram's framing (512 is also specshow's default).
        librosa.display.specshow(
            log_mel_spec, sr=sample_rate, hop_length=hop_length,
            x_axis='time', y_axis='mel', ax=ax,
        )
        # A colour bar on the right-hand side is intentionally not drawn.
        ax.set_title('MFCC')
        # Render the figure into memory rather than to a file.
        buffer = BytesIO()
        fig.savefig(buffer, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed; release it so
        # repeated calls do not accumulate figures.
        plt.close(fig)
    buffer.seek(0)
    return Image.open(buffer)
# https://gist.github.com/stevemclaugh/80f192130852353ad53e6d8b6b275983
def plt_mfcc2(wav_pathname, sample_rate):
    """Plot the second-order MFCC deltas of an audio file as a PIL image.

    Pipeline: load audio -> mel power spectrogram -> dB scale -> 13 MFCCs ->
    second-order delta -> heat map with a colour bar.

    Args:
        wav_pathname: Path of the audio file handed to ``librosa.load``.
        sample_rate: Rate in Hz at which the audio is loaded and analysed.

    Returns:
        A ``PIL.Image.Image`` opened on an in-memory PNG of the plot.
    """
    # Load at the requested rate so the samples and every later ``sr=``
    # argument agree. Without ``sr=`` librosa.load resamples to its own
    # default rate, which made the time/mel axes wrong for other rates.
    y, sr = librosa.load(wav_pathname, sr=sample_rate)

    # Mel-scaled power (energy-squared) spectrogram.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)

    # Convert to log scale (dB) using the peak power as reference. The input
    # is a power spectrogram, so power_to_db (not amplitude_to_db) applies.
    log_S = librosa.power_to_db(S, ref=np.max)

    # First 13 Mel-frequency cepstral coefficients and their second delta.
    mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=13)
    delta2_mfcc = librosa.feature.delta(mfcc, order=2)

    fig, ax = plt.subplots()
    try:
        mesh = librosa.display.specshow(delta2_mfcc, sr=sr, x_axis='time', ax=ax)
        # Raw string: '\D' is not a valid escape sequence in a normal literal.
        ax.set_ylabel(r'MFCC-$\Delta^2$')
        fig.colorbar(mesh, ax=ax)
        # Make the figure layout compact.
        fig.tight_layout()
        # Render the figure into memory rather than to a file.
        buffer = BytesIO()
        fig.savefig(buffer, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed; release it so
        # repeated calls do not accumulate figures.
        plt.close(fig)
    buffer.seek(0)
    return Image.open(buffer)