# dlaiu
# output as json with processing
# 176ce75
# raw
# history blame
# 2.44 kB
import gradio as gr
import parselmouth
from parselmouth.praat import call
import numpy as np
import pandas as pd
def find_rises_and_peaks_gradient(data, threshold=4, min_successive_rise=3):
    """Flag the start and the peak of sustained rises in a pitch track.

    A rise is a run of consecutive frames whose ``np.gradient`` value is at
    least ``threshold``. Runs shorter than ``min_successive_rise`` frames are
    ignored. For every qualifying run, the frame just before the run is
    flagged in ``rise_point`` and the last frame of the run is flagged in
    ``peak_point``.

    Args:
        data: DataFrame with a numeric ``pitch`` column; NaN entries are
            treated as gaps that interrupt any rise in progress.
        threshold: Minimum per-frame gradient for a frame to count as rising.
        min_successive_rise: Minimum number of consecutive rising frames
            required for a run to be reported.

    Returns:
        The same DataFrame object, modified in place, with integer 0/1
        columns ``rise_point`` and ``peak_point`` added (or overwritten).
    """
    pitch_values = data['pitch'].to_numpy(dtype=float)
    n_frames = len(pitch_values)

    # Flags are collected positionally and assigned as whole columns at the
    # end, so the result does not depend on the DataFrame's index labels.
    rise_flags = np.zeros(n_frames, dtype=np.int64)
    peak_flags = np.zeros(n_frames, dtype=np.int64)

    # np.gradient needs at least two samples; shorter input has no rises.
    if n_frames >= 2:
        gradients = np.gradient(pitch_values)
        in_rise = False
        rise_start = 0
        successive_rise_count = 0
        checking_rise = False  # False until one valid frame follows a gap

        for i in range(1, n_frames):
            if np.isnan(pitch_values[i]):
                # A gap cancels whatever rise was being tracked.
                checking_rise = False
                in_rise = False
                successive_rise_count = 0
                continue

            if not checking_rise:
                # Skip the first valid frame after a gap (or at the start).
                checking_rise = True
                continue

            if gradients[i] >= threshold:
                if not in_rise:
                    in_rise = True
                    rise_start = i - 1
                successive_rise_count += 1
                continue

            # Gradient fell below the threshold (or is NaN next to a gap):
            # close the current run and report it if it was long enough.
            if in_rise and successive_rise_count >= min_successive_rise:
                rise_flags[rise_start] = 1
                peak_flags[i - 1] = 1
            in_rise = False
            successive_rise_count = 0

        # A rise that is still running when the data ends would otherwise be
        # lost; its peak is the final frame.
        if in_rise and successive_rise_count >= min_successive_rise:
            rise_flags[rise_start] = 1
            peak_flags[n_frames - 1] = 1

    data['rise_point'] = rise_flags
    data['peak_point'] = peak_flags
    return data
def get_pitch(audio_data):
    """Extract a pitch track from recorded audio and return it as JSON.

    Args:
        audio_data: ``(rate, data)`` pair where ``rate`` is the sampling
            frequency and ``data`` is a NumPy array of samples; arrays with
            more than one dimension are averaged over axis 1 to get mono.
            ``None`` (no audio supplied) is reported as an error string.

    Returns:
        A JSON string of records with ``time``, ``pitch``, ``rise_point`` and
        ``peak_point`` fields, or a string starting with
        ``"Error in pitch extraction: "`` if anything fails.
    """
    # Without this guard the tuple unpacking below raises before any error
    # handling can turn the failure into a readable message.
    if audio_data is None:
        return "Error in pitch extraction: no audio provided"

    try:
        rate, data = audio_data
        if data.ndim > 1:
            # Stereo or multi-channel input: average the channels to mono.
            data = np.mean(data, axis=1)
        # Parselmouth is given float64 samples.
        data = data.astype('float64')
        sound = parselmouth.Sound(values=data, sampling_frequency=rate)

        # Praat "To Pitch" arguments — presumably time step (0.0 = automatic),
        # pitch floor and pitch ceiling in Hz; confirm against the Praat manual.
        pitch = call(sound, "To Pitch", 0.0, 75, 500)
        pitch_values = pitch.selected_array['frequency']
        # Zero frequencies (presumably unvoiced frames) become NaN so they
        # act as gaps in the rise detection.
        pitch_values[pitch_values == 0] = np.nan

        df_pitch = pd.DataFrame(
            np.column_stack([pitch.xs(), pitch_values]),
            columns=['time', 'pitch'],
        )
        df_pitch = find_rises_and_peaks_gradient(df_pitch)
        return df_pitch.to_json(orient='records')
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return "Error in pitch extraction: " + str(e)
# Build the web UI: one audio input fed to get_pitch, result shown as text.
demo = gr.Interface(
    fn=get_pitch,
    inputs="audio",
    outputs="text",
)

# Start serving the interface as soon as the script runs.
demo.launch()