Spaces:
Sleeping
Sleeping
File size: 2,436 Bytes
e701383 a2df844 ea19a0e 26ba744 e701383 176ce75 ea19a0e e1503ef fb031e1 26ba744 176ce75 e1503ef a01aab6 e701383 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
import gradio as gr
import parselmouth
from parselmouth.praat import call
import numpy as np
import pandas as pd
def find_rises_and_peaks_gradient(data, threshold=4, *, min_successive_rise=3):
    """Flag the start and the peak of sustained rises in a pitch track.

    A rise is a run of consecutive frames whose ``np.gradient`` value is at
    least ``threshold``. Runs of ``min_successive_rise`` frames or more are
    recorded: the frame just before the run gets ``rise_point = 1`` and the
    last frame of the run gets ``peak_point = 1``.

    NaN pitch values act as gaps. The first non-NaN frame after a gap (and
    frame 1 at the very start) only re-arms the detector and is never counted,
    because its central-difference gradient depends on the preceding frame.

    Args:
        data: DataFrame with a numeric ``'pitch'`` column. It is modified in
            place: integer columns ``'rise_point'`` and ``'peak_point'`` are
            added (or overwritten).
        threshold: Minimum per-frame gradient that counts as rising.
        min_successive_rise: Minimum number of consecutive rising frames
            required for a run to be recorded.

    Returns:
        The same ``data`` object, with the two flag columns set.
    """
    pitch_values = data['pitch'].to_numpy(dtype=float)
    n_frames = len(pitch_values)

    # Flags are collected positionally and assigned as whole columns, so the
    # result does not depend on the DataFrame's index labels.
    rise_flags = np.zeros(n_frames, dtype=np.int64)
    peak_flags = np.zeros(n_frames, dtype=np.int64)

    # np.gradient needs at least two samples; a shorter track has no rises.
    if n_frames >= 2:
        gradients = np.gradient(pitch_values)
        in_rise = False
        rise_start = 0
        successive_rise_count = 0
        checking_rise = False  # becomes True once a non-NaN frame has been seen

        for i in range(1, n_frames):
            if np.isnan(pitch_values[i]):
                # Gap in the track: drop any state and wait for valid data.
                checking_rise = False
                in_rise = False
                successive_rise_count = 0
                continue

            if not checking_rise:
                # First usable frame after a gap only arms the detector.
                checking_rise = True
                continue

            if gradients[i] >= threshold:
                if not in_rise:
                    in_rise = True
                    rise_start = i - 1
                successive_rise_count += 1
                continue

            # Gradient fell below the threshold (or is NaN next to a gap):
            # close the current run and record it if it was long enough.
            if in_rise and successive_rise_count >= min_successive_rise:
                rise_flags[rise_start] = 1
                peak_flags[i - 1] = 1
            in_rise = False
            successive_rise_count = 0

        # A run still open at the end of the track peaks at the last frame.
        if in_rise and successive_rise_count >= min_successive_rise:
            rise_flags[rise_start] = 1
            peak_flags[n_frames - 1] = 1

    data['rise_point'] = rise_flags
    data['peak_point'] = peak_flags
    return data
def get_pitch(audio_data):
    """Extract a pitch track from recorded audio and return it as JSON text.

    Args:
        audio_data: ``(sample_rate, samples)`` pair as delivered by the
            Gradio audio input, or ``None`` when no audio was supplied.
            ``samples`` is a NumPy array; arrays with more than one dimension
            are averaged over axis 1 to obtain a single channel.

    Returns:
        A JSON string (``orient='records'``) with one record per pitch frame
        holding ``time``, ``pitch``, ``rise_point`` and ``peak_point``; or a
        string starting with ``"Error in pitch extraction: "`` on failure.
    """
    # Submitting the form without audio yields None, which cannot be unpacked.
    if audio_data is None:
        return "Error in pitch extraction: no audio provided"

    rate, data = audio_data
    if data.ndim > 1:
        # Multi-channel input: collapse to mono by averaging the channels.
        data = np.mean(data, axis=1)

    # Parselmouth expects floating-point samples.
    # NOTE(review): samples are cast but not rescaled; confirm whether integer
    # PCM input should be normalised to [-1, 1] first.
    data = data.astype('float64')
    sound = parselmouth.Sound(values=data, sampling_frequency=rate)

    try:
        # Arguments are presumably time step (0.0), pitch floor (75) and pitch
        # ceiling (500) -- confirm against the Praat "To Pitch" command.
        pitch = call(sound, "To Pitch", 0.0, 75, 500)
        pitch_values = pitch.selected_array['frequency']

        # Frames reported with frequency 0 (presumably unvoiced) become NaN so
        # that the rise detector treats them as gaps.
        pitch_values[pitch_values == 0] = np.nan

        df_pitch = pd.DataFrame(
            np.column_stack([pitch.xs(), pitch_values]),
            columns=['time', 'pitch'],
        )
        df_pitch = find_rises_and_peaks_gradient(df_pitch)
        return df_pitch.to_json(orient='records')
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return "Error in pitch extraction: " + str(e)
# Build the web UI: an audio input fed to get_pitch, with a text output,
# then start serving it.
demo = gr.Interface(get_pitch, "audio", "text")
demo.launch()
|