Spaces:

clr
/

pce

Sleeping

App Files Community

catiR committed on Nov 14, 2023

Commit

f484865

1 Parent(s): cfb1726

normalise

Browse files

Files changed (2) hide show

scripts/clusterprosody.py +13 -8
scripts/runSQ.py +1 -1

scripts/clusterprosody.py CHANGED Viewed

@@ -6,6 +6,7 @@ import soundfile as sf
 from collections import defaultdict
 from dtw import dtw
 from sklearn_extra.cluster import KMedoids
 from copy import deepcopy
 import os, librosa, json
@@ -55,16 +56,16 @@ def get_pitches(start_time, end_time, fpath):
         # find the mean of all pitches in the whole sentence
-        mean = np.mean([line[1] for line in lines if line[2] != -1])
         # find the std of all pitches in the whole sentence
-        std = np.std([line[1] for line in lines if line[2] != -1])
         for line in lines:
             time, pitch, is_pitch = line
             if start_time <= time <= end_time:
-                if is_pitch:
                     pitches.append(z_score(pitch, mean, std))
                 else:
                     #pitches.append(z_score(fifth_percentile, mean, std))
@@ -83,17 +84,21 @@ def get_pitches(start_time, end_time, fpath):
 # TODO: implement that. ?
 # not sure librosa provides hamming window in rms function directly
 # TODO handle audio that not originally .wav
-def get_rmse(start_time, end_time, wpath):
     """
     Returns an array of RMSE values for a given speech.
     """
     audio, sr = librosa.load(wpath, sr=16000)
-    segment = audio[int(np.floor(start_time * sr)):int(np.ceil(end_time * sr))]
-    rmse = librosa.feature.rms(y=segment,frame_length=480,hop_length=80)#librosa.feature.rms(y=segment)
-    rmse = rmse[0]
     #idx = np.round(np.linspace(0, len(rmse) - 1, pitch_len)).astype(int)
-    return rmse#[idx]
 # may be unnecessary depending how rmse and pitch window/hop are calculated already

 from collections import defaultdict
 from dtw import dtw
 from sklearn_extra.cluster import KMedoids
+from scipy import stats
 from copy import deepcopy
 import os, librosa, json
         # find the mean of all pitches in the whole sentence
+        mean = np.mean([line[1] for line in lines if line[2] == 1])
         # find the std of all pitches in the whole sentence
+        std = np.std([line[1] for line in lines if line[2] == 1])
         for line in lines:
             time, pitch, is_pitch = line
             if start_time <= time <= end_time:
+                if is_pitch == 1:
                     pitches.append(z_score(pitch, mean, std))
                 else:
                     #pitches.append(z_score(fifth_percentile, mean, std))
 # TODO: implement that. ?
 # not sure librosa provides hamming window in rms function directly
 # TODO handle audio that not originally .wav
+def get_rmse(start_time, end_time, wpath, znorm = True):
     """
     Returns an array of RMSE values for a given speech.
     """
     audio, sr = librosa.load(wpath, sr=16000)
+    hop = 80
+    #segment = audio[int(np.floor(start_time * sr)):int(np.ceil(end_time * sr))]
+    rmse = librosa.feature.rms(y=audio,frame_length=480,hop_length=hop)
+    rmse = rmse[0]
+    if znorm:
+        rmse = stats.zscore(rmse)
+    segment = rmse[int(np.floor(start_time * sr/hop)):int(np.ceil(end_time * sr/hop))]
     #idx = np.round(np.linspace(0, len(rmse) - 1, pitch_len)).astype(int)
+    return segment#[idx]
 # may be unnecessary depending how rmse and pitch window/hop are calculated already

scripts/runSQ.py CHANGED Viewed

@@ -286,7 +286,7 @@ def localtest():
-#localtest()
 # torch matplotlib librosa sklearn_extra pydub
 # env pclustr

+localtest()
 # torch matplotlib librosa sklearn_extra pydub
 # env pclustr