catiR
committed on
Commit
·
a4ed697
1
Parent(s):
8d1fcc3
adjust plot
Browse files
- app.py +2 -2
- scripts/clusterprosody.py +17 -3
- scripts/runSQ.py +1 -2
app.py
CHANGED
@@ -54,7 +54,7 @@ with bl:
|
|
54 |
|
55 |
#temp_sentences = ['Litlaus græn hugmynd?','Var það ekki nóg?', 'Ef svo er hvað heita þau þá?','Eru maríuhænur á Íslandi?']
|
56 |
|
57 |
-
voices = ['
|
58 |
|
59 |
|
60 |
#with gr.Row():
|
@@ -66,7 +66,7 @@ with bl:
|
|
66 |
|
67 |
with gr.Row():
|
68 |
spanselect = gr.Textbox(value='1-3',label="Select words",info='Enter the index of the word(s) to analyse, according to the key above. It can be a single word: 4 or a span of words separated by a dash: 2-3')
|
69 |
-
voiceselect = gr.Radio(voices, label="TTS voice",value='
|
70 |
|
71 |
#with gr.Column(scale=1):
|
72 |
temp_button = gr.Button(value="Run with selected options")
|
|
|
54 |
|
55 |
#temp_sentences = ['Litlaus græn hugmynd?','Var það ekki nóg?', 'Ef svo er hvað heita þau þá?','Eru maríuhænur á Íslandi?']
|
56 |
|
57 |
+
voices = ['Dilja_v2', 'Alfur_v2', 'Dilja', 'Alfur', 'Bjartur', 'Rosa', 'Karl', 'Dora']
|
58 |
|
59 |
|
60 |
#with gr.Row():
|
|
|
66 |
|
67 |
with gr.Row():
|
68 |
spanselect = gr.Textbox(value='1-3',label="Select words",info='Enter the index of the word(s) to analyse, according to the key above. It can be a single word: 4 or a span of words separated by a dash: 2-3')
|
69 |
+
voiceselect = gr.Radio(voices, label="TTS voice",value='Alfur_v2')
|
70 |
|
71 |
#with gr.Column(scale=1):
|
72 |
temp_button = gr.Button(value="Run with selected options")
|
scripts/clusterprosody.py
CHANGED
@@ -361,7 +361,16 @@ def plot_one_cluster(words,feature,speech_data,seg_aligns,cluster_id,tts_data=No
|
|
361 |
# datapoint interval is 0.005 seconds
|
362 |
feat_xvals = [x*0.005 for x in range(len(feats))]
|
363 |
feat_xvals = retime_speaker_xvals(retimes, word_times, feat_xvals)
|
364 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
365 |
|
366 |
cc += 1
|
367 |
if cc >= len(colors):
|
@@ -372,8 +381,13 @@ def plot_one_cluster(words,feature,speech_data,seg_aligns,cluster_id,tts_data=No
|
|
372 |
t_xvals = [x*0.005 for x in range(len(tfeats))]
|
373 |
t_xvals = retime_speaker_xvals(retimes, tts_align, t_xvals)
|
374 |
|
375 |
-
pfunc(t_xvals, tfeats, color="black", label=f"TTS {voice}")
|
376 |
-
|
|
|
|
|
|
|
|
|
|
|
377 |
|
378 |
#plt.legend()
|
379 |
#plt.show()
|
|
|
361 |
# datapoint interval is 0.005 seconds
|
362 |
feat_xvals = [x*0.005 for x in range(len(feats))]
|
363 |
feat_xvals = retime_speaker_xvals(retimes, word_times, feat_xvals)
|
364 |
+
|
365 |
+
|
366 |
+
#pfunc(feat_xvals, feats, color=colors[cc], label=f"Speaker {spk}")
|
367 |
+
for w, st in reversed(retimes):
|
368 |
+
w_xvals = [x for x in feat_xvals if x>= st]
|
369 |
+
w_feats = feats[-(len(w_xvals)):]
|
370 |
+
pfunc(w_xvals, w_feats, color=colors[cc])
|
371 |
+
feat_xvals = feat_xvals[:-(len(w_xvals))]
|
372 |
+
feats = feats[:-(len(w_xvals))]
|
373 |
+
|
374 |
|
375 |
cc += 1
|
376 |
if cc >= len(colors):
|
|
|
381 |
t_xvals = [x*0.005 for x in range(len(tfeats))]
|
382 |
t_xvals = retime_speaker_xvals(retimes, tts_align, t_xvals)
|
383 |
|
384 |
+
#pfunc(t_xvals, tfeats, color="black", label=f"TTS {voice}")
|
385 |
+
for w, st in reversed(retimes):
|
386 |
+
tw_xvals = [x for x in t_xvals if x>= st]
|
387 |
+
tw_feats = tfeats[-(len(tw_xvals)):]
|
388 |
+
pfunc(tw_xvals, tw_feats, color=colors[cc])
|
389 |
+
t_xvals = t_xvals[:-(len(tw_xvals))]
|
390 |
+
tfeats = tfeats[:-(len(tw_xvals))]
|
391 |
|
392 |
#plt.legend()
|
393 |
#plt.show()
|
scripts/runSQ.py
CHANGED
@@ -113,7 +113,6 @@ def get_samromur_queries(sentence, corpusdb, speech_dir, align_dir, align_model_
|
|
113 |
return []
|
114 |
else:
|
115 |
print(f'{len(meta)} recordings of sentence <{sentence}>')
|
116 |
-
#return meta
|
117 |
|
118 |
|
119 |
word_aligner = None
|
@@ -174,7 +173,7 @@ def get_tts(sentence,voices,ttsdir,align_model_path,reaper_path = "REAPER/build/
|
|
174 |
sample_paths.append(wpath)
|
175 |
|
176 |
|
177 |
-
# TEMP
|
178 |
# return for single last voice
|
179 |
temp_sample_path = wpath
|
180 |
|
|
|
113 |
return []
|
114 |
else:
|
115 |
print(f'{len(meta)} recordings of sentence <{sentence}>')
|
|
|
116 |
|
117 |
|
118 |
word_aligner = None
|
|
|
173 |
sample_paths.append(wpath)
|
174 |
|
175 |
|
176 |
+
# TODO TEMP
|
177 |
# return for single last voice
|
178 |
temp_sample_path = wpath
|
179 |
|