Upload 2 files
Browse files- app.py +11 -11
- vowel_length.py +33 -25
app.py
CHANGED
@@ -2,20 +2,18 @@ import gradio as gr
|
|
2 |
import vowel_length
|
3 |
|
4 |
|
5 |
-
|
6 |
-
ph_key = 'data/
|
7 |
-
|
8 |
-
|
9 |
-
dat,vck,kws = vowel_length.setup(tsv,ph_key)
|
10 |
-
kws = ['ala', 'bala', 'dala', 'hala', 'skjala', 'kala', 'mala', 'smala', 'sala', 'tala', 'vala', 'dvala', 'svala', 'alla', 'falla', 'galla', 'halla', 'fjalla', 'hjalla', 'mjalla', 'spjalla', 'kalla', 'skalla', 'lalla', 'malla', 'palla', 'bralla', 'valla', 'baki', 'maki', 'raki', 'taki', 'vaki', 'þaki', 'baggi', 'maggi', 'raggi', 'leki', 'speki', 'reki', 'breki', 'veki', 'eggi', 'beggi', 'skeggi', 'leggi', 'veggi']
|
11 |
|
|
|
12 |
|
13 |
# runan(w,'l1','w2v2',vck,dat,sources)
|
14 |
# runan(w,'l1','mfa',vck,dat,sources)
|
15 |
|
16 |
|
17 |
def manager(word,group,aligner,side):
|
18 |
-
fig = vowel_length.runan(word,group,aligner,vck,dat,
|
19 |
print(side)
|
20 |
return fig
|
21 |
|
@@ -28,10 +26,12 @@ with bl:
|
|
28 |
# with gr.TabItem("One"):
|
29 |
gr.Markdown(
|
30 |
"""
|
31 |
-
#
|
32 |
Choose a word, speaker group, and aligner type. Available speaker groups are native speakers, second-language speakers, or all. Aligner options are Montreal Forced Aligner (MFA) and CTC decoding with Wav2vec-2.0.
|
33 |
|
34 |
-
|
|
|
|
|
35 |
"""
|
36 |
)
|
37 |
|
@@ -39,7 +39,7 @@ with bl:
|
|
39 |
with gr.Column():
|
40 |
wmenu1 = gr.Dropdown(kws,label="Word",value="hala")
|
41 |
lmenu1 = gr.Dropdown(["L1", "L2","All"],label="Speaker group",value="L1")
|
42 |
-
amenu1 = gr.Dropdown(["MFA", "
|
43 |
|
44 |
btn1 = gr.Button(value="Update Plot 1")
|
45 |
|
@@ -50,7 +50,7 @@ with bl:
|
|
50 |
with gr.Column():
|
51 |
wmenu2 = gr.Dropdown(kws,label="Word",value="halla")
|
52 |
lmenu2 = gr.Dropdown(["L1", "L2","All"],label="Speaker group",value="L1")
|
53 |
-
amenu2 = gr.Dropdown(["MFA", "
|
54 |
|
55 |
btn2 = gr.Button(value="Update Plot 2")
|
56 |
pl2 = gr.Plot()
|
|
|
2 |
import vowel_length
|
3 |
|
4 |
|
5 |
+
# Input data locations, relative to the app's working directory.
meta_tsv = ['data/set1.tsv', 'data/set2.tsv']  # recording metadata tables
ph_key = 'data/key_all.tsv'                    # vowel/consonant index key
align_output = 'data/aligns_csv.pickle'        # pickled alignment CSVs

# Load everything once at import time; the results are module-level state
# read by manager() and the word dropdowns below.
# Fix: pass meta_tsv (the list defined above) -- the previous argument
# name `tsv` is not defined in this module.
dat, vck, kws, csvs = vowel_length.setup(meta_tsv, ph_key, align_output)
|
10 |
|
11 |
# runan(w,'l1','w2v2',vck,dat,sources)
|
12 |
# runan(w,'l1','mfa',vck,dat,sources)
|
13 |
|
14 |
|
15 |
def manager(word,group,aligner,side):
    """Run the vowel-length analysis for one selection and return its result.

    word, group and aligner are forwarded unchanged to vowel_length.runan
    together with the module-level vck, dat and csvs loaded at start-up.
    side is only printed for now; it is reserved for per-plot colouring.

    Returns whatever vowel_length.runan returns.
    """
    # TODO add colour by plot-side
    result = vowel_length.runan(word, group, aligner, vck, dat, csvs)
    print(side)
    return result
|
19 |
|
|
|
26 |
# with gr.TabItem("One"):
|
27 |
gr.Markdown(
|
28 |
"""
|
29 |
+
# Long and short Icelandic vowels
|
30 |
Choose a word, speaker group, and aligner type. Available speaker groups are native speakers, second-language speakers, or all. Aligner options are Montreal Forced Aligner (MFA) and CTC decoding with Wav2vec-2.0.
|
31 |
|
32 |
+
The general expectation is that syllables with long stressed vowels followed by short consonants have a higher vowel:consonant duration ratio, while syllables with short stressed vowels followed by long consonants have a lower vowel:consonant ratio. However, a great many other factors affect the relative duration in any one recorded token. See Pind 1999, 'Speech segment durations and quantity in Icelandic' (J. Acoustical Society of America, 106(2)) for a review of the acoustics of Icelandic vowel duration.
|
33 |
+
|
34 |
+
All phoneme durations are measured automatically with no human correction. The purpose of this demo is to evaluate the role of such tools in large-scale phonetic research. Therefore, no measurements shown in this demo should be taken as conclusive without some independent verification.
|
35 |
"""
|
36 |
)
|
37 |
|
|
|
39 |
with gr.Column():
|
40 |
wmenu1 = gr.Dropdown(kws,label="Word",value="hala")
|
41 |
lmenu1 = gr.Dropdown(["L1", "L2","All"],label="Speaker group",value="L1")
|
42 |
+
amenu1 = gr.Dropdown(["MFA", "CTC"],label="Aligner",value="CTC")
|
43 |
|
44 |
btn1 = gr.Button(value="Update Plot 1")
|
45 |
|
|
|
50 |
with gr.Column():
|
51 |
wmenu2 = gr.Dropdown(kws,label="Word",value="halla")
|
52 |
lmenu2 = gr.Dropdown(["L1", "L2","All"],label="Speaker group",value="L1")
|
53 |
+
amenu2 = gr.Dropdown(["MFA", "CTC"],label="Aligner",value="CTC")
|
54 |
|
55 |
btn2 = gr.Button(value="Update Plot 2")
|
56 |
pl2 = gr.Plot()
|
vowel_length.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import os
|
2 |
import numpy as np
|
3 |
from collections import defaultdict
|
4 |
import matplotlib
|
@@ -11,10 +11,10 @@ def getwps(start,end,phones):
|
|
11 |
return [(p,s,e) for p,s,e in phones if (s>=start) & (e<=end)]
|
12 |
|
13 |
# read align data from mfa file
|
14 |
-
def read_mfa(
|
15 |
-
with open(apath,'r') as handle:
|
16 |
-
|
17 |
-
f = [l.split(',') for l in
|
18 |
wlines = [(w,float(s),float(e)) for s,e,w,t,_ in f if t=='words']
|
19 |
plines = [(p,float(s),float(e)) for s,e,p,t,_ in f if t=='phones']
|
20 |
aligns = [(w,s,e,getwps(s,e,plines)) for w,s,e in wlines]
|
@@ -37,7 +37,7 @@ def read_ph_key(fpath):
|
|
37 |
f = handle.read().splitlines()
|
38 |
f = [l.split('\t') for l in f[1:]]
|
39 |
|
40 |
-
vcdict = {'
|
41 |
'mfa': defaultdict(lambda: defaultdict(tuple))}
|
42 |
for wline in f:
|
43 |
vcdict[wline[1]][wline[0]][wline[2]] = _winfo(wline)
|
@@ -45,17 +45,17 @@ def read_ph_key(fpath):
|
|
45 |
return _d2d(vcdict)
|
46 |
|
47 |
|
48 |
-
def get_vc_dur(kwd,atype,dat,vcd,
|
49 |
d = [l for l in dat if kwd in l[-1]]
|
50 |
prlist = []
|
51 |
|
52 |
-
if atype=='
|
53 |
pspel = kwd
|
54 |
|
55 |
for l in d:
|
56 |
-
|
57 |
-
if os.path.exists(apath):
|
58 |
-
aligns = read_mfa(
|
59 |
aligns = [a for a in aligns if a[0] == kwd]
|
60 |
for al in aligns:
|
61 |
if atype == 'mfa': # get this recording's phone spelling
|
@@ -74,7 +74,7 @@ def get_vc_dur(kwd,atype,dat,vcd,adir):
|
|
74 |
# TODO:
|
75 |
# pass word, lang, aln info for fig title;
|
76 |
# pass r/l info for point colour
|
77 |
-
def displ(prinfo):
|
78 |
rto = np.mean([v/c for v,c in prinfo])
|
79 |
vs = [1000*v for v,c in prinfo]
|
80 |
cs = [1000*c for v,c in prinfo]
|
@@ -88,7 +88,7 @@ def displ(prinfo):
|
|
88 |
|
89 |
plt.xlabel("Vowel length (ms)")
|
90 |
plt.ylabel("Consonant length (ms)")
|
91 |
-
plt.title(f'
|
92 |
return(rto,fig)
|
93 |
|
94 |
|
@@ -96,7 +96,7 @@ def displ(prinfo):
|
|
96 |
#run analysis
|
97 |
# for keyword, speaker-background, align-source,
|
98 |
# vowel/consonant index key, and dataset
|
99 |
-
def runan(kwd,spl,aln,vck,dat,
|
100 |
kwd = kwd.lower()
|
101 |
spl = spl.lower()
|
102 |
aln = aln.lower()
|
@@ -111,26 +111,34 @@ def runan(kwd,spl,aln,vck,dat,sources):
|
|
111 |
d = [l for l in dat if l[8].lower() == 'icelandic']
|
112 |
if spl == 'l2':
|
113 |
d = [l for l in dat if l[8].lower() != 'icelandic']
|
114 |
-
prinfo = get_vc_dur(kwd,aln,d,vcd,
|
115 |
if len(prinfo) >5:
|
116 |
-
r,f = displ(prinfo)
|
117 |
return(f)
|
118 |
else:
|
119 |
return 0
|
120 |
# TODO THIS CASE
|
121 |
|
122 |
|
123 |
-
def setup(
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
|
129 |
-
|
130 |
-
|
131 |
-
kws = sorted(list(set(kws)))
|
132 |
|
133 |
-
return dat,
|
134 |
|
135 |
|
136 |
|
|
|
1 |
+
import os, pickle
|
2 |
import numpy as np
|
3 |
from collections import defaultdict
|
4 |
import matplotlib
|
|
|
11 |
return [(p,s,e) for p,s,e in phones if (s>=start) & (e<=end)]
|
12 |
|
13 |
# parse word and phone alignments from the text of an alignment CSV
|
14 |
+
def read_mfa(afile):
|
15 |
+
#with open(apath,'r') as handle:
|
16 |
+
# f = handle.read().#splitlines()
|
17 |
+
f = [l.split(',') for l in afile.splitlines()]
|
18 |
wlines = [(w,float(s),float(e)) for s,e,w,t,_ in f if t=='words']
|
19 |
plines = [(p,float(s),float(e)) for s,e,p,t,_ in f if t=='phones']
|
20 |
aligns = [(w,s,e,getwps(s,e,plines)) for w,s,e in wlines]
|
|
|
37 |
f = handle.read().splitlines()
|
38 |
f = [l.split('\t') for l in f[1:]]
|
39 |
|
40 |
+
vcdict = {'ctc': defaultdict(lambda: defaultdict(tuple)), \
|
41 |
'mfa': defaultdict(lambda: defaultdict(tuple))}
|
42 |
for wline in f:
|
43 |
vcdict[wline[1]][wline[0]][wline[2]] = _winfo(wline)
|
|
|
45 |
return _d2d(vcdict)
|
46 |
|
47 |
|
48 |
+
def get_vc_dur(kwd,atype,dat,vcd,csvdict):
|
49 |
d = [l for l in dat if kwd in l[-1]]
|
50 |
prlist = []
|
51 |
|
52 |
+
if atype=='ctc': # only one pronunciation-spelling for ctc
|
53 |
pspel = kwd
|
54 |
|
55 |
for l in d:
|
56 |
+
akey = f'{atype}_csv/{l[3].split(".")[0]}.csv'
|
57 |
+
if akey in csvdict.keys():#os.path.exists(apath):
|
58 |
+
aligns = read_mfa(csvdict[akey])
|
59 |
aligns = [a for a in aligns if a[0] == kwd]
|
60 |
for al in aligns:
|
61 |
if atype == 'mfa': # get this recording's phone spelling
|
|
|
74 |
# TODO:
|
75 |
# pass word, lang, aln info for fig title;
|
76 |
# pass r/l info for point colour
|
77 |
+
def displ(prinfo,kwd):
|
78 |
rto = np.mean([v/c for v,c in prinfo])
|
79 |
vs = [1000*v for v,c in prinfo]
|
80 |
cs = [1000*c for v,c in prinfo]
|
|
|
88 |
|
89 |
plt.xlabel("Vowel length (ms)")
|
90 |
plt.ylabel("Consonant length (ms)")
|
91 |
+
plt.title(f'{kwd.upper()}\nV/C duration ratio: {round(rto,2)}')
|
92 |
return(rto,fig)
|
93 |
|
94 |
|
|
|
96 |
#run analysis
|
97 |
# for keyword, speaker-background, align-source,
|
98 |
# vowel/consonant index key, and dataset
|
99 |
+
def runan(kwd,spl,aln,vck,dat,csvs):
|
100 |
kwd = kwd.lower()
|
101 |
spl = spl.lower()
|
102 |
aln = aln.lower()
|
|
|
111 |
d = [l for l in dat if l[8].lower() == 'icelandic']
|
112 |
if spl == 'l2':
|
113 |
d = [l for l in dat if l[8].lower() != 'icelandic']
|
114 |
+
prinfo = get_vc_dur(kwd,aln,d,vcd,csvs)
|
115 |
if len(prinfo) >5:
|
116 |
+
r,f = displ(prinfo,kwd)
|
117 |
return(f)
|
118 |
else:
|
119 |
return 0
|
120 |
# TODO THIS CASE
|
121 |
|
122 |
|
123 |
+
def setup(metadatas, phkey, align_csvs):
    """Load all data the analysis needs.

    Parameters:
        metadatas: iterable of paths to tab-separated metadata files. The
            first line of each file is skipped as a header.
        phkey: path handed to read_ph_key to build the vowel/consonant key.
        align_csvs: path to a pickle file holding the alignment data.

    Returns:
        (dat, vck, kws, csvs) where
        dat  -- rows from every metadata file, concatenated in the order
                given; each row is a list of column strings whose last
                element is a tuple made by splitting the last column on
                single spaces.
        vck  -- the result of read_ph_key(phkey).
        kws  -- sorted keys of vck['ctc'].
        csvs -- the unpickled object; get_vc_dur looks entries up by
                '<aligner>_csv/<name>.csv' and parses them as CSV text.
    """
    def _loadr(meta):
        # Read one metadata TSV, dropping the header line.
        with open(meta, 'r') as handle:
            lines = handle.read().splitlines()
        rows = [line.split('\t') for line in lines[1:]]
        # The last column is space-separated; store it as a tuple.
        return [row[:-1] + [tuple(row[-1].split(' '))] for row in rows]

    dat = []
    for meta in metadatas:
        dat += _loadr(meta)

    # Fix: use the `phkey` parameter. The previous body referenced
    # `ph_key`, which is not defined in this function's scope.
    vck = read_ph_key(phkey)
    kws = sorted(vck['ctc'])

    # NOTE: unpickling can execute arbitrary code; only load a pickle
    # that was produced locally from trusted data.
    with open(align_csvs, 'rb') as handle:
        csvs = pickle.load(handle)

    return dat, vck, kws, csvs
|
142 |
|
143 |
|
144 |
|