clr committed on
Commit
d5354f5
1 Parent(s): 09397d4

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +11 -11
  2. vowel_length.py +33 -25
app.py CHANGED
@@ -2,20 +2,18 @@ import gradio as gr
2
  import vowel_length
3
 
4
 
5
- tsv = 'data/set1.tsv'
6
- ph_key = 'data/key_set1_mfaCapt.tsv'
7
- sources = {'w2v2':'data/set1_w2v2/','mfa':'data/set1_mfa/'}
8
-
9
- dat,vck,kws = vowel_length.setup(tsv,ph_key)
10
- kws = ['ala', 'bala', 'dala', 'hala', 'skjala', 'kala', 'mala', 'smala', 'sala', 'tala', 'vala', 'dvala', 'svala', 'alla', 'falla', 'galla', 'halla', 'fjalla', 'hjalla', 'mjalla', 'spjalla', 'kalla', 'skalla', 'lalla', 'malla', 'palla', 'bralla', 'valla', 'baki', 'maki', 'raki', 'taki', 'vaki', 'þaki', 'baggi', 'maggi', 'raggi', 'leki', 'speki', 'reki', 'breki', 'veki', 'eggi', 'beggi', 'skeggi', 'leggi', 'veggi']
11
 
 
12
 
13
  # runan(w,'l1','w2v2',vck,dat,sources)
14
  # runan(w,'l1','mfa',vck,dat,sources)
15
 
16
 
17
  def manager(word,group,aligner,side):
18
- fig = vowel_length.runan(word,group,aligner,vck,dat,sources) #TODO add colour by plot-side
19
  print(side)
20
  return fig
21
 
@@ -28,10 +26,12 @@ with bl:
28
  # with gr.TabItem("One"):
29
  gr.Markdown(
30
  """
31
- # Length of long and short Icelandic vowels
32
  Choose a word, speaker group, and aligner type. Available speaker groups are native speakers, second-language speakers, or all. Aligner options are Montreal Forced Aligner (MFA) and CTC decoding with Wav2vec-2.0.
33
 
34
- All phoneme durations are measured automatically with no human correction. The purpose of this demo is to evaluate the role of such tools in large-scale phonetic research. Therefore, no measurements shown in this demo should be taken as conclusive without some independent verification. The general expectation is that syllables with long stressed vowels followed by short consonants have a higher vowel:consonant duration ratio, while syllables with short stressed vowels followed by long consonants have a lower vowel:consonant ratio. However, a great many other factors affect the relative duration in any one recorded token. See Pind 1999, 'Speech segment durations and quantity in Icelandic' (J. Acoustical Society of America, 106(2)) for a review of the acoustics of Icelandic vowel duration.
 
 
35
  """
36
  )
37
 
@@ -39,7 +39,7 @@ with bl:
39
  with gr.Column():
40
  wmenu1 = gr.Dropdown(kws,label="Word",value="hala")
41
  lmenu1 = gr.Dropdown(["L1", "L2","All"],label="Speaker group",value="L1")
42
- amenu1 = gr.Dropdown(["MFA", "w2v2"],label="Aligner",value="w2v2")
43
 
44
  btn1 = gr.Button(value="Update Plot 1")
45
 
@@ -50,7 +50,7 @@ with bl:
50
  with gr.Column():
51
  wmenu2 = gr.Dropdown(kws,label="Word",value="halla")
52
  lmenu2 = gr.Dropdown(["L1", "L2","All"],label="Speaker group",value="L1")
53
- amenu2 = gr.Dropdown(["MFA", "w2v2"],label="Aligner",value="w2v2")
54
 
55
  btn2 = gr.Button(value="Update Plot 2")
56
  pl2 = gr.Plot()
 
2
  import vowel_length
3
 
4
 
5
# Input data for the demo: metadata TSVs, the phone key, and the pickled
# aligner output.
meta_tsv = ['data/set1.tsv','data/set2.tsv']
ph_key = 'data/key_all.tsv'
align_output = 'data/aligns_csv.pickle'

# Pass meta_tsv here: no variable named `tsv` is defined in this module, so
# referencing it would raise NameError as soon as the app is imported.
dat,vck,kws,csvs = vowel_length.setup(meta_tsv,ph_key,align_output)
10
 
11
  # runan(w,'l1','w2v2',vck,dat,sources)
12
  # runan(w,'l1','mfa',vck,dat,sources)
13
 
14
 
15
  def manager(word,group,aligner,side):
16
+ fig = vowel_length.runan(word,group,aligner,vck,dat,csvs) #TODO add colour by plot-side
17
  print(side)
18
  return fig
19
 
 
26
  # with gr.TabItem("One"):
27
  gr.Markdown(
28
  """
29
+ # Long and short Icelandic vowels
30
  Choose a word, speaker group, and aligner type. Available speaker groups are native speakers, second-language speakers, or all. Aligner options are Montreal Forced Aligner (MFA) and CTC decoding with Wav2vec-2.0.
31
 
32
+ The general expectation is that syllables with long stressed vowels followed by short consonants have a higher vowel:consonant duration ratio, while syllables with short stressed vowels followed by long consonants have a lower vowel:consonant ratio. However, a great many other factors affect the relative duration in any one recorded token. See Pind 1999, 'Speech segment durations and quantity in Icelandic' (J. Acoustical Society of America, 106(2)) for a review of the acoustics of Icelandic vowel duration.
33
+
34
+ All phoneme durations are measured automatically with no human correction. The purpose of this demo is to evaluate the role of such tools in large-scale phonetic research. Therefore, no measurements shown in this demo should be taken as conclusive without some independent verification.
35
  """
36
  )
37
 
 
39
  with gr.Column():
40
  wmenu1 = gr.Dropdown(kws,label="Word",value="hala")
41
  lmenu1 = gr.Dropdown(["L1", "L2","All"],label="Speaker group",value="L1")
42
+ amenu1 = gr.Dropdown(["MFA", "CTC"],label="Aligner",value="CTC")
43
 
44
  btn1 = gr.Button(value="Update Plot 1")
45
 
 
50
  with gr.Column():
51
  wmenu2 = gr.Dropdown(kws,label="Word",value="halla")
52
  lmenu2 = gr.Dropdown(["L1", "L2","All"],label="Speaker group",value="L1")
53
+ amenu2 = gr.Dropdown(["MFA", "CTC"],label="Aligner",value="CTC")
54
 
55
  btn2 = gr.Button(value="Update Plot 2")
56
  pl2 = gr.Plot()
vowel_length.py CHANGED
@@ -1,4 +1,4 @@
1
- import os
2
  import numpy as np
3
  from collections import defaultdict
4
  import matplotlib
@@ -11,10 +11,10 @@ def getwps(start,end,phones):
11
  return [(p,s,e) for p,s,e in phones if (s>=start) & (e<=end)]
12
 
13
  # read align data from mfa file
14
- def read_mfa(apath):
15
- with open(apath,'r') as handle:
16
- f = handle.read().splitlines()
17
- f = [l.split(',') for l in f]
18
  wlines = [(w,float(s),float(e)) for s,e,w,t,_ in f if t=='words']
19
  plines = [(p,float(s),float(e)) for s,e,p,t,_ in f if t=='phones']
20
  aligns = [(w,s,e,getwps(s,e,plines)) for w,s,e in wlines]
@@ -37,7 +37,7 @@ def read_ph_key(fpath):
37
  f = handle.read().splitlines()
38
  f = [l.split('\t') for l in f[1:]]
39
 
40
- vcdict = {'w2v2': defaultdict(lambda: defaultdict(tuple)), \
41
  'mfa': defaultdict(lambda: defaultdict(tuple))}
42
  for wline in f:
43
  vcdict[wline[1]][wline[0]][wline[2]] = _winfo(wline)
@@ -45,17 +45,17 @@ def read_ph_key(fpath):
45
  return _d2d(vcdict)
46
 
47
 
48
- def get_vc_dur(kwd,atype,dat,vcd,adir):
49
  d = [l for l in dat if kwd in l[-1]]
50
  prlist = []
51
 
52
- if atype=='w2v2': # only one pronunciation-spelling for ctc
53
  pspel = kwd
54
 
55
  for l in d:
56
- apath = f'{adir}{l[2]}/{l[3].split(".")[0]}.csv'
57
- if os.path.exists(apath):
58
- aligns = read_mfa(apath)
59
  aligns = [a for a in aligns if a[0] == kwd]
60
  for al in aligns:
61
  if atype == 'mfa': # get this recording's phone spelling
@@ -74,7 +74,7 @@ def get_vc_dur(kwd,atype,dat,vcd,adir):
74
  # TODO:
75
  # pass word, lang, aln info for fig title;
76
  # pass r/l info for point colour
77
- def displ(prinfo):
78
  rto = np.mean([v/c for v,c in prinfo])
79
  vs = [1000*v for v,c in prinfo]
80
  cs = [1000*c for v,c in prinfo]
@@ -88,7 +88,7 @@ def displ(prinfo):
88
 
89
  plt.xlabel("Vowel length (ms)")
90
  plt.ylabel("Consonant length (ms)")
91
- plt.title(f'TITLE\nRatio: {round(rto,2)}')
92
  return(rto,fig)
93
 
94
 
@@ -96,7 +96,7 @@ def displ(prinfo):
96
  #run analysis
97
  # for keyword, speaker-background, align-source,
98
  # vowel/consonant index key, and dataset
99
- def runan(kwd,spl,aln,vck,dat,sources):
100
  kwd = kwd.lower()
101
  spl = spl.lower()
102
  aln = aln.lower()
@@ -111,26 +111,34 @@ def runan(kwd,spl,aln,vck,dat,sources):
111
  d = [l for l in dat if l[8].lower() == 'icelandic']
112
  if spl == 'l2':
113
  d = [l for l in dat if l[8].lower() != 'icelandic']
114
- prinfo = get_vc_dur(kwd,aln,d,vcd,sources[aln])
115
  if len(prinfo) >5:
116
- r,f = displ(prinfo)
117
  return(f)
118
  else:
119
  return 0
120
  # TODO THIS CASE
121
 
122
 
123
- def setup(meta,phkey):
124
- with open(meta,'r') as handle:
125
- dat = handle.read().splitlines()
126
- dat = [l.split('\t') for l in dat[1:]]
127
- dat = [l[:-1] + [tuple(l[-1].split(' '))] for l in dat]
 
 
 
 
 
 
 
 
 
128
 
129
- kws = [l[-1] for l in dat]
130
- kws = [i for t in kws for i in t]
131
- kws = sorted(list(set(kws)))
132
 
133
- return dat, read_ph_key(phkey), kws
134
 
135
 
136
 
 
1
+ import os, pickle
2
  import numpy as np
3
  from collections import defaultdict
4
  import matplotlib
 
11
  return [(p,s,e) for p,s,e in phones if (s>=start) & (e<=end)]
12
 
13
  # read align data from mfa file
14
+ def read_mfa(afile):
15
+ #with open(apath,'r') as handle:
16
+ # f = handle.read().#splitlines()
17
+ f = [l.split(',') for l in afile.splitlines()]
18
  wlines = [(w,float(s),float(e)) for s,e,w,t,_ in f if t=='words']
19
  plines = [(p,float(s),float(e)) for s,e,p,t,_ in f if t=='phones']
20
  aligns = [(w,s,e,getwps(s,e,plines)) for w,s,e in wlines]
 
37
  f = handle.read().splitlines()
38
  f = [l.split('\t') for l in f[1:]]
39
 
40
+ vcdict = {'ctc': defaultdict(lambda: defaultdict(tuple)), \
41
  'mfa': defaultdict(lambda: defaultdict(tuple))}
42
  for wline in f:
43
  vcdict[wline[1]][wline[0]][wline[2]] = _winfo(wline)
 
45
  return _d2d(vcdict)
46
 
47
 
48
+ def get_vc_dur(kwd,atype,dat,vcd,csvdict):
49
  d = [l for l in dat if kwd in l[-1]]
50
  prlist = []
51
 
52
+ if atype=='ctc': # only one pronunciation-spelling for ctc
53
  pspel = kwd
54
 
55
  for l in d:
56
+ akey = f'{atype}_csv/{l[3].split(".")[0]}.csv'
57
+ if akey in csvdict.keys():#os.path.exists(apath):
58
+ aligns = read_mfa(csvdict[akey])
59
  aligns = [a for a in aligns if a[0] == kwd]
60
  for al in aligns:
61
  if atype == 'mfa': # get this recording's phone spelling
 
74
  # TODO:
75
  # pass word, lang, aln info for fig title;
76
  # pass r/l info for point colour
77
+ def displ(prinfo,kwd):
78
  rto = np.mean([v/c for v,c in prinfo])
79
  vs = [1000*v for v,c in prinfo]
80
  cs = [1000*c for v,c in prinfo]
 
88
 
89
  plt.xlabel("Vowel length (ms)")
90
  plt.ylabel("Consonant length (ms)")
91
+ plt.title(f'{kwd.upper()}\nV/C duration ratio: {round(rto,2)}')
92
  return(rto,fig)
93
 
94
 
 
96
  #run analysis
97
  # for keyword, speaker-background, align-source,
98
  # vowel/consonant index key, and dataset
99
+ def runan(kwd,spl,aln,vck,dat,csvs):
100
  kwd = kwd.lower()
101
  spl = spl.lower()
102
  aln = aln.lower()
 
111
  d = [l for l in dat if l[8].lower() == 'icelandic']
112
  if spl == 'l2':
113
  d = [l for l in dat if l[8].lower() != 'icelandic']
114
+ prinfo = get_vc_dur(kwd,aln,d,vcd,csvs)
115
  if len(prinfo) >5:
116
+ r,f = displ(prinfo,kwd)
117
  return(f)
118
  else:
119
  return 0
120
  # TODO THIS CASE
121
 
122
 
123
def setup(metadatas,phkey,align_csvs):
    """Load the metadata rows, phone key, keyword list and alignment data.

    Args:
        metadatas: iterable of paths to tab-separated metadata files. The
            first line of each file is skipped as a header.
        phkey: path to the phone-key TSV, handed to read_ph_key.
        align_csvs: path to a pickle file containing the alignment data.

    Returns:
        A tuple (dat, vck, kws, csvs):
        dat  - rows of all metadata files concatenated in the given order;
               each row is a list whose last column has been split on
               spaces into a tuple.
        vck  - the value returned by read_ph_key(phkey).
        kws  - the sorted keys of vck['ctc'].
        csvs - the unpickled object from align_csvs (presumably a dict of
               '<aligner>_csv/<name>.csv' keys to CSV text - confirm against
               get_vc_dur, which indexes it that way).
    """
    def _loadr(meta):
        # Read one metadata TSV, dropping its header row.
        with open(meta,'r') as handle:
            rows = handle.read().splitlines()
        rows = [l.split('\t') for l in rows[1:]]
        # The last column is a space-separated list; keep it as a tuple.
        return [l[:-1] + [tuple(l[-1].split(' '))] for l in rows]

    dat = []
    for tsv in metadatas:
        dat += _loadr(tsv)

    # Use the `phkey` argument. Referring to `ph_key` here would ignore the
    # caller's argument and raise NameError unless a global of that name
    # happened to exist in this module.
    vck = read_ph_key(phkey)
    kws = sorted(vck['ctc'].keys())

    # NOTE: pickle.load can execute arbitrary code from the file; only point
    # align_csvs at a trusted, locally produced pickle.
    with open(align_csvs, 'rb') as handle:
        csvs = pickle.load(handle)

    return dat, vck, kws, csvs
142
 
143
 
144