Spaces:

clr
/

phonalign

Sleeping

App Files Files Community

clr committed on Feb 7, 2023

Commit

d5354f5

•

1 Parent(s): 09397d4

Upload 2 files

Browse files

Files changed (2) hide show

app.py +11 -11
vowel_length.py +33 -25

app.py CHANGED Viewed

@@ -2,20 +2,18 @@ import gradio as gr
 import vowel_length
-tsv = 'data/set1.tsv'
-ph_key = 'data/key_set1_mfaCapt.tsv'
-sources = {'w2v2':'data/set1_w2v2/','mfa':'data/set1_mfa/'}
-dat,vck,kws = vowel_length.setup(tsv,ph_key)
-kws =  ['ala', 'bala', 'dala', 'hala', 'skjala', 'kala', 'mala', 'smala', 'sala', 'tala', 'vala', 'dvala', 'svala', 'alla', 'falla', 'galla', 'halla', 'fjalla', 'hjalla', 'mjalla', 'spjalla', 'kalla', 'skalla', 'lalla', 'malla', 'palla', 'bralla', 'valla', 'baki', 'maki', 'raki', 'taki', 'vaki', 'þaki', 'baggi', 'maggi', 'raggi', 'leki', 'speki', 'reki', 'breki', 'veki', 'eggi', 'beggi', 'skeggi', 'leggi', 'veggi']
 #    runan(w,'l1','w2v2',vck,dat,sources)
 #    runan(w,'l1','mfa',vck,dat,sources)
 def manager(word,group,aligner,side):
-    fig = vowel_length.runan(word,group,aligner,vck,dat,sources) #TODO add colour by plot-side
     print(side)
     return fig
@@ -28,10 +26,12 @@ with bl:
     #    with gr.TabItem("One"):
             gr.Markdown(
             """
-            # Length of long and short Icelandic vowels
             Choose a word, speaker group, and aligner type. Available speaker groups are native speakers, second-language speakers, or all. Aligner options are Montreal Forced Aligner (MFA) and CTC decoding with Wav2vec-2.0.
-            All phoneme durations are measured automatically with no human correction. The purpose of this demo is to evaluate the role of such tools in large-scale phonetic research. Therefore, no measurements shown in this demo should be taken as conclusive without some independent verification. The general expectation is that syllables with long stressed vowels followed by short consonants have a higher vowel:consonant duration ratio, while syllables with short stressed vowels followed by long consonants have a lower vowel:consonant ratio. However, a great many other factors affect the relative duration in any one recorded token. See Pind 1999, 'Speech segment durations and quantity in Icelandic' (J. Acoustical Society of America, 106(2)) for a review of the acoustics of Icelandic vowel duration.
             """
                 )
@@ -39,7 +39,7 @@ with bl:
                 with gr.Column():
                     wmenu1 = gr.Dropdown(kws,label="Word",value="hala")
                     lmenu1 = gr.Dropdown(["L1", "L2","All"],label="Speaker group",value="L1")
-                    amenu1 = gr.Dropdown(["MFA", "w2v2"],label="Aligner",value="w2v2")
                     btn1 = gr.Button(value="Update Plot 1")
@@ -50,7 +50,7 @@ with bl:
                 with gr.Column():
                     wmenu2 = gr.Dropdown(kws,label="Word",value="halla")
                     lmenu2 = gr.Dropdown(["L1", "L2","All"],label="Speaker group",value="L1")
-                    amenu2 = gr.Dropdown(["MFA", "w2v2"],label="Aligner",value="w2v2")
                     btn2 = gr.Button(value="Update Plot 2")
                     pl2 = gr.Plot()

 import vowel_length
+# Input data for the demo: per-set metadata tables, the phone key, and the
+# pickled alignment CSVs handed to vowel_length.setup.
+meta_tsv = ['data/set1.tsv','data/set2.tsv']
+ph_key = 'data/key_all.tsv'
+align_output = 'data/aligns_csv.pickle'
+# Pass meta_tsv: the former single-file name `tsv` no longer exists after
+# this change, so referencing it raised NameError at import time.
+dat,vck,kws,csvs = vowel_length.setup(meta_tsv,ph_key,align_output)
 #    runan(w,'l1','w2v2',vck,dat,sources)
 #    runan(w,'l1','mfa',vck,dat,sources)
 def manager(word,group,aligner,side):
+    # Produce the plot for one panel of the demo.
+    # word, group and aligner are forwarded unchanged to vowel_length.runan
+    # together with the module-level vck, dat and csvs. side is currently
+    # only printed; it is not yet used for colouring (see the TODO below).
+    # Whatever runan returns is returned as-is.
+    fig = vowel_length.runan(word,group,aligner,vck,dat,csvs) #TODO add colour by plot-side
     print(side)
     return fig
     #    with gr.TabItem("One"):
             gr.Markdown(
             """
+            # Long and short Icelandic vowels
             Choose a word, speaker group, and aligner type. Available speaker groups are native speakers, second-language speakers, or all. Aligner options are Montreal Forced Aligner (MFA) and CTC decoding with Wav2vec-2.0.
+    The general expectation is that syllables with long stressed vowels followed by short consonants have a higher vowel:consonant duration ratio, while syllables with short stressed vowels followed by long consonants have a lower vowel:consonant ratio. However, a great many other factors affect the relative duration in any one recorded token. See Pind 1999, 'Speech segment durations and quantity in Icelandic' (J. Acoustical Society of America, 106(2)) for a review of the acoustics of Icelandic vowel duration.
+ All phoneme durations are measured automatically with no human correction. The purpose of this demo is to evaluate the role of such tools in large-scale phonetic research. Therefore, no measurements shown in this demo should be taken as conclusive without some independent verification.
             """
                 )
                 with gr.Column():
                     wmenu1 = gr.Dropdown(kws,label="Word",value="hala")
                     lmenu1 = gr.Dropdown(["L1", "L2","All"],label="Speaker group",value="L1")
+                    amenu1 = gr.Dropdown(["MFA", "CTC"],label="Aligner",value="CTC")
                     btn1 = gr.Button(value="Update Plot 1")
                 with gr.Column():
                     wmenu2 = gr.Dropdown(kws,label="Word",value="halla")
                     lmenu2 = gr.Dropdown(["L1", "L2","All"],label="Speaker group",value="L1")
+                    amenu2 = gr.Dropdown(["MFA", "CTC"],label="Aligner",value="CTC")
                     btn2 = gr.Button(value="Update Plot 2")
                     pl2 = gr.Plot()

vowel_length.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import os
 import numpy as np
 from collections import defaultdict
 import matplotlib
@@ -11,10 +11,10 @@ def getwps(start,end,phones):
     return [(p,s,e) for p,s,e in phones if (s>=start) & (e<=end)]
 # read align data from mfa file
-def read_mfa(apath):
-    with open(apath,'r') as handle:
-        f = handle.read().splitlines()
-    f = [l.split(',') for l in f]
     wlines = [(w,float(s),float(e)) for s,e,w,t,_ in f if t=='words']
     plines = [(p,float(s),float(e)) for s,e,p,t,_ in f if t=='phones']
     aligns = [(w,s,e,getwps(s,e,plines)) for w,s,e in wlines]
@@ -37,7 +37,7 @@ def read_ph_key(fpath):
         f = handle.read().splitlines()
     f = [l.split('\t') for l in f[1:]]
-    vcdict = {'w2v2': defaultdict(lambda: defaultdict(tuple)), \
                   'mfa': defaultdict(lambda: defaultdict(tuple))}
     for wline in f:
         vcdict[wline[1]][wline[0]][wline[2]] = _winfo(wline)
@@ -45,17 +45,17 @@ def read_ph_key(fpath):
     return _d2d(vcdict)
-def get_vc_dur(kwd,atype,dat,vcd,adir):
     d = [l for l in dat if kwd in l[-1]]
     prlist = []
-    if atype=='w2v2': # only one pronunciation-spelling for ctc
         pspel = kwd
     for l in d:
-        apath = f'{adir}{l[2]}/{l[3].split(".")[0]}.csv'
-        if os.path.exists(apath):
-            aligns = read_mfa(apath)
             aligns = [a for a in aligns if a[0] == kwd]
             for al in aligns:
                 if atype == 'mfa': # get this recording's phone spelling
@@ -74,7 +74,7 @@ def get_vc_dur(kwd,atype,dat,vcd,adir):
 # TODO:
 # pass word, lang, aln info for fig title;
 # pass r/l info for point colour
-def displ(prinfo):
     rto = np.mean([v/c for v,c in prinfo])
     vs = [1000*v for v,c in prinfo]
     cs = [1000*c for v,c in prinfo]
@@ -88,7 +88,7 @@ def displ(prinfo):
     plt.xlabel("Vowel length (ms)")
     plt.ylabel("Consonant length (ms)")
-    plt.title(f'TITLE\nRatio: {round(rto,2)}')
     return(rto,fig)
@@ -96,7 +96,7 @@ def displ(prinfo):
 #run analysis
 # for keyword, speaker-background, align-source,
 # vowel/consonant index key, and dataset
-def runan(kwd,spl,aln,vck,dat,sources):
     kwd = kwd.lower()
     spl = spl.lower()
     aln = aln.lower()
@@ -111,26 +111,34 @@ def runan(kwd,spl,aln,vck,dat,sources):
         d = [l for l in dat if l[8].lower() == 'icelandic']
     if spl == 'l2':
         d = [l for l in dat if l[8].lower() != 'icelandic']
-    prinfo = get_vc_dur(kwd,aln,d,vcd,sources[aln])
     if len(prinfo) >5:
-        r,f = displ(prinfo)
         return(f)
     else:
         return 0
     # TODO THIS CASE
-def setup(meta,phkey):
-    with open(meta,'r') as handle:
-        dat = handle.read().splitlines()
-    dat = [l.split('\t') for l in dat[1:]]
-    dat = [l[:-1] + [tuple(l[-1].split(' '))] for l in dat]
-    kws = [l[-1] for l in dat]
-    kws = [i for t in kws for i in t]
-    kws = sorted(list(set(kws)))
-    return dat, read_ph_key(phkey), kws

+import os, pickle
 import numpy as np
 from collections import defaultdict
 import matplotlib
     return [(p,s,e) for p,s,e in phones if (s>=start) & (e<=end)]
 # read align data from mfa file
+def read_mfa(afile):
+    #with open(apath,'r') as handle:
+       # f = handle.read().#splitlines()
+    f = [l.split(',') for l in afile.splitlines()]
     wlines = [(w,float(s),float(e)) for s,e,w,t,_ in f if t=='words']
     plines = [(p,float(s),float(e)) for s,e,p,t,_ in f if t=='phones']
     aligns = [(w,s,e,getwps(s,e,plines)) for w,s,e in wlines]
         f = handle.read().splitlines()
     f = [l.split('\t') for l in f[1:]]
+    vcdict = {'ctc': defaultdict(lambda: defaultdict(tuple)), \
                   'mfa': defaultdict(lambda: defaultdict(tuple))}
     for wline in f:
         vcdict[wline[1]][wline[0]][wline[2]] = _winfo(wline)
     return _d2d(vcdict)
+def get_vc_dur(kwd,atype,dat,vcd,csvdict):
     d = [l for l in dat if kwd in l[-1]]
     prlist = []
+    if atype=='ctc': # only one pronunciation-spelling for ctc
         pspel = kwd
     for l in d:
+        akey = f'{atype}_csv/{l[3].split(".")[0]}.csv'
+        if akey in csvdict.keys():#os.path.exists(apath):
+            aligns = read_mfa(csvdict[akey])
             aligns = [a for a in aligns if a[0] == kwd]
             for al in aligns:
                 if atype == 'mfa': # get this recording's phone spelling
 # TODO:
 # pass word, lang, aln info for fig title;
 # pass r/l info for point colour
+def displ(prinfo,kwd):
     rto = np.mean([v/c for v,c in prinfo])
     vs = [1000*v for v,c in prinfo]
     cs = [1000*c for v,c in prinfo]
     plt.xlabel("Vowel length (ms)")
     plt.ylabel("Consonant length (ms)")
+    plt.title(f'{kwd.upper()}\nV/C duration ratio: {round(rto,2)}')
     return(rto,fig)
 #run analysis
 # for keyword, speaker-background, align-source,
 # vowel/consonant index key, and dataset
+def runan(kwd,spl,aln,vck,dat,csvs):
     kwd = kwd.lower()
     spl = spl.lower()
     aln = aln.lower()
         d = [l for l in dat if l[8].lower() == 'icelandic']
     if spl == 'l2':
         d = [l for l in dat if l[8].lower() != 'icelandic']
+    prinfo = get_vc_dur(kwd,aln,d,vcd,csvs)
     if len(prinfo) >5:
+        r,f = displ(prinfo,kwd)
         return(f)
     else:
         return 0
     # TODO THIS CASE
+def setup(metadatas,phkey,align_csvs):
+    """Load the metadata rows, phone key, keyword list and alignment CSVs.
+
+    metadatas:  iterable of paths to tab-separated metadata files; each
+                file's first line is skipped as a header.
+    phkey:      path to the phone key file, parsed by read_ph_key.
+    align_csvs: path to a pickle file holding the alignment data that
+                get_vc_dur looks up by key.
+
+    Returns (dat, vck, kws, csvs): the concatenated metadata rows, the
+    parsed phone key, the sorted keys of vck['ctc'], and the unpickled
+    alignment object.
+    """
+    def _loadr(meta):
+        # Read one metadata file, drop its header line, split each row on
+        # tabs, and turn the last (space-separated) field into a tuple.
+        # UTF-8 is requested explicitly so decoding does not depend on the
+        # platform's default locale.
+        with open(meta,'r',encoding='utf-8') as handle:
+            rows = [l.split('\t') for l in handle.read().splitlines()[1:]]
+        return [l[:-1] + [tuple(l[-1].split(' '))] for l in rows]
+    # Concatenate the rows of every metadata file, in the order given.
+    dat = []
+    for tsv in metadatas:
+        dat += _loadr(tsv)
+    # Use the phkey argument; the earlier code referenced ph_key, a name
+    # that is not defined in this module, and raised NameError.
+    vck = read_ph_key(phkey)
+    kws = sorted(vck['ctc'].keys())
+    # NOTE(review): pickle.load can execute arbitrary code while loading;
+    # align_csvs must only ever point at a trusted, locally produced file.
+    with open(align_csvs, 'rb') as handle:
+        csvs = pickle.load(handle)
+    return dat, vck, kws, csvs