|
import os |
|
import numpy as np |
|
from collections import defaultdict |
|
import matplotlib |
|
matplotlib.use('Agg') |
|
import matplotlib.pyplot as plt |
|
|
|
|
|
|
|
def getwps(start, end, phones):
    """Pick out the phone segments that lie entirely inside a time window.

    ``phones`` is an iterable of ``(label, seg_start, seg_end)`` triples.
    A triple is kept when ``seg_start >= start`` and ``seg_end <= end``
    (both bounds inclusive).  Kept triples are returned as a list, in the
    order they were given.
    """
    inside = []
    for label, seg_start, seg_end in phones:
        if seg_start >= start and seg_end <= end:
            inside.append((label, seg_start, seg_end))
    return inside
|
|
|
|
|
def read_mfa(apath):
    """Parse one alignment CSV into per-word entries with their phones.

    Every non-empty row must have five comma-separated fields: start time,
    end time, label, tier type, and one trailing field that is ignored.
    Only rows whose tier type is ``'words'`` or ``'phones'`` are used; rows
    with any other tier type (a header line, for instance) are ignored.

    Args:
        apath: Path of the CSV file to read.

    Returns:
        A list of ``(word, start, end, phones)`` tuples in file order.
        ``start`` and ``end`` are floats; ``phones`` is the list of
        ``(phone, start, end)`` tuples that ``getwps`` selects for the
        word's time span.

    Raises:
        ValueError: If a non-empty row does not have exactly five fields,
            or a start/end value on a used row is not a valid float.
    """
    # Imported here so this function is self-contained.
    import csv

    # csv.reader copes with quoted fields (e.g. a label containing a comma),
    # which a plain split(',') would break apart.  newline='' is what the csv
    # module asks for, and UTF-8 is stated explicitly so non-ASCII labels do
    # not depend on the platform's default encoding.
    with open(apath, 'r', encoding='utf-8', newline='') as handle:
        # Blank lines come back as empty rows; drop them instead of letting
        # the five-way unpacking below fail on them.
        rows = [row for row in csv.reader(handle) if row]

    wlines = [(w, float(s), float(e)) for s, e, w, t, _ in rows if t == 'words']
    plines = [(p, float(s), float(e)) for s, e, p, t, _ in rows if t == 'phones']

    # Attach to each word the phones that fall inside its time span.
    return [(w, s, e, getwps(s, e, plines)) for w, s, e in wlines]
|
|
|
|
|
|
|
def read_ph_key(fpath):
    """Load the vowel/consonant phone-index key from a tab-separated file.

    The first line of the file is skipped as a header and blank lines are
    ignored.  Every other row must provide at least five tab-separated
    columns, used as follows:

    * column 0 - second-level key of the result (callers in this file look
      it up with the lower-cased keyword),
    * column 1 - top-level key of the result; must be ``'w2v2'`` or
      ``'mfa'``,
    * column 2 - third-level key (the pronunciation/spelling string),
    * columns 3 and 4 - comma-separated integer indices, or ``X``.

    Args:
        fpath: Path of the key file.

    Returns:
        A plain nested dict ``{alignment_type: {word: {pron: (a, b)}}}``
        where ``a`` and ``b`` are tuples parsed from columns 3 and 4: each
        element is an ``int``, or the string ``'X'`` when the field holds
        ``X``.  Both ``'w2v2'`` and ``'mfa'`` are always present at the top
        level, even if the file has no rows for one of them.

    Raises:
        KeyError: If column 1 is neither ``'w2v2'`` nor ``'mfa'``.
        IndexError: If a non-blank row has fewer than five columns.
        ValueError: If an index field is neither ``X`` nor an integer.
    """
    def _indices(field):
        # 'X' is kept verbatim as a marker; everything else must be an int.
        return tuple(tok if tok == 'X' else int(tok) for tok in field.split(','))

    # Explicit UTF-8 so non-ASCII words and phone symbols are decoded the
    # same way regardless of the platform's default encoding.
    with open(fpath, 'r', encoding='utf-8') as handle:
        lines = handle.read().splitlines()

    vcdict = {'w2v2': {}, 'mfa': {}}
    for line in lines[1:]:
        if not line.strip():
            # A stray blank line would otherwise fail with IndexError.
            continue
        cols = line.split('\t')
        entry = (_indices(cols[3]), _indices(cols[4]))
        vcdict[cols[1]].setdefault(cols[0], {})[cols[2]] = entry

    return vcdict
|
|
|
|
|
def get_vc_dur(kwd, atype, dat, vcd, adir):
    """Collect (vowel duration, consonant duration) pairs for one keyword.

    Rows of ``dat`` whose last element contains ``kwd`` are considered.  For
    each such row an alignment file path is built as
    ``{adir}{row[2]}/{stem}.csv`` where ``stem`` is ``row[3]`` up to its
    first ``.``; rows whose file does not exist are skipped silently.

    Every aligned word equal to ``kwd`` in the file contributes one pair.
    The entry ``vcd[key]`` supplies two index tuples into the word's phone
    list: the first gives the vowel span, the second the consonant span.
    Each span runs from the start time of its first indexed phone to the end
    time of its last indexed phone.  ``key`` is ``kwd`` itself when
    ``atype`` is ``'w2v2'`` and the space-joined phone labels of the aligned
    word when ``atype`` is ``'mfa'``.

    Returns a list of ``(vowel_duration, consonant_duration)`` tuples in the
    time unit of the alignment files.
    """
    durations = []

    # For 'w2v2' the lookup key is fixed; for 'mfa' it is recomputed per
    # aligned word inside the loop.
    if atype == 'w2v2':
        pspel = kwd

    matching_rows = [row for row in dat if kwd in row[-1]]
    for row in matching_rows:
        stem = row[3].split(".")[0]
        apath = f'{adir}{row[2]}/{stem}.csv'
        if not os.path.exists(apath):
            continue

        for word, _wstart, _wend, phones in read_mfa(apath):
            if word != kwd:
                continue
            if atype == 'mfa':
                pspel = ' '.join(label for label, _s, _e in phones)

            spans = vcd[pspel]
            vowel_idx, cons_idx = spans[0], spans[1]

            # Phone triples are (label, start, end).
            vdur = phones[vowel_idx[-1]][2] - phones[vowel_idx[0]][1]
            cdur = phones[cons_idx[-1]][2] - phones[cons_idx[0]][1]
            durations.append((vdur, cdur))

    return durations
|
|
|
|
|
|
|
|
|
|
|
def displ(prinfo):
    """Scatter-plot vowel against consonant durations and report their ratio.

    ``prinfo`` is a sequence of ``(vowel_duration, consonant_duration)``
    pairs.  Durations are multiplied by 1000 for plotting and the axes are
    labelled in milliseconds, so the inputs are presumably in seconds.

    Each axis starts at 0 and ends at the largest plotted value clamped to
    the range 500..1000.  A dark-gray reference line of slope 1 through the
    origin is drawn as well.

    Returns ``(ratio, figure)`` where ``ratio`` is the mean of the per-pair
    vowel/consonant quotients and ``figure`` is the new matplotlib figure.
    """
    ratios = []
    vowel_ms = []
    cons_ms = []
    for vdur, cdur in prinfo:
        ratios.append(vdur / cdur)
        vowel_ms.append(1000 * vdur)
        cons_ms.append(1000 * cdur)
    rto = np.mean(ratios)

    fig = plt.figure(figsize=(6, 5))

    # Upper axis limit: the data maximum, but never below 500 nor above 1000.
    x_top = max(500, min(max(vowel_ms), 1000))
    axes = fig.add_subplot(111)
    axes.set_xlim([0.0, x_top])
    y_top = max(500, min(max(cons_ms), 1000))
    axes.set_ylim([0.0, y_top])

    axes.scatter(vowel_ms, cons_ms)
    axes.axline((0, 0), slope=1, color="darkgray")

    axes.set_xlabel("Vowel length (ms)")
    axes.set_ylabel("Consonant length (ms)")
    axes.set_title(f'TITLE\nRatio: {round(rto,2)}')
    return (rto, fig)
|
|
|
|
|
|
|
|
|
|
|
|
|
def runan(kwd, spl, aln, vck, dat, sources):
    """Run the vowel/consonant duration analysis for one keyword.

    Args:
        kwd: Keyword to analyse (case-insensitive).
        spl: Speaker split (case-insensitive).  ``'l1'`` keeps the rows whose
            column 8 equals ``'icelandic'`` (compared in lower case) and
            ``'l2'`` keeps the rows where it differs; any other value keeps
            every row.  Column 8 is presumably the speaker's native
            language - confirm against the metadata file.
        aln: Alignment type (case-insensitive); used as key into ``vck`` and
            ``sources``.
        vck: Nested key ``{alignment_type: {word: {pron: (a, b)}}}`` of phone
            index tuples.
        dat: Metadata rows; the last element of each row is the collection of
            keywords the row contains.
        sources: Mapping from alignment type to the directory prefix handed
            to ``get_vc_dur``.

    Returns:
        * the string ``"EXCLUDED WORD FOR THIS ALIGNMENT TYPE"`` when any
          entry for the word contains the index tuple ``('X',)``,
        * the matplotlib figure from ``displ`` when more than five duration
          pairs were found,
        * ``0`` otherwise.

    Raises:
        KeyError: If ``aln`` or ``kwd`` is missing from ``vck``, or ``aln``
            is missing from ``sources``.
    """
    kwd = kwd.lower()
    spl = spl.lower()
    aln = aln.lower()
    print(kwd,aln,spl,len(dat))
    vcd = vck[aln][kwd]

    # A word is unusable for this alignment type as soon as one of its
    # entries carries the ('X',) marker in either index tuple.
    if any(('X',) in el for el in vcd.values()):
        return "EXCLUDED WORD FOR THIS ALIGNMENT TYPE"

    if spl == 'l1':
        d = [l for l in dat if l[8].lower() == 'icelandic']
    elif spl == 'l2':
        d = [l for l in dat if l[8].lower() != 'icelandic']
    else:
        # Unknown split: fall back to all rows rather than leaving `d` unbound.
        d = dat

    print(len(d),'***')
    # Analyse the filtered rows; passing the unfiltered `dat` here made the
    # speaker split a no-op.
    prinfo = get_vc_dur(kwd, aln, d, vcd, sources[aln])
    if len(prinfo) > 5:
        _ratio, fig = displ(prinfo)
        return fig
    return 0
|
|
|
|
|
|
|
def setup(meta, phkey):
    """Load the metadata table, the phone-index key and the keyword list.

    The metadata file is tab-separated; its first line is skipped as a
    header and blank lines are ignored.  In every remaining row the last
    column is split on single spaces and stored as a tuple of keywords; the
    other columns are kept as strings.

    Args:
        meta: Path of the tab-separated metadata file.
        phkey: Path of the phone-index key file, passed to ``read_ph_key``.

    Returns:
        ``(dat, key, kws)`` where ``dat`` is the list of parsed rows,
        ``key`` is the result of ``read_ph_key(phkey)`` and ``kws`` is the
        sorted list of distinct keywords found across all rows.
    """
    # Explicit UTF-8 so non-ASCII keywords are decoded the same way on every
    # platform instead of depending on the locale default.
    with open(meta, 'r', encoding='utf-8') as handle:
        lines = handle.read().splitlines()

    dat = []
    for line in lines[1:]:
        if not line.strip():
            # A blank line would otherwise become a row with the bogus
            # keyword '' and too few columns for later lookups.
            continue
        *fields, kwfield = line.split('\t')
        dat.append(fields + [tuple(kwfield.split(' '))])

    kws = sorted({kw for row in dat for kw in row[-1]})

    return dat, read_ph_key(phkey), kws
|
|
|
|
|
|
|
|