KoichiYasuoka committed on
Commit
3eb6342
·
1 Parent(s): bcdd2d8

algorithm improved

Browse files
Files changed (1) hide show
  1. ud.py +14 -0
ud.py CHANGED
@@ -86,6 +86,20 @@ class UniversalDependenciesPipeline(TokenClassificationPipeline):
86
  v[i-1]=(v[i-1][0],v.pop(i)[1])
87
  q.pop(i)
88
  t=model_outputs["sentence"].replace("\n"," ")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  u="# text = "+t+"\n"
90
  for i,(s,e) in enumerate(v):
91
  u+="\t".join([str(i+1),t[s:e],"_",q[i][0].replace(".",""),"_","_" if len(q[i])<3 else "|".join(q[i][1:-1]),str(0 if h[i]==i else h[i]+1),q[i][-1][1:-1],"_","_" if i+1<len(v) and e<v[i+1][0] else "SpaceAfter=No"])+"\n"
 
86
  v[i-1]=(v[i-1][0],v.pop(i)[1])
87
  q.pop(i)
88
  t=model_outputs["sentence"].replace("\n"," ")
89
+ for i,(s,e) in reversed(list(enumerate(v))):
90
+ d=t[s:e]
91
+ j=len(d)-len(d.lstrip())
92
+ if j>0:
93
+ d=d.lstrip()
94
+ v[i]=(v[i][0]+j,v[i][1])
95
+ j=len(d)-len(d.rstrip())
96
+ if j>0:
97
+ d=d.rstrip()
98
+ v[i]=(v[i][0],v[i][1]-j)
99
+ if d.strip()=="":
100
+ h=[b if i>b else b-1 for a,b in enumerate(h) if i!=a]
101
+ v.pop(i)
102
+ q.pop(i)
103
  u="# text = "+t+"\n"
104
  for i,(s,e) in enumerate(v):
105
  u+="\t".join([str(i+1),t[s:e],"_",q[i][0].replace(".",""),"_","_" if len(q[i])<3 else "|".join(q[i][1:-1]),str(0 if h[i]==i else h[i]+1),q[i][-1][1:-1],"_","_" if i+1<len(v) and e<v[i+1][0] else "SpaceAfter=No"])+"\n"