Commit
·
3eb6342
1
Parent(s):
bcdd2d8
algorithm improved
Browse files
ud.py
CHANGED
@@ -86,6 +86,20 @@ class UniversalDependenciesPipeline(TokenClassificationPipeline):
|
|
86 |
v[i-1]=(v[i-1][0],v.pop(i)[1])
|
87 |
q.pop(i)
|
88 |
t=model_outputs["sentence"].replace("\n"," ")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
u="# text = "+t+"\n"
|
90 |
for i,(s,e) in enumerate(v):
|
91 |
u+="\t".join([str(i+1),t[s:e],"_",q[i][0].replace(".",""),"_","_" if len(q[i])<3 else "|".join(q[i][1:-1]),str(0 if h[i]==i else h[i]+1),q[i][-1][1:-1],"_","_" if i+1<len(v) and e<v[i+1][0] else "SpaceAfter=No"])+"\n"
|
|
|
86 |
v[i-1]=(v[i-1][0],v.pop(i)[1])
|
87 |
q.pop(i)
|
88 |
t=model_outputs["sentence"].replace("\n"," ")
|
89 |
+
for i,(s,e) in reversed(list(enumerate(v))):
|
90 |
+
d=t[s:e]
|
91 |
+
j=len(d)-len(d.lstrip())
|
92 |
+
if j>0:
|
93 |
+
d=d.lstrip()
|
94 |
+
v[i]=(v[i][0]+j,v[i][1])
|
95 |
+
j=len(d)-len(d.rstrip())
|
96 |
+
if j>0:
|
97 |
+
d=d.rstrip()
|
98 |
+
v[i]=(v[i][0],v[i][1]-j)
|
99 |
+
if d.strip()=="":
|
100 |
+
h=[b if i>b else b-1 for a,b in enumerate(h) if i!=a]
|
101 |
+
v.pop(i)
|
102 |
+
q.pop(i)
|
103 |
u="# text = "+t+"\n"
|
104 |
for i,(s,e) in enumerate(v):
|
105 |
u+="\t".join([str(i+1),t[s:e],"_",q[i][0].replace(".",""),"_","_" if len(q[i])<3 else "|".join(q[i][1:-1]),str(0 if h[i]==i else h[i]+1),q[i][-1][1:-1],"_","_" if i+1<len(v) and e<v[i+1][0] else "SpaceAfter=No"])+"\n"
|