Upload BertForJointParsing.py
Browse files- BertForJointParsing.py +12 -8
BertForJointParsing.py
CHANGED
@@ -353,7 +353,6 @@ def convert_output_to_ud(output_sentences, htb_extras=False):
|
|
353 |
word['lex'] = word['seg'][-1]
|
354 |
|
355 |
start = len(intermediate_output)
|
356 |
-
idx_to_key[word_idx] = len(intermediate_output) + 1
|
357 |
# Add in all the prefixes
|
358 |
if len(word['seg']) > 1:
|
359 |
for pre in get_prefixes_from_str(word['seg'][0], greedy=True):
|
@@ -370,6 +369,7 @@ def convert_output_to_ud(output_sentences, htb_extras=False):
|
|
370 |
intermediate_output[-1]['feats'] = 'Definite=Def|PronType=Art'
|
371 |
|
372 |
|
|
|
373 |
# add the main word in!
|
374 |
intermediate_output.append(dict(
|
375 |
word=word['seg'][-1], lex=word['lex'], pos=word['morph']['pos'],
|
@@ -380,9 +380,9 @@ def convert_output_to_ud(output_sentences, htb_extras=False):
|
|
380 |
if word['morph']['suffix']:
|
381 |
# first determine the dependency info:
|
382 |
# For adp, num, det - the main word points to here, and the suffix points to the dependency
|
|
|
383 |
if word['morph']['pos'] in ['ADP', 'NUM', 'DET']:
|
384 |
-
intermediate_output[-1]
|
385 |
-
intermediate_output[-1]['absolute_dep'] = True
|
386 |
intermediate_output[-1]['func'] = 'case'
|
387 |
dep = word['syntax']['dep_head_idx']
|
388 |
func = word['syntax']['dep_func']
|
@@ -409,14 +409,18 @@ def convert_output_to_ud(output_sentences, htb_extras=False):
|
|
409 |
# for htb:
|
410 |
else:
|
411 |
# main word becomes the lexeme, the suffix is based on the features
|
412 |
-
intermediate_output[-1]['word'] = s_lex + '_'
|
413 |
suf_feats = word['morph']['suffix_feats']
|
414 |
-
suf = ud_suffix_to_htb_str.get(f"Gender={suf_feats
|
415 |
# for HTB, if the function is poss, then add a shel pointing to the next word
|
416 |
if func == 'nmod:poss':
|
417 |
-
intermediate_output.append(dict(word='_ืฉื_', lex='ืฉื', pos='ADP', dep=len(intermediate_output) +
|
418 |
# add the main suffix in
|
419 |
intermediate_output.append(dict(word=suf, lex='ืืื', pos='PRON', dep=dep, func=func, feats='|'.join(f'{k}={v}' for k,v in word['morph']['suffix_feats'].items())))
|
|
|
|
|
|
|
|
|
420 |
end = len(intermediate_output)
|
421 |
ranges.append((start, end, word['token']))
|
422 |
|
@@ -458,12 +462,12 @@ def ud_get_prefix_dep(pre, word, word_idx):
|
|
458 |
func = 'mark'
|
459 |
# vuv goes to the main word if the function is in the list, otherwise follows
|
460 |
elif pre == 'ื':
|
461 |
-
does_follow_main = word['syntax']['dep_func'] in ["conj", "acl:recl", "parataxis", "root", "acl", "amod", "list", "appos", "dep", "flatccomp"]
|
462 |
func = 'cc'
|
463 |
else:
|
464 |
# for adj, noun, propn, pron, verb - prefixes go to the main word
|
465 |
if word['morph']['pos'] in ["ADJ", "NOUN", "PROPN", "PRON", "VERB"]:
|
466 |
-
does_follow_main =
|
467 |
# otherwise - prefix follows the word if the function is in the list
|
468 |
else: does_follow_main = word['syntax']['dep_func'] in ["compound:affix", "det", "aux", "nummod", "advmod", "dep", "cop", "mark", "fixed"]
|
469 |
|
|
|
353 |
word['lex'] = word['seg'][-1]
|
354 |
|
355 |
start = len(intermediate_output)
|
|
|
356 |
# Add in all the prefixes
|
357 |
if len(word['seg']) > 1:
|
358 |
for pre in get_prefixes_from_str(word['seg'][0], greedy=True):
|
|
|
369 |
intermediate_output[-1]['feats'] = 'Definite=Def|PronType=Art'
|
370 |
|
371 |
|
372 |
+
idx_to_key[word_idx] = len(intermediate_output) + 1
|
373 |
# add the main word in!
|
374 |
intermediate_output.append(dict(
|
375 |
word=word['seg'][-1], lex=word['lex'], pos=word['morph']['pos'],
|
|
|
380 |
if word['morph']['suffix']:
|
381 |
# first determine the dependency info:
|
382 |
# For adp, num, det - the main word points to here, and the suffix points to the dependency
|
383 |
+
entry_to_assign_suf_dep = None
|
384 |
if word['morph']['pos'] in ['ADP', 'NUM', 'DET']:
|
385 |
+
entry_to_assign_suf_dep = intermediate_output[-1]
|
|
|
386 |
intermediate_output[-1]['func'] = 'case'
|
387 |
dep = word['syntax']['dep_head_idx']
|
388 |
func = word['syntax']['dep_func']
|
|
|
409 |
# for htb:
|
410 |
else:
|
411 |
# main word becomes the lexeme, the suffix is based on the features
|
412 |
+
intermediate_output[-1]['word'] = (s_lex if s_lex != s_word else s_word[:-1]) + '_'
|
413 |
suf_feats = word['morph']['suffix_feats']
|
414 |
+
suf = ud_suffix_to_htb_str.get(f"Gender={suf_feats.get('Gender', 'Fem,Masc')}|Number={suf_feats.get('Number', 'Sing')}|Person={suf_feats.get('Person', '3')}", "_ืืื")
|
415 |
# for HTB, if the function is poss, then add a shel pointing to the next word
|
416 |
if func == 'nmod:poss':
|
417 |
+
intermediate_output.append(dict(word='_ืฉื_', lex='ืฉื', pos='ADP', dep=len(intermediate_output) + 2, func='case', feats='_', absolute_dep=True))
|
418 |
# add the main suffix in
|
419 |
intermediate_output.append(dict(word=suf, lex='ืืื', pos='PRON', dep=dep, func=func, feats='|'.join(f'{k}={v}' for k,v in word['morph']['suffix_feats'].items())))
|
420 |
+
if entry_to_assign_suf_dep:
|
421 |
+
entry_to_assign_suf_dep['dep'] = len(intermediate_output)
|
422 |
+
entry_to_assign_suf_dep['absolute_dep'] = True
|
423 |
+
|
424 |
end = len(intermediate_output)
|
425 |
ranges.append((start, end, word['token']))
|
426 |
|
|
|
462 |
func = 'mark'
|
463 |
# vuv goes to the main word if the function is in the list, otherwise follows
|
464 |
elif pre == 'ื':
|
465 |
+
does_follow_main = word['syntax']['dep_func'] not in ["conj", "acl:recl", "parataxis", "root", "acl", "amod", "list", "appos", "dep", "flatccomp"]
|
466 |
func = 'cc'
|
467 |
else:
|
468 |
# for adj, noun, propn, pron, verb - prefixes go to the main word
|
469 |
if word['morph']['pos'] in ["ADJ", "NOUN", "PROPN", "PRON", "VERB"]:
|
470 |
+
does_follow_main = False
|
471 |
# otherwise - prefix follows the word if the function is in the list
|
472 |
else: does_follow_main = word['syntax']['dep_func'] in ["compound:affix", "det", "aux", "nummod", "advmod", "dep", "cop", "mark", "fixed"]
|
473 |
|