|
from construction_prediction.constants import load_morph |
|
|
|
|
|
# Shared morphological analyzer, created once at import time and used by
# get_collocates_for_word_type to parse and inflect words.
# NOTE(review): it is used through .parse() / .inflect() / .tag.gender, so it
# is presumably a pymorphy2-style MorphAnalyzer - confirm in
# construction_prediction.constants.
morph = load_morph()
|
|
|
|
|
def filter_results(model, word, target_pos, collocate_pos, topn, restrict_vocab):
    """Return neighbours of a POS-tagged word that carry the requested POS tag.

    The model is queried with the key ``"<word>_<target_pos>"``. Every key it
    returns is expected to have the form ``"<word>_<POS>"``; keys that do not
    split into exactly two parts on ``'_'`` are skipped.

    Args:
        model: Object exposing ``similar_by_word(key, topn=..., restrict_vocab=...)``
            that yields ``(key, similarity)`` pairs.
            NOTE(review): presumably a gensim ``KeyedVectors`` - confirm at the caller.
        word: Word to query, without its POS suffix.
        target_pos: POS tag appended to ``word`` to build the query key.
        collocate_pos: POS tag a neighbour must carry to be kept.
        topn: Passed to the model as the number of neighbours to request; also
            the maximum number of collocates returned.
        restrict_vocab: Passed through to ``similar_by_word`` unchanged.

    Returns:
        A list of ``(collocate_word, similarity)`` tuples, in the order the
        model yielded them, containing at most ``topn`` entries.
    """
    target_word = '_'.join((word, target_pos))
    neighbours = model.similar_by_word(target_word, topn=topn, restrict_vocab=restrict_vocab)

    collocates = []
    for coll, similarity in neighbours:
        parts = coll.split('_')
        # Keys without a POS suffix, or with extra underscores, cannot be
        # read unambiguously as word + tag, so they are ignored.
        if len(parts) != 2:
            continue
        coll_word, pos = parts
        if pos != collocate_pos:
            continue
        collocates.append((coll_word, similarity))
        if len(collocates) == topn:
            break
    return collocates
|
|
|
|
|
def get_collocates_for_word_type(model, word, target_pos, topn, restrict_vocab):
    """Format adjective-noun pairs built from a word and its nearest collocates.

    Collocates of the opposite category are looked up via ``filter_results``
    (nouns for an ``'ADJ'`` target, adjectives otherwise). For each of the
    first ``topn`` collocates the adjective is inflected to the gender of the
    noun using the module-level ``morph`` analyzer.

    Args:
        model: Similarity model, passed through to ``filter_results``.
        word: The target word, without its POS suffix.
        target_pos: POS tag of ``word``; ``'ADJ'`` selects noun collocates,
            any other value selects adjective collocates.
        topn: Maximum number of collocates to consider.
        restrict_vocab: Passed through to ``filter_results``.

    Returns:
        A string with one ``"\\t<adj> <noun>: <score>\\n\\n"`` entry per kept
        pair (score rounded to 3 decimals), or ``''`` when nothing is kept.
    """
    collocate_pos = 'NOUN' if target_pos == 'ADJ' else 'ADJ'

    # Ask for 100x more neighbours than needed, because only those carrying
    # collocate_pos survive the filtering in filter_results.
    candidates = filter_results(model, word, target_pos, collocate_pos, topn * 100, restrict_vocab)

    lines = []
    # NOTE(review): the slice to topn happens before the inflection and prefix
    # filters below, so fewer than topn lines may be returned.
    for collocate, similarity in candidates[:topn]:
        similarity_score = round(similarity, 3)
        noun = word if target_pos == 'NOUN' else collocate
        adj = word if target_pos == 'ADJ' else collocate

        try:
            gender = morph.parse(noun)[0].tag.gender
            # A parse without gender would put None into the grammeme set.
            # NOTE(review): pymorphy2-style analyzers appear to reject unknown
            # grammemes with ValueError, which the handler below would not
            # catch - confirm for the analyzer returned by load_morph.
            if gender is None:
                continue
            # inflect() yielding no form makes `.word` fail with
            # AttributeError; such pairs are skipped.
            adj = morph.parse(adj)[0].inflect({gender}).word
        except AttributeError:
            continue

        # Pairs sharing their first three characters are dropped
        # (presumably to avoid same-root adjective/noun pairs - confirm).
        if adj[:3] != noun[:3]:
            lines.append(f'\t{adj} {noun}: {similarity_score}\n\n')

    return ''.join(lines)
|
|