ladapetrushenko's picture
Upload 9 files
f2b6412
from construction_prediction.constants import load_morph
# Morphological analyzer shared by this module, created once at import time.
# It is used below through ``morph.parse(...)`` to look up a noun's gender and
# to inflect adjectives accordingly.
morph = load_morph()
def filter_results(model, word, target_pos, collocate_pos, topn, restrict_vocab):
    """Collect neighbours of ``word`` whose part-of-speech tag is ``collocate_pos``.

    The model is queried with the key ``"<word>_<target_pos>"``. Every
    neighbour key is expected to have the form ``"<lemma>_<POS>"``; keys that
    do not split into exactly two parts on ``"_"`` are ignored.

    Args:
        model: Object exposing ``similar_by_word(key, topn=..., restrict_vocab=...)``
            and yielding ``(key, similarity)`` pairs.
        word: Lemma to look up.
        target_pos: POS tag appended to ``word`` to form the query key.
        collocate_pos: POS tag a neighbour must carry to be kept.
        topn: Forwarded to the model; collection also stops as soon as
            ``topn`` matches have been gathered.
        restrict_vocab: Forwarded to the model unchanged.

    Returns:
        A list of ``(lemma, similarity)`` tuples in the order the model
        produced them.
    """
    query_key = '_'.join([word, target_pos])
    neighbours = model.similar_by_word(
        query_key, topn=topn, restrict_vocab=restrict_vocab
    )

    matched = []
    for key, score in neighbours:
        parts = key.split('_')
        # Anything other than exactly "<lemma>_<POS>" is skipped.
        if len(parts) != 2:
            continue
        lemma, tag = parts
        if tag != collocate_pos:
            continue
        matched.append((lemma, score))
        if len(matched) == topn:
            break
    return matched
def _inflect_adj_to_noun_gender(adj, noun):
    """Return ``adj`` inflected to the grammatical gender of ``noun``.

    Returns ``None`` when agreement is impossible: the first parse of the
    noun has no gender, or the analyzer cannot produce the requested form.
    """
    try:
        gender = morph.parse(noun)[0].tag.gender
        if gender is None:
            # Nothing to agree with; never hand ``{None}`` to ``inflect``.
            # NOTE(review): presumably plural-only or unrecognised nouns -
            # confirm against the analyzer returned by ``load_morph``.
            return None
        return morph.parse(adj)[0].inflect({gender}).word
    except AttributeError:
        # Presumably ``inflect`` returned ``None`` (no such form), so the
        # ``.word`` access failed; the pair is skipped on a best-effort basis.
        return None


def get_collocates_for_word_type(model, word, target_pos, topn, restrict_vocab):
    """Format up to ``topn`` adjective + noun pairs built from neighbours of ``word``.

    When ``target_pos`` is ``'ADJ'`` the neighbours sought are nouns;
    otherwise they are adjectives. In every pair the adjective is inflected
    to the gender of the noun, and pairs whose two words share their first
    three characters are dropped.

    Args:
        model: Embedding model passed through to ``filter_results``.
        word: Lemma to find collocates for.
        target_pos: POS tag of ``word`` (``'ADJ'`` or ``'NOUN'``).
        topn: Maximum number of pairs to include in the output.
        restrict_vocab: Passed through to ``filter_results``.

    Returns:
        A string containing one tab-indented ``<adj> <noun>: <score>`` entry
        per pair, each followed by a blank line, with the score rounded to
        three decimals. Empty string if no pair qualifies.
    """
    collocate_pos = 'NOUN' if target_pos == 'ADJ' else 'ADJ'
    # Over-fetch candidates: many neighbours carry the wrong POS, and more are
    # dropped below by the inflection and same-stem checks.
    candidates = filter_results(
        model, word, target_pos, collocate_pos, topn * 100, restrict_vocab
    )

    entries = []
    for collocate, similarity in candidates:
        # Count accepted pairs rather than slicing the candidate list up
        # front, so rejected candidates do not shrink the result below topn.
        if len(entries) >= topn:
            break

        noun = word if target_pos == 'NOUN' else collocate
        adj = word if target_pos == 'ADJ' else collocate

        # Make the adjective agree with the noun so the pair reads as a
        # proper construction in which one element is inflected.
        agreed_adj = _inflect_adj_to_noun_gender(adj, noun)
        if agreed_adj is None:
            continue

        # Exclude same-stem results such as 'человечный человек'
        # (adjective derived from the very noun it modifies).
        if agreed_adj[:3] == noun[:3]:
            continue

        entries.append(f'\t{agreed_adj} {noun}: {round(similarity, 3)}\n\n')

    return ''.join(entries)