File size: 1,408 Bytes
2afb07e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import json
import numpy as np

from transformers import BertTokenizer
from rank_bm25 import BM25Okapi
import gradio as gr

# Tokenizer used to split user queries into tokens before BM25 scoring.
tokenizer = BertTokenizer.from_pretrained("DeepPavlov/rubert-base-cased")

# Load the search database. The context manager closes the file handle as
# soon as parsing finishes, and the explicit UTF-8 encoding keeps the load
# independent of the platform's default locale.
with open('budu_search_syn_database.json', encoding='utf-8') as f:
    database = json.load(f)

# Parallel lists: b25corpus[i] is the document indexed under b25local_names[i].
# NOTE(review): BM25Okapi is given the values as-is, so each value is
# presumably an already-tokenized document (list of tokens) -- confirm
# against the JSON file.
b25corpus = list(database.values())
b25local_names = list(database.keys())
bm25 = BM25Okapi(corpus=b25corpus)

def predict_bm25(service):
    """Return the database keys that match a query, best match first.

    The query is lower-cased, tokenized with the module-level ``tokenizer``
    and scored against every document by the module-level ``bm25`` index.

    Args:
        service: Query text entered by the user.

    Returns:
        A list of entries from ``b25local_names`` whose score is strictly
        positive, ordered from the highest score to the lowest. The list is
        empty when no document scores above zero.
    """
    query_tokens = tokenizer.tokenize(service.lower())
    doc_scores = bm25.get_scores(query_tokens)

    # argsort is ascending, so reverse it to rank the best match first.
    ranking = doc_scores.argsort()[::-1]
    ranked_names = np.array([b25local_names[idx] for idx in ranking])

    # Keep only documents that actually matched (score above zero).
    has_match = doc_scores[ranking] > 0
    return ranked_names[has_match].tolist()

# Web UI: one text box for the query, one text box for the matched names.
demo = gr.Interface(
    fn=predict_bm25,
    inputs=gr.components.Textbox(label='Запрос пользователя'),
    outputs=[gr.components.Textbox(label='Рекомендованные услуги')],
    # Sample queries offered in the interface.
    examples=[
        ['ферритин'],
        ['кальций'],
        ['железо'],
        ['прием'],
    ],
)

# Start the app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()