File size: 1,177 Bytes
a487402
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import torch
import streamlit as st
import transformers
from transformers import AutoTokenizer, AutoModel
from sklearn.linear_model import LogisticRegression
import pickle
import time


def preprocess_bert(text):
    """Classify the sentiment of *text* using LaBSE embeddings and a pickled classifier.

    Pipeline: tokenize -> LaBSE forward pass -> L2-normalized pooler output ->
    pre-trained LogisticRegression loaded from ``pages/models/linmodel_min.pkl``.

    Parameters
    ----------
    text : str or list[str]
        Passed directly to the HuggingFace tokenizer (both forms are accepted
        by ``tokenizer(...)``; only the first prediction is reported).

    Returns
    -------
    str
        ``'BERT <label> <seconds> секунд'`` where ``<label>`` is one of
        Bad / Neutral / Good.
    """
    start_time = time.time()
    # NOTE(review): the tokenizer and model are re-downloaded/re-loaded on every
    # call, which dominates the reported "inference" time — consider hoisting
    # these into a cached resource (e.g. st.cache_resource) at module level.
    tokenizer = AutoTokenizer.from_pretrained("cointegrated/LaBSE-en-ru")
    model = AutoModel.from_pretrained("cointegrated/LaBSE-en-ru")
    encoded_input = tokenizer(text, padding=True, truncation=True, max_length=500, return_tensors='pt')
    with torch.no_grad():
        model_output = model(**encoded_input)
    # L2-normalize the pooled sentence embedding and move it to CPU/NumPy for sklearn.
    embeddings = torch.nn.functional.normalize(model_output.pooler_output)
    embeddings = embeddings.cpu().numpy()

    # Load the pre-trained classifier. (Bug fixed: the original instantiated
    # LogisticRegression(class_weight='balanced') and immediately overwrote it
    # with the unpickled model, silently discarding that setting — dead code.)
    with open('pages/models/linmodel_min.pkl', 'rb') as f:
        logreg = pickle.load(f)
    predicted_label = logreg.predict(embeddings)
    # Renamed from `dict` to avoid shadowing the builtin.
    label_names = {0: 'Bad', 1: 'Neutral', 2: 'Good'}
    predicted_label_text = label_names[predicted_label[0]]

    inference_time = time.time() - start_time
    return f'BERT {predicted_label_text} {inference_time} секунд'