import torch
import streamlit as st
from transformers import AutoTokenizer, AutoModel
from sklearn.linear_model import LogisticRegression
import joblib
from time import time

# Map classifier output indices to human-readable sentiment labels
# (translated here from the original Russian strings).
label_map = {0: 'Neutral', 1: 'Positive', 2: 'Negative'}
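
# Optional sketch (not in the original code): preprocess_bert() reloads the
# tokenizer, the encoder and the classifier on every call; a Streamlit app
# could instead load them once and reuse them across reruns via
# st.cache_resource. The helper name load_artifacts is hypothetical.
@st.cache_resource
def load_artifacts():
    tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2")
    model = AutoModel.from_pretrained("cointegrated/rubert-tiny2")
    lr = joblib.load('model/lr_weights.pkl')
    return tokenizer, model, lr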

def preprocess_bert(text):
    start_time = time()

    # Load the Russian sentence encoder and its tokenizer.
    tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2")
    model = AutoModel.from_pretrained("cointegrated/rubert-tiny2")

    # Tokenize the input and take the normalized [CLS] embedding.
    t = tokenizer(text, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        model_output = model(**{k: v.to(model.device) for k, v in t.items()})
    embeddings = model_output.last_hidden_state[:, 0, :]
    embeddings = torch.nn.functional.normalize(embeddings)
    embeddings = embeddings.detach().cpu().numpy()

    # Load the fitted logistic-regression classifier and predict the label.
    lr = joblib.load('model/lr_weights.pkl')
    predicted_label = lr.predict(embeddings)
    predicted_label_text = label_map[predicted_label[0]]

    end_time = time()
    inference_time = end_time - start_time
    return f"***{predicted_label_text}***, prediction time: ***{inference_time:.4f} s***."
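

# Minimal usage sketch (assumed, not from the original file): wiring the
# function into a Streamlit page. Widget labels are illustrative.
st.title('Sentiment analysis with rubert-tiny2')
user_text = st.text_area('Enter text to classify')
if st.button('Predict') and user_text:
    st.markdown(preprocess_bert(user_text))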