File size: 1,255 Bytes
019c64d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
from functools import lru_cache
from time import time

import joblib
import streamlit as st
import torch
from sklearn.linear_model import LogisticRegression
from transformers import AutoTokenizer, AutoModel
# Maps the integer class id predicted by the classifier to its display label.
# Deliberately not named `dict`: that would shadow the builtin module-wide.
SENTIMENT_LABELS = {0: 'Нейтральный', 1: 'Положительный', 2: 'Отрицательный'}

_ENCODER_NAME = "cointegrated/rubert-tiny2"
_CLASSIFIER_PATH = 'model/lr_weights.pkl'


@lru_cache(maxsize=1)
def _load_encoder():
    """Load the tokenizer and encoder model once and reuse them on later calls."""
    tokenizer = AutoTokenizer.from_pretrained(_ENCODER_NAME)
    model = AutoModel.from_pretrained(_ENCODER_NAME)
    return tokenizer, model


@lru_cache(maxsize=1)
def _load_classifier():
    """Load the saved classifier from disk once and reuse it on later calls."""
    # NOTE(review): joblib.load unpickles the file, which can execute arbitrary
    # code. This is only safe while the weights file is a trusted artifact.
    return joblib.load(_CLASSIFIER_PATH)


def preprocess_bert(text):
    """Classify the sentiment of *text* and report how long the prediction took.

    The text is tokenized and passed through the encoder; the hidden state at
    sequence position 0 is normalized and handed to the saved classifier. The
    predicted class id is translated through ``SENTIMENT_LABELS``.

    The encoder and classifier are loaded lazily on the first call and cached,
    so the reported time of the first call includes loading them while later
    calls measure inference only.

    Args:
        text: Input passed directly to the tokenizer. If the tokenizer yields
            more than one row, only the first prediction is reported.

    Returns:
        A Markdown-formatted string containing the predicted label and the
        elapsed wall-clock time in seconds (four decimal places).

    Raises:
        KeyError: If the classifier predicts an id missing from
            ``SENTIMENT_LABELS``.
    """
    start_time = time()
    tokenizer, model = _load_encoder()
    classifier = _load_classifier()

    encoded = tokenizer(text, padding=True, truncation=True, return_tensors='pt')
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        model_output = model(**{k: v.to(model.device) for k, v in encoded.items()})

    # Keep the hidden state at sequence position 0 (presumably the [CLS] token
    # for this tokenizer) as the embedding of the whole input.
    embeddings = model_output.last_hidden_state[:, 0, :]
    embeddings = torch.nn.functional.normalize(embeddings)
    embeddings = embeddings.detach().cpu().numpy()

    predicted_label = classifier.predict(embeddings)
    # int() turns the NumPy scalar into a plain int before the lookup.
    predicted_label_text = SENTIMENT_LABELS[int(predicted_label[0])]

    inference_time = time() - start_time
    return f"***{predicted_label_text}***, время предсказания: ***{inference_time:.4f} сек***."