Spaces:
Sleeping
Sleeping
import functools
import pickle
import time

import streamlit as st
import torch
import transformers
from sklearn.linear_model import LogisticRegression
from transformers import AutoTokenizer, AutoModel
_LABSE_CHECKPOINT = "cointegrated/LaBSE-en-ru"
_CLASSIFIER_PATH = 'pages/models/linmodel_min.pkl'
# Maps the classifier's integer class ids to the labels shown to the user.
_SENTIMENT_LABELS = {0: 'Bad', 1: 'Neutral', 2: 'Good'}


@functools.lru_cache(maxsize=1)
def _load_labse():
    """Load the LaBSE tokenizer and encoder once and reuse them afterwards."""
    tokenizer = AutoTokenizer.from_pretrained(_LABSE_CHECKPOINT)
    model = AutoModel.from_pretrained(_LABSE_CHECKPOINT)
    return tokenizer, model


@functools.lru_cache(maxsize=1)
def _load_classifier():
    """Unpickle the fitted classifier once and reuse it afterwards."""
    # NOTE(security): pickle.load can execute arbitrary code. This path must
    # only ever point at a trusted artifact shipped with the application.
    with open(_CLASSIFIER_PATH, 'rb') as f:
        return pickle.load(f)


def preprocess_bert(text):
    """Classify *text* with LaBSE embeddings and a pickled classifier.

    The text is tokenized (truncated to at most 500 tokens), encoded with the
    "cointegrated/LaBSE-en-ru" model, and the L2-normalized pooler output is
    passed to the classifier stored at ``pages/models/linmodel_min.pkl``.

    Args:
        text: Input passed straight to the tokenizer. If it yields more than
            one row of embeddings, only the first prediction is reported.

    Returns:
        A string of the form ``'BERT <label> <elapsed> секунд'``, where
        ``<label>`` is 'Bad', 'Neutral' or 'Good' and ``<elapsed>`` is the
        wall-clock duration of the call in seconds ("секунд" is Russian for
        "seconds").

    Raises:
        KeyError: If the classifier predicts a class other than 0, 1 or 2.
    """
    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments.
    start_time = time.perf_counter()

    # Cached after the first call; that first call therefore also pays for
    # loading the model and the classifier, later calls do not.
    tokenizer, model = _load_labse()
    classifier = _load_classifier()

    encoded_input = tokenizer(
        text,
        padding=True,
        truncation=True,
        max_length=500,
        return_tensors='pt',
    )
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        model_output = model(**encoded_input)

    embeddings = torch.nn.functional.normalize(model_output.pooler_output)
    embeddings = embeddings.detach().cpu().numpy()

    predicted_label = classifier.predict(embeddings)
    predicted_label_text = _SENTIMENT_LABELS[predicted_label[0]]

    inference_time = time.perf_counter() - start_time
    return f'BERT {predicted_label_text} {inference_time} секунд'