Spaces:
Sleeping
Sleeping
File size: 1,177 Bytes
a487402 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
import torch
import streamlit as st
import transformers
from transformers import AutoTokenizer, AutoModel
from sklearn.linear_model import LogisticRegression
import pickle
import time
# Lazily-populated (tokenizer, model) pair for LaBSE. from_pretrained() is
# expensive (disk/network), so load once per process instead of on every call.
_LABSE_CACHE = None


def _get_labse():
    """Return the cached (tokenizer, model) pair, loading it on first use."""
    global _LABSE_CACHE
    if _LABSE_CACHE is None:
        tokenizer = AutoTokenizer.from_pretrained("cointegrated/LaBSE-en-ru")
        model = AutoModel.from_pretrained("cointegrated/LaBSE-en-ru")
        model.eval()  # inference only; no dropout/batchnorm updates
        _LABSE_CACHE = (tokenizer, model)
    return _LABSE_CACHE


# Maps the classifier's integer class to its display label.
# (Was a local named `dict`, which shadowed the builtin.)
_LABEL_TEXT = {0: 'Bad', 1: 'Neutral', 2: 'Good'}


def preprocess_bert(text):
    """Classify *text* using LaBSE sentence embeddings and a pickled
    logistic-regression head, and return a formatted result string.

    Parameters
    ----------
    text : str | list[str]
        Passed straight to the Hugging Face tokenizer. A single string
        yields one embedding; a list yields one per entry, but only the
        first prediction is reported.

    Returns
    -------
    str
        'BERT <label> <elapsed seconds> секунд'
    """
    start_time = time.time()
    tokenizer, model = _get_labse()
    encoded_input = tokenizer(text, padding=True, truncation=True,
                              max_length=500, return_tensors='pt')
    with torch.no_grad():
        model_output = model(**encoded_input)
    # L2-normalize the pooled sentence embedding, as LaBSE expects.
    embeddings = torch.nn.functional.normalize(model_output.pooler_output)
    embeddings = embeddings.cpu().numpy()  # inside no_grad, detach() is redundant
    # Pre-trained classifier head shipped with the app.
    # NOTE(review): pickle.load is only safe because this file is a trusted
    # project artifact — never load untrusted pickles.
    with open('pages/models/linmodel_min.pkl', 'rb') as f:
        logreg = pickle.load(f)
    predicted_label = logreg.predict(embeddings)
    predicted_label_text = _LABEL_TEXT[predicted_label[0]]
    inference_time = time.time() - start_time
    return f'BERT {predicted_label_text} {inference_time} секунд'