import pickle
import time
from functools import lru_cache

import streamlit as st
import torch
import transformers
from transformers import AutoTokenizer, AutoModel
from sklearn.linear_model import LogisticRegression

# Model/artifact locations and the label mapping used by the classifier.
_MODEL_NAME = "cointegrated/LaBSE-en-ru"
_CLASSIFIER_PATH = "pages/models/linmodel_min.pkl"
_LABELS = {0: "Bad", 1: "Neutral", 2: "Good"}


@lru_cache(maxsize=1)
def _load_encoder():
    """Load the LaBSE tokenizer and model once per process and cache them.

    Previously both were re-loaded on every call to ``preprocess_bert``,
    which dominated the reported inference time.
    """
    tokenizer = AutoTokenizer.from_pretrained(_MODEL_NAME)
    model = AutoModel.from_pretrained(_MODEL_NAME)
    model.eval()  # inference only; no dropout/batch-norm updates
    return tokenizer, model


@lru_cache(maxsize=1)
def _load_classifier():
    """Load the pickled logistic-regression classifier once and cache it.

    NOTE(review): ``pickle.load`` on an untrusted file is unsafe; the path is
    project-local here, so it is assumed trusted.
    """
    with open(_CLASSIFIER_PATH, "rb") as f:
        return pickle.load(f)


def preprocess_bert(text):
    """Classify ``text`` sentiment with LaBSE embeddings + logistic regression.

    Parameters
    ----------
    text : str or list[str]
        Input passed straight to the HuggingFace tokenizer. Only the first
        item's prediction is reported.

    Returns
    -------
    str
        ``'BERT <label> <seconds> секунд'`` where label is Bad/Neutral/Good.
    """
    start_time = time.time()

    tokenizer, model = _load_encoder()
    encoded_input = tokenizer(
        text, padding=True, truncation=True, max_length=500, return_tensors="pt"
    )
    with torch.no_grad():
        model_output = model(**encoded_input)

    # L2-normalize the pooled sentence embedding to match how the
    # classifier was trained on normalized LaBSE vectors.
    embeddings = torch.nn.functional.normalize(model_output.pooler_output)
    embeddings = embeddings.detach().cpu().numpy()

    # The fitted classifier comes from the pickle; the original code also
    # built a fresh LogisticRegression first, which was dead code.
    logreg = _load_classifier()
    predicted_label = logreg.predict(embeddings)
    predicted_label_text = _LABELS[predicted_label[0]]

    inference_time = time.time() - start_time
    return f'BERT {predicted_label_text} {inference_time} секунд'