# Hugging Face Spaces page header captured with this file (Space status: Sleeping).
import numpy as np
import torch
import streamlit as st
from transformers import BertTokenizer
from transformers import BertForSequenceClassification
from sklearn.preprocessing import LabelEncoder
from keras.utils import pad_sequences
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
# Page header: Streamlit can show the user text, images and a limited subset
# of HTML, much like a Jupyter notebook.
st.markdown("### Hello, world!")
st.markdown("<img width=200px src='https://rozetked.me/images/uploads/dwoilp3BVjlE.jpg'>", unsafe_allow_html=True)

# Text area for the input to classify; `text` holds whatever string is
# currently in the widget.
text = st.text_area("TEXT HERE")
# Streamlit re-executes this whole script on every widget interaction, so the
# tokenizer and the fine-tuned BERT weights are built through a cached factory
# when the installed Streamlit provides `st.cache_resource`. On releases
# without it the decorator degrades to a no-op and loading behaves exactly
# like the original uncached code.
_cache_resource = getattr(st, "cache_resource", None) or (lambda func: func)


@_cache_resource
def _load_tokenizer_and_model():
    """Build the tokenizer and the 44-class BERT classifier with local weights.

    Returns:
        A ``(tokenizer, model)`` pair: the ``bert-base-uncased`` tokenizer and
        a ``BertForSequenceClassification`` whose parameters were replaced by
        the state dict stored in ``model_last_version.pt`` (mapped to CPU).
    """
    bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    classifier = BertForSequenceClassification.from_pretrained(
        "bert-base-uncased",  # Use the 12-layer BERT model, with an uncased vocab.
        num_labels=44,
    )
    # NOTE(review): torch.load unpickles the file, so only a trusted
    # checkpoint should ever be placed at this path.
    classifier.load_state_dict(
        torch.load("model_last_version.pt", map_location=torch.device('cpu'))
    )
    # Inference only: make sure dropout layers are disabled.
    classifier.eval()
    return bert_tokenizer, classifier


tokenizer, model = _load_tokenizer_and_model()
# Inputs are truncated / padded to exactly this many tokens.
MAX_LEN = 64

# A blank text area would otherwise be tokenized to special tokens plus
# padding and still produce a "prediction"; ask for input and end this run.
if not text.strip():
    st.info("Enter some text to classify.")
    st.stop()

tokens = tokenizer.encode_plus(
    text,
    add_special_tokens=True,
    max_length=MAX_LEN,
    truncation=True,
    padding='max_length',
)
# unsqueeze(0) adds the batch dimension: one sequence per request.
input_ids = torch.tensor(tokens['input_ids']).unsqueeze(0)
attention_mask = torch.tensor(tokens['attention_mask']).unsqueeze(0)

# No gradients are needed for prediction; skipping autograd bookkeeping saves
# memory and time on every rerun.
with torch.no_grad():
    logits = model(input_ids, attention_mask=attention_mask)[0]

probs = torch.softmax(logits, dim=1)
# Index of the most probable class for the single sequence in the batch.
predicted_category = torch.argmax(probs, dim=1).item()
# Class-index -> label lookup: the script indexes this list with the argmax of
# the classifier output, so position i must stay the label for class i. The
# list has 44 entries, matching `num_labels = 44` used to build the model.
# NOTE(review): entries ending in an apostrophe (e.g. "adap-org'") look like
# label-encoding artifacts from training data -- TODO confirm. They must not be
# removed or merged here, because that would shift every later class index.
tags_names = [
    'acc-phys',
    'adap-org',
    "adap-org'",
    'alg-geom',
    'astro-ph',
    "astro-ph'",
    'chao-dyn',
    'chem-ph',
    'cmp-lg',
    "cmp-lg'",
    'comp-gas',
    'cond-mat',
    "cond-mat'",
    'cs',
    'dg-ga',
    'econ',
    'eess',
    'funct-an',
    'gr-qc',
    "gr-qc'",
    'hep-ex',
    "hep-ex'",
    'hep-lat',
    "hep-lat'",
    'hep-ph',
    "hep-ph'",
    'hep-th',
    "hep-th'",
    'math',
    'math-ph',
    'mtrl-th',
    'nlin',
    'nucl-ex',
    'nucl-th',
    "nucl-th'",
    'patt-sol',
    'physics',
    'q-alg',
    'q-bio',
    'q-fin',
    'quant-ph',
    "quant-ph'",
    'solv-int',
    'stat',
]
# Map the predicted class index to its label. The model code above is
# ordinary huggingface.transformers usage and could be swapped for anything
# from fairseq to catboost.
raw_predictions = tags_names[predicted_category]

# Show the predicted label to the user.
st.markdown(raw_predictions)