gefedya's picture
Upload app.py
a120ba0
raw
history blame
2.41 kB
import json

import numpy as np
import pandas as pd
import streamlit as st
import torch
from datasets import load_dataset
from transformers import pipeline
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModel, Trainer, TrainingArguments, LineByLineTextDataset
# Static page header: a title followed by a 200px-wide illustration.
# Streamlit can show the user text, images and a limited subset of HTML,
# much like a Jupyter notebook.
page_title = "### Here is a sentiment model trained on a slice of a twitter dataset"
banner_html = "<img width=200px src='https://rozetked.me/images/uploads/dwoilp3BVjlE.jpg'>"
st.markdown(page_title)
st.markdown(banner_html, unsafe_allow_html=True)

# Free-form input box. `text` holds whatever string is currently in the box.
input_label = (
    "Try typing something here! \n"
    " You will see how much better our model is compared to the base model. No kidding"
)
text = st.text_area(input_label)
### Loading data, fine-tuning the base model and caching the result
BASE_CHECKPOINT = "siebert/sentiment-roberta-large-english"

# Streamlit re-executes this script on every widget interaction, so the
# expensive download / tokenize / train pipeline must be cached; otherwise the
# model would be re-trained each time the user edits the text box.
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is None:
    # Older Streamlit releases only provide the generic `st.cache` decorator.
    _cache_resource = st.cache(allow_output_mutation=True)


@_cache_resource
def load_finetuned_model():
    """Fine-tune the base sentiment checkpoint once and return it.

    Downloads the tokenizer, the pretrained classifier and the twitter
    sentiment dataset, trains for one epoch on 10,000 shuffled training rows
    and returns the objects needed for inference.

    Returns:
        A ``(tokenizer, model)`` tuple; ``model`` has been trained in place.
    """
    tokenizer = AutoTokenizer.from_pretrained(BASE_CHECKPOINT)
    model = AutoModelForSequenceClassification.from_pretrained(BASE_CHECKPOINT, num_labels=2)
    # model.to('cpu');

    data = load_dataset("carblacac/twitter-sentiment-analysis")

    def prepare(split):
        """Tokenize one split and rename its target column to ``labels``."""
        tokenized = split.map(
            lambda xs: tokenizer(xs["text"], truncation=True, padding='max_length')
        )
        # The Trainer passes the `labels` column to the model as the target.
        return tokenized.rename_column("feeling", "labels")

    # Subsample BEFORE tokenizing: only these rows are ever used, so there is
    # no point in tokenizing and padding the full dataset.
    train_dataset = prepare(data["train"].shuffle().select(range(10000)))
    eval_dataset = prepare(data["test"].select(range(5000)))

    trainer = Trainer(
        model=model,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        args=TrainingArguments(
            output_dir="./my_saved_model",
            overwrite_output_dir=True,
            num_train_epochs=1,
            per_device_train_batch_size=4,
            save_steps=10_000,
            save_total_limit=2,
        ),
    )
    trainer.train()
    return tokenizer, model


# Module-level names kept for the rest of the script.
tokenizer, model = load_finetuned_model()
### Using our new BEAST model to predict the sentiment of users' entries
# Any other framework (from fairseq to catboost) could be plugged in here
# instead of huggingface.transformers.
if text.strip():
    # trainer.train() leaves the model in training mode; switch to eval mode
    # so dropout is disabled and predictions are deterministic.
    model.eval()
    # The Trainer may have moved the model to an accelerator, so the inputs
    # have to be placed on the same device as the model.
    encoded = tokenizer(text, truncation=True, return_tensors="pt").to(model.device)
    with torch.no_grad():
        logits = model(**encoded).logits
    scores = torch.softmax(logits, dim=-1)[0]
    best = int(scores.argmax())
    # Same shape as the output of a `sentiment-analysis` pipeline.
    raw_predictions = [
        {"label": model.config.id2label[best], "score": float(scores[best])}
    ]
    # Show the model's verdict to the user.
    st.markdown(f"{raw_predictions}")