gefedya
/

transformer_nlp_ops

Model card Files Files and versions

transformer_nlp_ops / app.py

gefedya's picture

Upload app.py

a120ba0 over 2 years ago

2.41 kB

	import streamlit as st
	from datasets import load_dataset
	import pandas as pd
	import numpy as np
	from transformers import pipeline
	from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModel, Trainer, TrainingArguments, LineByLineTextDataset
	import json

	# Static page content, rendered in order: a title, an illustration, and
	# the input box. st.markdown can show text, images and (with
	# unsafe_allow_html=True) a limited subset of HTML -- much like a
	# Jupyter notebook cell.
	_page_title = "### Here is a sentiment model trained on a slice of a twitter dataset"
	_page_image_html = "<img width=200px src='https://rozetked.me/images/uploads/dwoilp3BVjlE.jpg'>"
	_input_prompt = "Try typing something here! \n You will see how much better our model is compared to the base model. No kidding"

	st.markdown(_page_title)
	st.markdown(_page_image_html, unsafe_allow_html=True)

	# Show a text area; `text` holds the string currently entered in it.
	text = st.text_area(_input_prompt)

	### Loading data, tokenizing, and fine-tuning the model (cached)

	# Streamlit re-executes this script from top to bottom on every widget
	# interaction. Without caching, the dataset load, tokenization and a full
	# training epoch would be repeated each time the user submits text.
	# Prefer st.cache_resource when the installed Streamlit provides it;
	# otherwise fall back to the legacy st.cache with output mutation allowed.
	_cache_resource = getattr(st, "cache_resource", None) or st.cache(allow_output_mutation=True)


	@_cache_resource
	def _load_finetuned_model(checkpoint="siebert/sentiment-roberta-large-english"):
	    """Fine-tune ``checkpoint`` on a slice of the twitter sentiment dataset.

	    Args:
	        checkpoint: Hugging Face model id used for both the tokenizer and
	            the sequence-classification model.

	    Returns:
	        A ``(tokenizer, model)`` tuple; ``model`` has been trained for one
	        epoch on 10,000 shuffled training examples.
	    """
	    data = load_dataset("carblacac/twitter-sentiment-analysis")
	    tokenizer = AutoTokenizer.from_pretrained(checkpoint)

	    def _prepare(split):
	        # Tokenize only the rows that are actually used, in batches, and
	        # expose the "feeling" column under the name Trainer expects.
	        encoded = split.map(
	            lambda xs: tokenizer(xs["text"], truncation=True, padding='max_length'),
	            batched=True,
	        )
	        return encoded.rename_column("feeling", "labels")

	    # Select the subsets first so the rest of the dataset is never tokenized.
	    train_split = _prepare(data["train"].shuffle().select(range(10000)))
	    eval_split = _prepare(data["test"].select(range(5000)))

	    model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

	    trainer = Trainer(
	        model=model,
	        train_dataset=train_split,
	        eval_dataset=eval_split,
	        args=TrainingArguments(
	            output_dir="./my_saved_model", overwrite_output_dir=True,
	            num_train_epochs=1, per_device_train_batch_size=4,
	            save_steps=10_000, save_total_limit=2,
	        ),
	    )
	    trainer.train()
	    return tokenizer, model


	# Module-level names used by the rest of the script.
	tokenizer, model = _load_finetuned_model()


	### Using our new BEAST model to predict the sentiment of users' entries

	# The original code called `model()` with no inputs and then rendered
	# `raw_predictions`, which was never assigned; both fail at runtime.
	# Run the fine-tuned model through a transformers pipeline instead.
	# Any other predictor (from fairseq to catboost) could be swapped in here.

	# Put the model in inference mode so dropout is disabled
	# (NOTE(review): presumably still in training mode after Trainer.train()).
	model.eval()
	classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

	# The text area is empty until the user types something, so only
	# classify (and display) when there is actual input.
	if text and text.strip():
	    # truncation=True keeps long inputs within the model's maximum length.
	    raw_predictions = classifier(text, truncation=True)
	    # Show the raw model output to the user.
	    st.markdown(f"{raw_predictions}")