gefedya
/

transformer_nlp_ops

Model card Files Files and versions Community

gefedya committed on Apr 17, 2023

Commit

a120ba0

·

1 Parent(s): a6984b6

Upload app.py

Files changed (1) hide show

app.py +53 -0

app.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import streamlit as st
+from datasets import load_dataset
+import pandas as pd
+import numpy as np
+from transformers import pipeline
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModel, Trainer, TrainingArguments, LineByLineTextDataset
+import json
+st.markdown("### Here is a sentiment model trained on a slice of a twitter dataset")
+st.markdown("<img width=200px src='https://rozetked.me/images/uploads/dwoilp3BVjlE.jpg'>", unsafe_allow_html=True)
+# ^-- можно показывать пользователю текст, картинки, ограниченное подмножество html - всё как в jupyter
+text = st.text_area("Try typing something here! \n You will see how much better our model is compared to the base model. No kidding")
+# ^-- показать текстовое поле. В поле text лежит строка, которая находится там в данный момент
+### Loading and tokenizing data
+data = load_dataset("carblacac/twitter-sentiment-analysis")
+tokenizer = AutoTokenizer.from_pretrained("siebert/sentiment-roberta-large-english")
+dataset = data.map(lambda xs: tokenizer(xs["text"], truncation=True, padding='max_length'))
+dataset = dataset.rename_column("feeling", "labels")
+### Importing existing model
+model = AutoModelForSequenceClassification.from_pretrained("siebert/sentiment-roberta-large-english", num_labels=2)
+# model.to('cpu');
+### Training model
+trainer = Trainer(
+    model=model, train_dataset=dataset["train"].shuffle().select(range(10000)),
+    eval_dataset = dataset['test'].select(range(5000)),
+    args=TrainingArguments(
+        output_dir="./my_saved_model", overwrite_output_dir=True,
+        num_train_epochs=1, per_device_train_batch_size=4,
+        save_steps=10_000, save_total_limit=2),
+)
+trainer.train()
+### Using our new BEAST model to predict the sentiment of uers' entries
+# TODO: add predictions
+model()
+#classifier = pipeline('sentiment-analysis', model="distilbert-base-uncased-finetuned-sst-2-english")
+#raw_predictions = classifier(text)
+# тут уже знакомый вам код с huggingface.transformers -- его можно заменить на что угодно от fairseq до catboost
+st.markdown(f"{raw_predictions}")
+# выводим результаты модели в текстовое поле, на потеху пользователю