gefedya committed on
Commit
a120ba0
·
1 Parent(s): a6984b6

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -0
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from datasets import load_dataset
3
+ import pandas as pd
4
+ import numpy as np
5
+ from transformers import pipeline
6
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModel, Trainer, TrainingArguments, LineByLineTextDataset
7
+ import json
8
+
9
# Page header: a title followed by a decorative banner image.
# Streamlit can show the user text, images and a limited subset of HTML,
# much like a Jupyter notebook.
title_md = "### Here is a sentiment model trained on a slice of a twitter dataset"
banner_html = "<img width=200px src='https://rozetked.me/images/uploads/dwoilp3BVjlE.jpg'>"
st.markdown(title_md)
st.markdown(banner_html, unsafe_allow_html=True)

# Text input box. `text` holds whatever string is currently in the box.
prompt = "Try typing something here! \n You will see how much better our model is compared to the base model. No kidding"
text = st.text_area(prompt)
15
+
16
### Loading data, importing the existing model and fine-tuning it

BASE_CHECKPOINT = "siebert/sentiment-roberta-large-english"
TRAIN_SAMPLES = 10000
EVAL_SAMPLES = 5000


@st.cache_resource
def load_finetuned_model():
    """Fine-tune the base sentiment checkpoint once and return it.

    Streamlit re-executes this script on every widget interaction; without
    caching, the dataset download, tokenization and the whole training run
    would be repeated each time the user edits the text area.
    ``st.cache_resource`` (Streamlit >= 1.18) keeps the returned objects for
    the lifetime of the server process.

    Returns:
        A ``(tokenizer, model)`` tuple: the tokenizer of ``BASE_CHECKPOINT``
        and the sequence-classification model after one training epoch.
    """
    data = load_dataset("carblacac/twitter-sentiment-analysis")
    tokenizer = AutoTokenizer.from_pretrained(BASE_CHECKPOINT)

    def prepare(split, size):
        # Select the subset *before* tokenizing so that only the rows that
        # are actually used get tokenized and padded.
        subset = split.select(range(size))
        subset = subset.map(
            lambda xs: tokenizer(xs["text"], truncation=True, padding="max_length"),
            batched=True,
        )
        # Trainer expects the target column to be called "labels".
        return subset.rename_column("feeling", "labels")

    train_dataset = prepare(data["train"].shuffle(), TRAIN_SAMPLES)
    eval_dataset = prepare(data["test"], EVAL_SAMPLES)

    model = AutoModelForSequenceClassification.from_pretrained(
        BASE_CHECKPOINT, num_labels=2
    )

    trainer = Trainer(
        model=model,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        args=TrainingArguments(
            output_dir="./my_saved_model",
            overwrite_output_dir=True,
            num_train_epochs=1,
            per_device_train_batch_size=4,
            save_steps=10_000,
            save_total_limit=2,
        ),
    )
    trainer.train()

    return tokenizer, model


# Module-level names used by the prediction code further down.
tokenizer, model = load_finetuned_model()
40
+
41
+
42
### Using our fine-tuned model to predict the sentiment of users' entries

# Wrap the in-memory fine-tuned model in a text-classification pipeline.
# `device=model.device` keeps inference on whatever device the model
# currently lives on (it may have been moved during training).
classifier = pipeline(
    "sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
    device=model.device,
)

# Nothing to classify until the user has typed something.
raw_predictions = []
if text.strip():
    # truncation=True guards against inputs longer than the model's limit.
    raw_predictions = classifier(text, truncation=True)
    # Show the model's output to the user.
    st.markdown(f"{raw_predictions}")