Spaces:
Runtime error
Runtime error
File size: 8,369 Bytes
35f56ba 7749ef6 47ef74f cfa2b70 cd87a42 8389a97 391374c 8389a97 9c5b410 d71bb22 26f6079 cd87a42 26dac8d cd87a42 26f6079 e43f53b 26f6079 2b66ed3 391374c 26f6079 2b66ed3 a2d76c4 2b66ed3 16a37d5 2b66ed3 a2d76c4 2b66ed3 84b6ab2 391374c 84b6ab2 16a37d5 84b6ab2 391374c 26dac8d 26f6079 26dac8d 47ef74f d5b90e7 dff0151 21d64ee dff0151 26f6079 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
import streamlit as st
import tensorflow as tf
from transformers import pipeline
from textblob import TextBlob
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F
from transformers import BertForMaskedLM
import pandas as pd
# model = BertForMaskedLM.from_pretrained("remi/bertabs-finetuned-extractive-abstractive-summarization")
# Text box holding the sentence to analyse, pre-filled with a sample sentence.
DEFAULT_TEXT = "I really like the color of your car!"
textIn = st.text_input("Input Text Here:", DEFAULT_TEXT)

# Drop-down that decides which of the branches below handles the text.
MODEL_CHOICES = ('MILESTONE 3', 'Pipeline', 'TextBlob')
option = st.selectbox(
    'Which pre-trained model would you like for your sentiment analysis?',
    MODEL_CHOICES,
)
st.write('You selected:', option)
if option == 'MILESTONE 3':
    # Classify the input text with the fine-tuned checkpoint, then render a
    # table of pre-populated tweets with placeholder toxicity columns.
    #
    # The softmax/argmax/id2label steps below need one row of logits per
    # input, shaped (batch, num_labels), so the checkpoint is loaded with a
    # sequence-classification head. BertForMaskedLM returns per-token logits
    # over the vocabulary, which cannot be mapped through id2label.
    # NOTE(review): assumes the hub checkpoint was saved with a
    # classification head -- confirm against the training script.
    model_name_0 = "Rathgeberj/milestone3_0"
    model_0 = AutoModelForSequenceClassification.from_pretrained(model_name_0)
    tokenizer_0 = AutoTokenizer.from_pretrained(model_name_0)

    # TODO: the sibling checkpoints "Rathgeberj/milestone3_1" through
    # "Rathgeberj/milestone3_5" are not wired in yet; only milestone3_0 is used.

    # Tokenize as a batch of one; no `labels` are passed because this is
    # inference only and no loss is needed.
    batch_0 = tokenizer_0(
        [textIn],
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )
    with torch.no_grad():
        outputs = model_0(**batch_0)

    # Turn logits into probabilities and pick the most likely class per input.
    predictions = F.softmax(outputs.logits, dim=1)
    label_ids = torch.argmax(predictions, dim=1).tolist()
    labels = [model_0.config.id2label[label_id] for label_id in label_ids]
    confidence = round(predictions[0, label_ids[0]].item(), 4)
    st.write(
        'According to MILESTONE 3, input text is ', labels[0],
        ' with a confidence of ', confidence,
    )

    col = [
        'Tweet',
        'Highest_Toxicity_Class_Overall',
        'Score_Overall',
        'Highest_Toxicity_Class_Except_Toxic',
        'Score_Except_Toxic',
    ]
    pre_populated_tweets = [
        'Yo bitch Ja Rule is more succesful then youll ever be whats up with you and hating you sad mofuckas...i should bitch slap ur pethedic white faces and get you to kiss my ass you guys sicken me. Ja rule is about pride in da music man. dont diss that shit on him. and nothin is wrong bein like tupac he was a brother too...fuckin white boys get things right next time.',
        'If you have a look back at the source, the information I updated was the correct form. I can only guess the source hadnt updated. I shall update the information once again but thank you for your message.',
        'I dont anonymously edit articles at all.',
        'Thank you for understanding. I think very highly of you and would not revert without discussion.',
        'Please do not add nonsense to Wikipedia. Such edits are considered vandalism and quickly undone. If you would like to experiment, please use the sandbox instead. Thank you. -',
        'Dear god this site is horrible.',
        'I think its crap that the link to roggenbier is to this article. Somebody that knows how to do things should change it.',
        'Please stop. If you continue to vandalize Wikipedia, as you did to Homosexuality, you will be blocked from editing.',
        'yeah, thanks for reviving the tradition of pissing all over articles because you want to live out your ethnic essentialism. Why let mere facts get into the way of enjoying that.',
        'Ive deleted the page , as we have no evidence that you are the person named on that page, and its content goes against Wikipedias policies for the use of user pages.',
    ]

    # Placeholder columns, sized from the tweet list so the table stays
    # consistent if tweets are added or removed.
    # TODO: fill these from per-tweet inference; they are all zeros for now.
    tweet_count = len(pre_populated_tweets)
    HTCO = [0] * tweet_count
    SO = [0] * tweet_count
    HTCET = [0] * tweet_count
    SET = [0] * tweet_count

    df = pd.DataFrame(
        {
            'Tweet': pre_populated_tweets,
            'Highest_Toxicity_Class_Overall': HTCO,
            'Score_Overall': SO,
            'Highest_Toxicity_Class_Except_Toxic': HTCET,
            'Score_Except_Toxic': SET,
        },
        columns=col,
    )
    st.table(df)
    # NOTE(review): presumably a leftover debug marker -- confirm before removing.
    st.write('test2')
if option == 'Pipeline':
    # Run the input text through a transformers sentiment-analysis pipeline
    # built from the checkpoint named below, then report the first result.
    checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
    sst2_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    sst2_model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    classifier = pipeline(
        task="sentiment-analysis",
        model=sst2_model,
        tokenizer=sst2_tokenizer,
    )

    # Keep only label and score (rounded to 4 decimals) for each result.
    preds = []
    for raw in classifier(textIn):
        preds.append({"score": round(raw["score"], 4), "label": raw["label"]})

    top = preds[0]
    st.write(
        'According to Pipeline, input text is ', top['label'],
        ' with a confidence of ', top['score'],
    )
if option == 'TextBlob':
    # Score the input with TextBlob and map the polarity sign to a label:
    # below zero is Negative, exactly zero is Neutral, anything else Positive.
    blob_sentiment = TextBlob(textIn).sentiment
    polarity = blob_sentiment.polarity
    subjectivity = blob_sentiment.subjectivity

    sentiment = (
        'Negative' if polarity < 0
        else 'Neutral' if polarity == 0
        else 'Positive'
    )
    st.write(
        'According to TextBlob, input text is ', sentiment,
        ' and a subjectivity score (from 0 being objective to 1 being subjective) of ', subjectivity,
    )
#------------------------------------------------------------------------
# tokens = tokenizer.tokenize(textIn)
# token_ids = tokenizer.convert_tokens_to_ids(tokens)
# input_ids = tokenizer(textIn)
# X_train = [textIn]
# batch = tokenizer(X_train, padding=True, truncation=True, max_length=512, return_tensors="pt")
# # batch = torch.tensor(batchbatch["input_ids"])
# with torch.no_grad():
# outputs = model(**batch, labels=torch.tensor([1, 0]))
# predictions = F.softmax(outputs.logits, dim=1)
# labels = torch.argmax(predictions, dim=1)
# labels = [model.config.id2label[label_id] for label_id in labels.tolist()]
# # save_directory = "saved"
# tokenizer.save_pretrained(save_directory)
# model.save_pretrained(save_directory)
# tokenizer = AutoTokenizer.from_pretrained(save_directory)
# model = AutoModelForSequenceClassification.from_pretrained(save_directory)
#------------------------------------------------------------------------ |