# milestone-3 / app.py
# nppmatt's picture
# Update app.py
# 1c2d1c1
# raw
# history blame
# 1.11 kB
import pandas as pd
import torch
import torch.nn.functional as TF
import streamlit as st
# Streamlit script: let the user pick a BERT variant, run a piece of text
# through the selected sequence-classification model, and display the
# resulting "neutral" and "toxic" probabilities.
#
# NOTE(review): AutoTokenizer, BertForSequenceClassification, txt,
# neutralIndex and toxicIndex are used below but are neither imported nor
# assigned in the visible part of this file -- confirm where they come from
# (the two classes presumably come from `transformers`; txt is presumably the
# user's input text).
option = st.selectbox("Select a text analysis model:", ("BERT", "Fine-tuned BERT"))
bert_path = "bert-base-uncased"

if option == "BERT":
    tokenizer = AutoTokenizer.from_pretrained(bert_path)
    model = BertForSequenceClassification.from_pretrained(bert_path, num_labels=6)
else:
    # NOTE(review): this branch assigns neither `tokenizer` nor `model`, so
    # choosing "Fine-tuned BERT" will fail at the encode step below -- the
    # fine-tuned checkpoint still needs to be loaded here.
    tweets_raw = pd.read_csv("train.csv", nrows=20)

# NOTE(review): label_set is not read anywhere in the visible code.
label_set = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]

# Run encoding through model to get classification output.
encoding = tokenizer.encode(txt, return_tensors="pt")
# Inference only: no gradients are needed, so skip building the autograd graph.
with torch.no_grad():
    result = model(encoding)
logits = result.logits

# Transform logit to get probabilities.
if logits.size(dim=1) < 2:
    # Fewer than two logits along dim 1: append one constant zero logit on the
    # right of the last dimension so softmax has a second entry to normalise
    # against.
    logits = TF.pad(logits, (0, 1), "constant", 0)
prediction = TF.softmax(logits, dim=-1)

# Pull plain Python floats out of the first row of the probability tensor.
neutralProb = prediction[0][neutralIndex].item()
toxicProb = prediction[0][toxicIndex].item()

# Write results
st.write("Classification Probabilities")
st.write(f"{neutralProb:.4f} - NEUTRAL")
st.write(f"{toxicProb:.4f} - TOXIC")