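# Streamlit demo: classify a comment's toxicity with base BERT or a
# fine-tuned BERT. Launch with: streamlit run <this_script>.py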
import pandas as pd
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, BertForSequenceClassification
import streamlit as st

option = st.selectbox("Select a text analysis model:", ("BERT", "Fine-tuned BERT"))

bert_path = "bert-base-uncased"
if option == "BERT":
    # Base BERT with a freshly initialized six-label classification head.
    tokenizer = AutoTokenizer.from_pretrained(bert_path)
    model = BertForSequenceClassification.from_pretrained(bert_path, num_labels=6)
else:
    # Assumed path to a locally saved fine-tuned checkpoint; point this at
    # wherever your fine-tuned model and tokenizer live.
    finetuned_path = "./bert-finetuned-toxic"
    tokenizer = AutoTokenizer.from_pretrained(finetuned_path)
    model = BertForSequenceClassification.from_pretrained(finetuned_path)
model.eval()

# Load a small sample of comments to classify. train.csv is assumed to be the
# Jigsaw toxic-comment training set, which stores the text in "comment_text".
tweets_raw = pd.read_csv("train.csv", nrows=20)

# The six Jigsaw toxicity labels, matching num_labels=6 above.
label_set = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
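
# The original snippet uses `txt` without defining it; this assumed widget
# lets the user pick one of the loaded comments as the input text.
txt = st.selectbox("Select a comment to classify:", tweets_raw["comment_text"])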

# Run the encoding through the model to get classification logits.
encoding = tokenizer.encode(txt, return_tensors="pt")
with torch.no_grad():
    result = model(encoding)

# Turn logits into probabilities. If the model emits a single logit (as a
# one-output fine-tuned head might), pad a zero so softmax gives two classes.
logits = result.logits
if logits.size(dim=1) < 2:
    logits = F.pad(logits, (0, 1), "constant", 0)
prediction = F.softmax(logits, dim=-1)

# Assumed index convention: position 0 is the neutral class, position 1 toxic.
neutralIndex, toxicIndex = 0, 1
neutralProb = prediction[0][neutralIndex].item()
toxicProb = prediction[0][toxicIndex].item()

# Write results
st.write("Classification Probabilities")
st.write(f"{neutralProb:.4f} - NEUTRAL")
st.write(f"{toxicProb:.4f} - TOXIC")