snaramirez872 committed on
Commit
f74bf51
·
1 Parent(s): 4fc0ae0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -5
app.py CHANGED
@@ -8,7 +8,7 @@ from sklearn import metrics
8
  from torch.utils.data import Dataset as set, DataLoader as DL
9
  from torch import cuda
10
  import streamlit as st
11
- from transformers import DistilBertTokenizer as DBT, DistilBertModel as DBM
12
 
13
  # Defined variables for later use
14
  MAX_LEN = 128
@@ -16,8 +16,7 @@ TRAIN_BATCH_SIZE = 4
16
  VALID_BATCH_SIZE = 4
17
  LEARNING_RATE = 5e-05
18
 
19
- modName = 'distilbert-base-uncased' # Pre-trained model
20
-
21
 
22
  categories = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate'] # Labels
23
 
@@ -44,7 +43,7 @@ new = pd.DataFrame()
44
  new['text'] = data['comment_text']
45
  new['labels'] = data.iloc[:,1].values.tolist()
46
 
47
- tokenizer = DBT.from_pretrained(modName, truncation=True, do_lower_case=True)
48
 
49
  class MultiLabelDataset(set):
50
  def __init__(self, df, tokenizer, max_len):
@@ -102,7 +101,7 @@ for dat in testing_loader:
102
  class DistilBERTClass(TNN.Module):
103
  def __init__(self):
104
  super(DistilBERTClass, self).__init__()
105
- self.l1 = DBM.from_pretrained(modName)
106
  self.pre_classifier = TNN.Linear(768, 768)
107
  self.dropout = TNN.Dropout(0.1)
108
  self.classifier = TNN.Linear(768, 6)
 
8
  from torch.utils.data import Dataset as set, DataLoader as DL
9
  from torch import cuda
10
  import streamlit as st
11
+ from transformers import BertTokenizer as BT, BertModel as BM
12
 
13
  # Defined variables for later use
14
  MAX_LEN = 128
 
16
  VALID_BATCH_SIZE = 4
17
  LEARNING_RATE = 5e-05
18
 
19
+ modName = 'bert-base-uncased' # Pre-trained model
 
20
 
21
  categories = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate'] # Labels
22
 
 
43
  new['text'] = data['comment_text']
44
  new['labels'] = data.iloc[:,1].values.tolist()
45
 
46
+ tokenizer = BT.from_pretrained(modName, truncation=True, do_lower_case=True)
47
 
48
  class MultiLabelDataset(set):
49
  def __init__(self, df, tokenizer, max_len):
 
101
  class DistilBERTClass(TNN.Module):
102
  def __init__(self):
103
  super(DistilBERTClass, self).__init__()
104
+ self.l1 = BM.from_pretrained(modName)
105
  self.pre_classifier = TNN.Linear(768, 768)
106
  self.dropout = TNN.Dropout(0.1)
107
  self.classifier = TNN.Linear(768, 6)