dp92 committed on
Commit
ea0a463
·
1 Parent(s): 5764658

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -22
app.py CHANGED
@@ -1,29 +1,38 @@
 
 
 
1
 
2
- import pandas as pd
3
- import torch
4
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
5
 
6
# Pre-trained BERT checkpoint shared by the tokenizer and the classifier
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Sequence-classification model configured with six output labels
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=6,
)

# Read the comments dataset from disk
data = pd.read_csv("toxic_comments.csv")
13
 
14
- # Define the function to preprocess the text
15
def preprocess(text):
    """Tokenize *text* and return its ``input_ids`` and ``attention_mask``.

    The module-level tokenizer is called with padding enabled, truncation to
    at most 128 tokens, and PyTorch tensors as the return format.
    """
    encoded = tokenizer(
        text,
        padding=True,
        truncation=True,
        max_length=128,
        return_tensors="pt",
    )
    token_ids = encoded["input_ids"]
    mask = encoded["attention_mask"]
    return token_ids, mask
18
 
19
- # Define the function to classify a text input
20
def classify(text):
    """Return a dict mapping each label name to its sigmoid score for *text*.

    The text is tokenized with ``preprocess``, passed through the module-level
    model without gradient tracking, and the logits are squashed with a
    sigmoid before being paired with the entries of ``labels`` by position.
    """
    token_ids, mask = preprocess(text)
    # Inference only: no gradients are needed for the forward pass
    with torch.no_grad():
        outputs = model(token_ids, attention_mask=mask)
        raw_scores = outputs.logits
    scores = torch.sigmoid(raw_scores).squeeze().tolist()
    return {name: scores[position] for position, name in enumerate(labels)}
26
 
27
# Label names, in the order used to index the classifier's scores
labels = [
    "toxic",
    "severe_toxic",
    "obscene",
    "threat",
    "insult",
    "identity_hate",
]
 
 
 
 
 
 
 
 
 
 
 
 
29
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import tensorflow as tf
3
+ from tensorflow.keras.layers import TextVectorization
4
 
 
 
 
5
 
6
# Load the saved Keras model from disk
model = tf.keras.models.load_model('toxicity.h5')

# Column names, in the order used to index the prediction scores
columns = [
    'toxic',
    'severe_toxic',
    'obscene',
    'threat',
    'insult',
    'identity_hate',
]

# Maximum number of words in the vocabulary
MAX_FEATURES = 200000

# Text vectorization layer that emits integer sequences of length 1800.
# NOTE(review): no adapt()/set_vocabulary() call for this layer is visible in
# this file -- confirm the vocabulary used at training time is restored, since
# the token ids must match the ones the saved model was trained on.
vectorizer = TextVectorization(
    max_tokens=MAX_FEATURES,
    output_sequence_length=1800,
    output_mode="int",
)
 
 
 
 
 
17
 
18
+ # Define a function to score a comment
19
def score_comment(comment):
    """Score a single comment and return a text report, one line per column.

    The comment is vectorized as a one-element batch and passed to the
    module-level model. Each output line has the form ``"<column>: <flag>"``,
    where the flag is the result of comparing that column's predicted score
    against 0.5.
    """
    # Wrap the comment in a list so the vectorizer receives a batch of one
    batch = vectorizer([comment])

    predictions = model.predict(batch)

    # Only the first (and only) row of the prediction batch is reported
    report_lines = [
        f'{label}: {predictions[0][position] > 0.5}\n'
        for position, label in enumerate(columns)
    ]
    return ''.join(report_lines)
32
 
33
+
34
# Create a Gradio interface for the score_comment function.
# The top-level gr.Textbox component is used instead of gr.inputs.Textbox:
# the gr.inputs namespace is deprecated in Gradio 3.x and removed in 4.x.
interface = gr.Interface(
    fn=score_comment,
    inputs=gr.Textbox(lines=2, placeholder="Comment to score"),
    outputs="text",
    title='Comment Toxicity Classifier',
)

# Launch the Gradio interface
interface.launch()