Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -1,29 +1,38 @@
|
|
|
|
|
|
|
|
1 |
|
2 |
-
import pandas as pd
|
3 |
-
import torch
|
4 |
-
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
5 |
|
6 |
-
# Load the
|
7 |
-
|
8 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
9 |
-
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=6)
|
10 |
|
11 |
-
#
|
12 |
-
|
13 |
|
14 |
-
#
|
15 |
-
|
16 |
-
inputs = tokenizer(text, padding=True, truncation=True, max_length=128, return_tensors="pt")
|
17 |
-
return inputs["input_ids"], inputs["attention_mask"]
|
18 |
|
19 |
-
#
|
20 |
-
|
21 |
-
input_ids, attention_mask = preprocess(text)
|
22 |
-
with torch.no_grad():
|
23 |
-
logits = model(input_ids, attention_mask=attention_mask).logits
|
24 |
-
preds = torch.sigmoid(logits).squeeze().tolist()
|
25 |
-
return {labels[i]: preds[i] for i in range(len(labels))}
|
26 |
|
27 |
-
# Define
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import tensorflow as tf
|
3 |
+
from tensorflow.keras.layers import TextVectorization
|
4 |
|
|
|
|
|
|
|
5 |
|
6 |
+
# Load the saved model
|
7 |
+
model = tf.keras.models.load_model('toxicity.h5')
|
|
|
|
|
8 |
|
9 |
+
# Column names
|
10 |
+
columns = ['toxic', 'severe_toxic', 'obscene', 'threat','insult', 'identity_hate']
|
11 |
|
12 |
+
# Specifying the maximum number of words in the vocabulary
|
13 |
+
MAX_FEATURES=200000
|
|
|
|
|
14 |
|
15 |
+
# Creating a TextVectorization layer with the specified parameters
|
16 |
+
# NOTE(review): no adapt() call or saved-vocabulary load for this layer is
# visible in the code shown here. Confirm the layer receives the same
# vocabulary that was used in training; otherwise the integer ids it emits
# may not match what the loaded model expects.
vectorizer = TextVectorization(max_tokens=MAX_FEATURES,output_sequence_length=1800,output_mode="int")
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
+
# Define a function to score a comment
|
19 |
+
def score_comment(comment: str) -> str:
    """Score one comment against every label listed in ``columns``.

    The comment is wrapped in a single-element list, passed through the
    module-level ``vectorizer``, and the result is fed to
    ``model.predict``. Only the first row of the prediction output is read.

    Args:
        comment: The comment text to classify.

    Returns:
        A string containing one ``"<label>: <flag>"`` line per entry in
        ``columns``, where the flag is whether the model output for that
        label is greater than 0.5. Every line, including the last, ends
        with a newline character.
    """
    # Wrap the comment in a list before handing it to the vectorizer.
    vectorized_comment = vectorizer([comment])

    # Get the prediction results from the model; row 0 is the only row used.
    results = model.predict(vectorized_comment)
    scores = results[0]

    # Build the report in one pass with join rather than repeated `+=`
    # string concatenation inside the loop.
    return ''.join(
        '{}: {}\n'.format(col, scores[idx] > 0.5)
        for idx, col in enumerate(columns)
    )
|
32 |
|
33 |
+
|
34 |
+
# Create a Gradio interface for the score_comment function
|
35 |
+
interface = gr.Interface(
    fn=score_comment,
    # The legacy `gr.inputs.Textbox` namespace was deprecated in Gradio 3
    # and removed in Gradio 4; `gr.Textbox` is the supported component and
    # accepts the same `lines` / `placeholder` arguments.
    inputs=gr.Textbox(lines=2, placeholder="Comment to score"),
    outputs="text",
    title='Comment Toxicity Classifier',
)
|
36 |
+
|
37 |
+
# Launch the Gradio interface
|
38 |
+
interface.launch()
|