Spaces:

dp92
/

Toxiclassifier

Build error

dp92 committed on Apr 26, 2023

Commit

ea0a463

1 Parent(s): 5764658

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,29 +1,38 @@
-import pandas as pd
-import torch
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-# Load the pre-trained BERT model and tokenizer
-model_name = "bert-base-uncased"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=6)
-# Load the data
-data = pd.read_csv("toxic_comments.csv")
-# Define the function to preprocess the text
-def preprocess(text):
-    inputs = tokenizer(text, padding=True, truncation=True, max_length=128, return_tensors="pt")
-    return inputs["input_ids"], inputs["attention_mask"]
-# Define the function to classify a text input
-def classify(text):
-    input_ids, attention_mask = preprocess(text)
-    with torch.no_grad():
-        logits = model(input_ids, attention_mask=attention_mask).logits
-    preds = torch.sigmoid(logits).squeeze().tolist()
-    return {labels[i]: preds[i] for i in range(len(labels))}
-# Define the labels
-labels = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]

+import gradio as gr
+import tensorflow as tf
+from tensorflow.keras.layers import TextVectorization
+# Load the saved model
+# ('toxicity.h5' is a relative path, so it is resolved against the working directory.)
+model = tf.keras.models.load_model('toxicity.h5')
+# Column names; score_comment reads the model output by position in this list,
+# so the order here decides which score gets which label.
+columns = ['toxic', 'severe_toxic', 'obscene', 'threat','insult', 'identity_hate']
+# Specifying the maximum number of words in the vocabulary
+MAX_FEATURES=200000
+# Creating a TextVectorization layer with the specified parameters
+# NOTE(review): no vocabulary is passed here and no adapt() call is visible in
+# this hunk. Unless the vocabulary is supplied somewhere else, the layer has
+# learned no word-to-id mapping -- confirm it matches the vectorizer that was
+# used when toxicity.h5 was trained.
+vectorizer = TextVectorization(max_tokens=MAX_FEATURES,output_sequence_length=1800,output_mode="int")
+# Define a function to score a comment
+def score_comment(comment):
+    """Return one ``label: bool`` line per entry in ``columns`` for *comment*.
+
+    The comment is vectorized as a one-element list, passed to the loaded
+    model, and each column is reported as True when the matching value in
+    the first prediction row is greater than 0.5.
+    """
+    # Only one comment was submitted, so only the first prediction row is read.
+    scores = model.predict(vectorizer([comment]))[0]
+    # Build the whole report in one pass; every line ends with a newline.
+    return ''.join(
+        '{}: {}\n'.format(label, scores[position] > 0.5)
+        for position, label in enumerate(columns)
+    )
+# Create a Gradio interface for the score_comment function:
+# a two-line text box as input and plain text as output.
+# NOTE(review): gr.inputs.* looks like Gradio's legacy component namespace;
+# newer releases expose gr.Textbox directly -- confirm against the Gradio
+# version this Space installs.
+interface = gr.Interface(fn=score_comment,inputs=gr.inputs.Textbox(lines=2,placeholder="Comment to score"),outputs="text",title='Comment Toxicity Classifier')
+# Launch the Gradio interface
+# (called at module top level; no __main__ guard appears in this hunk)
+interface.launch()