Spaces:
Build error
Build error
File size: 1,260 Bytes
3630984 6176775 4c30733 6176775 4c30733 6176775 4c30733 6176775 4c30733 6176775 4c30733 6176775 4c30733 6176775 4c30733 230173f 4c30733 6176775 24e7e9e 6176775 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Pre-trained BERT checkpoint name shared by the tokenizer and the classifier.
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Sequence-classification model configured with six output labels.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=6,
)

# Comments to classify, read from a CSV file given by a relative path.
data = pd.read_csv("toxic_comments.csv")
# Text preprocessing: tokenize a comment for the model.
def preprocess(text):
    """Tokenize ``text`` and return its input ids and attention mask.

    The tokenizer is called with truncation enabled, a maximum length of
    128, and ``return_tensors="pt"``.

    Returns:
        A ``(input_ids, attention_mask)`` tuple taken from the tokenizer
        output.
    """
    encoded = tokenizer(
        text,
        padding=True,
        truncation=True,
        max_length=128,
        return_tensors="pt",
    )
    input_ids = encoded["input_ids"]
    attention_mask = encoded["attention_mask"]
    return input_ids, attention_mask
# Classify a single text input.
def classify(text):
    """Return the per-label sigmoid scores for one comment.

    Args:
        text: The comment to score; expected to be a single string.

    Returns:
        A dict mapping each name in the module-level ``labels`` list to the
        sigmoid of the corresponding model logit.
    """
    input_ids, attention_mask = preprocess(text)
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        logits = model(input_ids, attention_mask=attention_mask).logits
    # Drop only the leading batch dimension; a bare squeeze() would also
    # collapse the label dimension if it ever had size one.
    preds = torch.sigmoid(logits).squeeze(0).tolist()
    return dict(zip(labels, preds))
# Label names: paired by position with the model scores in classify() and
# used as column names when reading the ground truth from each data row.
labels = [
    "toxic",
    "severe_toxic",
    "obscene",
    "threat",
    "insult",
    "identity_hate",
]
# Classify the comments and print the results
for _, row in data.iterrows():
    text = row["comment_text"]
    # Score the comment so the predictions can be printed next to the
    # ground-truth label columns of the same row.
    preds = classify(text)
    print("Comment: ", text)
    print("Predictions: ", preds)
    print("Labels: ", row[labels].to_dict())