Spaces:
Sleeping
Sleeping
File size: 5,119 Bytes
4bb8289 9476a50 3fa07b4 7cda966 3fa07b4 0451811 7cda966 0451811 7cda966 0451811 7cda966 7eca2e2 7cda966 6be538d 7eca2e2 7cda966 73cfee4 0451811 d1ac74e 61aba4c 0451811 380b45b 0451811 2b3dec6 460f5fd 86dad28 9476a50 0451811 f199e05 460f5fd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
#%%writefile debias_app.py
import streamlit as st
import transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification, pipeline as tf_pipeline
import torch
import pandas as pd
# Select the default device: use the CUDA GPU when PyTorch reports one,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    torch_device = 'cuda'
else:
    torch_device = 'cpu'
# Model Loader Class for Lazy Loading and Resource Sharing
class ModelLoader:
    """Lazily load tokenizers and models, caching each one by model name.

    Attributes:
        tokenizers: Maps a model name to its already-loaded tokenizer.
        models: Maps a model name to its already-loaded model, which has
            been moved to ``torch_device``.
    """

    def __init__(self):
        self.tokenizers = {}
        self.models = {}

    def load_tokenizer(self, model_name):
        """Return the tokenizer for ``model_name``, loading it on first use.

        Args:
            model_name: Identifier passed to ``AutoTokenizer.from_pretrained``.

        Returns:
            The cached tokenizer instance.
        """
        if model_name not in self.tokenizers:
            self.tokenizers[model_name] = AutoTokenizer.from_pretrained(model_name)
        return self.tokenizers[model_name]

    def load_model(self, model_type, model_name):
        """Return the model for ``model_name``, loading it on first use.

        Args:
            model_type: ``"classification"`` for a sequence-classification
                head or ``"token_classification"`` for a token-classification
                head. Only consulted when the model is not cached yet.
            model_name: Identifier passed to ``from_pretrained``.

        Returns:
            The cached model instance, placed on ``torch_device``.

        Raises:
            ValueError: If the model is not cached and ``model_type`` is not
                one of the supported values.
        """
        if model_name not in self.models:
            if model_type == "classification":
                model_cls = AutoModelForSequenceClassification
            elif model_type == "token_classification":
                model_cls = AutoModelForTokenClassification
            else:
                # Fail loudly here; otherwise nothing is cached and the
                # return below raises a misleading KeyError(model_name).
                raise ValueError(
                    f"Unsupported model_type {model_type!r}; expected "
                    "'classification' or 'token_classification'."
                )
            self.models[model_name] = model_cls.from_pretrained(model_name).to(torch_device)
        return self.models[model_name]
# Module-level loader instance; it is handed to BiasPipeline below so that
# tokenizers and models are looked up through a single shared cache.
model_loader = ModelLoader()
# BiasPipeline for handling bias detection and cleaning
class BiasPipeline:
    """Run bias classification and biased-token tagging with the UnBIAS models.

    The tokenizers and models are obtained through the supplied loader and
    wrapped in two Hugging Face pipelines: ``classifier``
    ("text-classification") and ``ner`` ("ner").
    """

    # Checkpoint names used for the two pipelines.
    CLASSIFIER_MODEL_NAME = "newsmediabias/UnBIAS-classification-bert"
    NER_MODEL_NAME = "newsmediabias/UnBIAS-NER"

    def __init__(self, model_loader):
        """Load both checkpoints via ``model_loader`` and build the pipelines.

        Args:
            model_loader: Object providing ``load_tokenizer(model_name)`` and
                ``load_model(model_type, model_name)``.
        """
        self.model_loader = model_loader
        self.classifier_tokenizer = model_loader.load_tokenizer(self.CLASSIFIER_MODEL_NAME)
        self.classifier_model = model_loader.load_model("classification", self.CLASSIFIER_MODEL_NAME)
        self.ner_tokenizer = model_loader.load_tokenizer(self.NER_MODEL_NAME)
        self.ner_model = model_loader.load_model("token_classification", self.NER_MODEL_NAME)
        self.classifier = tf_pipeline(
            "text-classification",
            model=self.classifier_model,
            tokenizer=self.classifier_tokenizer,
            device=torch_device,
        )
        self.ner = tf_pipeline(
            "ner",
            model=self.ner_model,
            tokenizer=self.ner_tokenizer,
            device=torch_device,
        )

    def clean_text(self, text):
        """Clean up the text by removing any redundant spaces.

        Runs of whitespace collapse to a single space and leading/trailing
        whitespace is dropped; whitespace-only input yields ``""``.
        """
        return ' '.join(text.split())

    def process(self, texts):
        """Process texts to classify and find named entities.

        Args:
            texts: Input forwarded unchanged to both pipelines.

        Returns:
            A ``(classification_results, ner_results)`` tuple holding the raw
            output of the classifier and NER pipelines respectively.
        """
        # Truncate to the tokenizer's maximum length so that long user input
        # does not overflow the classifier's maximum sequence length.
        classification_results = self.classifier(texts, truncation=True)
        ner_results = self.ner(texts)
        return classification_results, ner_results
# Initialize the BiasPipeline.
# Streamlit re-executes this script on every widget interaction, so the
# pipeline is built inside a cached factory: the models are loaded once per
# server process instead of on every rerun.
@st.cache_resource
def _get_bias_pipeline():
    """Build the BiasPipeline; the result is cached across script reruns."""
    return BiasPipeline(model_loader)


bias_pipeline = _get_bias_pipeline()

# Streamlit interface setup
st.title('UnBIAS App')

# Pre-loaded example sentences offered in the dropdown.
test_sentences = [
    "Women are just too emotional to be leaders.",
    "All young people are lazy and addicted to their phones.",
    "People from that country are always dishonest and corrupt.",
    "Men are inherently better at science and math than women.",
    "The elderly are just burdens to society; they contribute nothing.",
    "All poor people are lazy and don't want to work.",
    "People who follow that religion are all terrorists.",
    "Immigrants are taking all our jobs and ruining the country.",
    "All wealthy people achieved success only through manipulation and deceit.",
    "People with tattoos are not professional.",
    "All politicians are liars and cannot be trusted.",
    "Blue-collar workers don't have any real skills.",
    "Anyone who doesn't attend college is unintelligent.",
    "Only people from the city are sophisticated.",
    "Rural folks have backward mindsets and don't understand progress.",
    "All artists are always broke and unreliable.",
    "Anyone from that region is uneducated and narrow-minded.",
    "People without jobs are simply not trying hard enough."
]

# Dropdown to select a test sentence; the leading "" entry means "none chosen".
selected_sentence = st.selectbox(
    "Choose a pre-loaded sentence to analyze and debias:",
    [""] + test_sentences,
)

# Text area for custom input
input_text = st.text_area("Or enter your own text to analyze and debias:", height=150)

if st.button("Analyze and Debias Text"):
    # A selected pre-loaded sentence takes precedence over the typed text.
    text_to_process = selected_sentence if selected_sentence else input_text
    # Clean before validating so that whitespace-only input is rejected
    # instead of being sent to the models as an empty string.
    cleaned_text = bias_pipeline.clean_text(text_to_process or "")
    if cleaned_text:
        classification_results, ner_results = bias_pipeline.process(cleaned_text)
        label = classification_results[0]['label']
        score = classification_results[0]['score']
        st.write(f"**Classification:** {label} (Confidence: {score:.2f})")
        # Keep only the tokens whose tag starts with B-BIAS.
        biased_words = [result['word'] for result in ner_results if result['entity'].startswith('B-BIAS')]
        # Show an explicit placeholder rather than a blank value when no
        # token was tagged.
        st.write(
            "**Biased Words Identified:**",
            ", ".join(biased_words) if biased_words else "None detected",
        )
    else:
        st.write("Please enter some text to analyze and debias or select a pre-loaded sentence.")

# Disclaimer
st.info("Disclaimer: Please note that while this tool aims to identify and highlight biased language, no automated system is perfect. The detection of bias depends on various factors, including the context, the training data used for the models, and the inherent limitations of natural language processing technologies. As such, some biases may not be detected, and all results should be reviewed critically by human users.")
|