freyam
committed on
Commit
·
8ab9329
1
Parent(s):
7192c24
Add sample size limit and AVID report
Browse files- app.py +98 -18
- config/methodologies.json +6 -3
- requirements.txt +2 -1
- scripts/genbit.py +1 -2
- scripts/gender_distribution.py +4 -8
- scripts/gender_profession_bias.py +1 -3
app.py
CHANGED
@@ -1,7 +1,9 @@
|
|
1 |
import os
|
2 |
import json
|
|
|
3 |
import gradio as gr
|
4 |
import pandas as pd
|
|
|
5 |
|
6 |
from scripts.genbit import *
|
7 |
from scripts.gender_profession_bias import *
|
@@ -10,8 +12,12 @@ from scripts.gender_distribution import *
|
|
10 |
from datasets import load_dataset as hf_load_dataset
|
11 |
from huggingface_hub import DatasetFilter, list_datasets
|
12 |
|
|
|
|
|
|
|
|
|
13 |
MAX_THRESHOLD = 1000
|
14 |
-
METHODOLOGIES = json.load(open("config/methodologies.json", "r"))
|
15 |
|
16 |
DATASET = {
|
17 |
"name": None,
|
@@ -21,10 +27,69 @@ DATASET = {
|
|
21 |
"sampling_size": None,
|
22 |
"column": None,
|
23 |
"methodology": None,
|
|
|
24 |
}
|
25 |
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
def evaluate():
|
|
|
|
|
|
|
28 |
print(
|
29 |
f"Dataset : {DATASET['name']}\n"
|
30 |
f"Source : {DATASET['source']}\n"
|
@@ -32,9 +97,13 @@ def evaluate():
|
|
32 |
f"Sampling Size : {DATASET['sampling_size']}\n"
|
33 |
f"Column : {DATASET['column']}\n"
|
34 |
f"Methodology : {DATASET['methodology']}\n"
|
|
|
|
|
35 |
)
|
36 |
|
37 |
try:
|
|
|
|
|
38 |
data = DATASET["df"].copy()
|
39 |
data = data[[DATASET["column"]]]
|
40 |
|
@@ -45,27 +114,37 @@ def evaluate():
|
|
45 |
elif DATASET["sampling_method"] == "Random":
|
46 |
data = data.sample(n=DATASET["sampling_size"], random_state=42)
|
47 |
|
48 |
-
result_df, result_plot
|
49 |
METHODOLOGIES.get(DATASET["methodology"]).get("fx")
|
50 |
](data)
|
51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
return (
|
53 |
-
gr.Markdown(f"### Result Summary\n\nlorem ipsum", visible=True),
|
54 |
gr.Plot(result_plot, visible=True),
|
55 |
gr.Dataframe(result_df, visible=True),
|
|
|
|
|
56 |
)
|
57 |
except Exception as e:
|
58 |
print(e)
|
59 |
return (
|
60 |
-
gr.Markdown(visible=False),
|
61 |
gr.Plot(visible=False),
|
62 |
gr.Dataframe(visible=False),
|
|
|
|
|
63 |
)
|
64 |
|
65 |
|
66 |
def load_dataset(local_dataset, hf_dataset):
|
67 |
DATASET["name"] = (
|
68 |
-
|
|
|
|
|
69 |
)
|
70 |
DATASET["source"] = "Local Dataset" if local_dataset else "HuggingFace Hub"
|
71 |
DATASET["df"] = (
|
@@ -227,7 +306,6 @@ with BiasAware:
|
|
227 |
)
|
228 |
|
229 |
hf_dataset = gr.Textbox(visible=False)
|
230 |
-
hf_dataset_search_results = gr.Radio(visible=False)
|
231 |
|
232 |
with gr.Row():
|
233 |
with gr.Column(scale=1):
|
@@ -261,16 +339,17 @@ with BiasAware:
|
|
261 |
visible=True,
|
262 |
)
|
263 |
|
264 |
-
|
265 |
-
result_plot = gr.Plot(show_label=False, container=False, visible=False)
|
266 |
result_df = gr.DataFrame(visible=False)
|
267 |
|
268 |
-
|
269 |
-
value="
|
270 |
interactive=False,
|
271 |
variant="primary",
|
272 |
)
|
273 |
|
|
|
|
|
274 |
#
|
275 |
# Event Handlers
|
276 |
#
|
@@ -322,12 +401,12 @@ with BiasAware:
|
|
322 |
hf_dataset.submit(
|
323 |
fn=show_hf_dataset_search_results,
|
324 |
inputs=[hf_dataset],
|
325 |
-
outputs=[dataset_load_btn
|
326 |
)
|
327 |
|
328 |
dataset_load_btn.click(
|
329 |
fn=load_dataset,
|
330 |
-
inputs=[local_dataset,
|
331 |
outputs=[
|
332 |
dataset_sampling_method,
|
333 |
dataset_sampling_size,
|
@@ -365,14 +444,15 @@ with BiasAware:
|
|
365 |
)
|
366 |
|
367 |
evaluation_btn.click(
|
368 |
-
fn=evaluate,
|
|
|
|
|
369 |
)
|
370 |
|
371 |
-
|
372 |
-
fn=None,
|
373 |
-
inputs=None,
|
374 |
-
outputs=None,
|
375 |
)
|
376 |
|
377 |
|
378 |
-
|
|
|
|
1 |
import os
|
2 |
import json
|
3 |
+
import timeit
|
4 |
import gradio as gr
|
5 |
import pandas as pd
|
6 |
+
from datetime import date
|
7 |
|
8 |
from scripts.genbit import *
|
9 |
from scripts.gender_profession_bias import *
|
|
|
12 |
from datasets import load_dataset as hf_load_dataset
|
13 |
from huggingface_hub import DatasetFilter, list_datasets
|
14 |
|
15 |
+
from avidtools.datamodels.report import Report
|
16 |
+
from avidtools.datamodels.components import *
|
17 |
+
from avidtools.datamodels.enums import *
|
18 |
+
|
19 |
MAX_THRESHOLD = 1000
|
20 |
+
# Methodology catalogue keyed by display name; entries are read elsewhere in
# this file via their "fx" and "short_description" fields. Loaded through a
# context manager so the file handle is closed right after parsing instead of
# being left for the garbage collector.
with open("config/methodologies.json", "r", encoding="utf8") as _methodologies_file:
    METHODOLOGIES = json.load(_methodologies_file)
|
21 |
|
22 |
DATASET = {
|
23 |
"name": None,
|
|
|
27 |
"sampling_size": None,
|
28 |
"column": None,
|
29 |
"methodology": None,
|
30 |
+
"result_df": None,
|
31 |
}
|
32 |
|
33 |
|
34 |
+
def generate_avid_report():
    """Build an AVID report for the most recent evaluation and show it as JSON.

    Reads the dataset name, source, methodology and stored result table from
    the module-level ``DATASET`` dict, and the methodology's short description
    from ``METHODOLOGIES``.

    Returns:
        A ``gr.JSON`` component update, made visible, whose value is the
        dumped report.
    """
    dataset_id = DATASET["name"]
    methodology = DATASET["methodology"]
    # Column-oriented dict ({column: [values...]}) of the last result table.
    result_json = DATASET["result_df"].to_dict(orient="list")
    # Deployer and reference entries only apply to datasets pulled from the Hub.
    from_hub = DATASET["source"] == "HuggingFace Hub"

    report = Report()

    report.affects = Affects(
        developer=[],
        deployer=["Hugging Face"] if from_hub else [],
        artifacts=[Artifact(type=ArtifactTypeEnum.dataset, name=dataset_id)],
    )
    report.problemtype = Problemtype(
        classof=ClassEnum.na,
        type=TypeEnum.detection,
        description=LangValue(
            lang="eng", value="Dataset Bias Detection using BiasAware"
        ),
    )
    report.metrics = [
        Metric(
            name=methodology,
            detection_method=Detection(type=MethodEnum.test, name=methodology),
            results=result_json,
        )
    ]

    if from_hub:
        # The original label used str.format(model_id=...) against a
        # "{dataset_id}" placeholder, which raised KeyError for every Hub
        # dataset. Dataset repositories are also served under /datasets/.
        report.references = [
            Reference(
                label=f"{dataset_id} on Hugging Face",
                url=f"https://huggingface.co/datasets/{dataset_id}",
            )
        ]
    else:
        report.references = []

    report.description = LangValue(
        lang="eng", value=METHODOLOGIES[methodology]["short_description"]
    )
    report.impact = Impact(
        avid=AvidTaxonomy(
            vuln_id="",
            risk_domain=["Ethics"],
            sep_view=[SepEnum.E0101],
            lifecycle_view=[LifecycleEnum.L03],
            taxonomy_version="0.2",
        )
    )
    report.reported_date = date.today()

    return gr.JSON(value=report.model_dump(), visible=True)
|
87 |
+
|
88 |
+
|
89 |
def evaluate():
|
90 |
+
if DATASET["methodology"] == "GenBiT (Microsoft Gender Bias Tool)":
|
91 |
+
DATASET["sampling_size"] = min(DATASET["sampling_size"], 100)
|
92 |
+
|
93 |
print(
|
94 |
f"Dataset : {DATASET['name']}\n"
|
95 |
f"Source : {DATASET['source']}\n"
|
|
|
97 |
f"Sampling Size : {DATASET['sampling_size']}\n"
|
98 |
f"Column : {DATASET['column']}\n"
|
99 |
f"Methodology : {DATASET['methodology']}\n"
|
100 |
+
f"Time Taken : ",
|
101 |
+
end="",
|
102 |
)
|
103 |
|
104 |
try:
|
105 |
+
start = timeit.default_timer()
|
106 |
+
|
107 |
data = DATASET["df"].copy()
|
108 |
data = data[[DATASET["column"]]]
|
109 |
|
|
|
114 |
elif DATASET["sampling_method"] == "Random":
|
115 |
data = data.sample(n=DATASET["sampling_size"], random_state=42)
|
116 |
|
117 |
+
result_df, result_plot = globals()[
|
118 |
METHODOLOGIES.get(DATASET["methodology"]).get("fx")
|
119 |
](data)
|
120 |
|
121 |
+
DATASET["result_df"] = result_df
|
122 |
+
|
123 |
+
stop = timeit.default_timer()
|
124 |
+
|
125 |
+
print(f"{stop - start:.2f} seconds")
|
126 |
+
|
127 |
return (
|
|
|
128 |
gr.Plot(result_plot, visible=True),
|
129 |
gr.Dataframe(result_df, visible=True),
|
130 |
+
gr.Button(visible=True, interactive=True),
|
131 |
+
gr.JSON(visible=True),
|
132 |
)
|
133 |
except Exception as e:
|
134 |
print(e)
|
135 |
return (
|
|
|
136 |
gr.Plot(visible=False),
|
137 |
gr.Dataframe(visible=False),
|
138 |
+
gr.Button(visible=False),
|
139 |
+
gr.JSON(visible=False),
|
140 |
)
|
141 |
|
142 |
|
143 |
def load_dataset(local_dataset, hf_dataset):
|
144 |
DATASET["name"] = (
|
145 |
+
os.path.splitext(os.path.basename(local_dataset.name))[0]
|
146 |
+
if local_dataset
|
147 |
+
else hf_dataset
|
148 |
)
|
149 |
DATASET["source"] = "Local Dataset" if local_dataset else "HuggingFace Hub"
|
150 |
DATASET["df"] = (
|
|
|
306 |
)
|
307 |
|
308 |
hf_dataset = gr.Textbox(visible=False)
|
|
|
309 |
|
310 |
with gr.Row():
|
311 |
with gr.Column(scale=1):
|
|
|
339 |
visible=True,
|
340 |
)
|
341 |
|
342 |
+
result_plot = gr.Plot(show_label=False, container=False)
|
|
|
343 |
result_df = gr.DataFrame(visible=False)
|
344 |
|
345 |
+
generate_avid_report_btn = gr.Button(
|
346 |
+
value="Generate AVID Report",
|
347 |
interactive=False,
|
348 |
variant="primary",
|
349 |
)
|
350 |
|
351 |
+
avid_report = gr.JSON(label="AVID Report", visible=False)
|
352 |
+
|
353 |
#
|
354 |
# Event Handlers
|
355 |
#
|
|
|
401 |
hf_dataset.submit(
|
402 |
fn=show_hf_dataset_search_results,
|
403 |
inputs=[hf_dataset],
|
404 |
+
outputs=[dataset_load_btn],
|
405 |
)
|
406 |
|
407 |
dataset_load_btn.click(
|
408 |
fn=load_dataset,
|
409 |
+
inputs=[local_dataset, hf_dataset],
|
410 |
outputs=[
|
411 |
dataset_sampling_method,
|
412 |
dataset_sampling_size,
|
|
|
444 |
)
|
445 |
|
446 |
evaluation_btn.click(
|
447 |
+
fn=evaluate,
|
448 |
+
inputs=None,
|
449 |
+
outputs=[result_plot, result_df, generate_avid_report_btn, avid_report],
|
450 |
)
|
451 |
|
452 |
+
generate_avid_report_btn.click(
|
453 |
+
fn=generate_avid_report, inputs=None, outputs=[avid_report]
|
|
|
|
|
454 |
)
|
455 |
|
456 |
|
457 |
+
if __name__ == "__main__":
|
458 |
+
BiasAware.launch()
|
config/methodologies.json
CHANGED
@@ -1,14 +1,17 @@
|
|
1 |
{
|
2 |
"Gender Distribution (Term Identity Diversity)": {
|
3 |
-
"description": "Gender distribution is
|
|
|
4 |
"fx": "eval_gender_distribution"
|
5 |
},
|
6 |
"Gender Profession Bias (Lexical Evaluation)": {
|
7 |
-
"description": "
|
|
|
8 |
"fx": "eval_gender_profession"
|
9 |
},
|
10 |
"GenBiT (Microsoft Gender Bias Tool)": {
|
11 |
-
"description": "[GenBiT](https://www.microsoft.com/en-us/research/uploads/prod/2021/10/MSJAR_Genbit_Final_Version-616fd3a073758.pdf) is a versatile tool designed to address gender bias in language datasets by utilizing word co-occurrence statistical methods to measure bias. It introduces a novel approach to mitigating gender bias by combining contextual data augmentation, random sampling, sentence classification, and targeted gendered data filtering.\n- The primary goal is to reduce historical gender biases within conversational parallel multilingual datasets, ultimately enhancing the fairness and inclusiveness of machine learning model training and its subsequent applications.\n- What sets GenBiT apart is its adaptability to various forms of bias, not limited to gender alone. It can effectively address biases related to race, religion, or other dimensions, making it a valuable generic tool for bias mitigation in language datasets.\n- GenBiT's impact extends beyond bias reduction metrics; it has shown positive results in improving the performance of machine learning classifiers like Support Vector Machine(SVM). Augmented datasets produced by GenBiT yield significant enhancements in f1-score when compared to the original datasets, underlining its practical benefits in machine learning applications.",
|
|
|
12 |
"fx": "eval_genbit"
|
13 |
}
|
14 |
}
|
|
|
1 |
{
|
2 |
"Gender Distribution (Term Identity Diversity)": {
|
3 |
+
"description": "Gender distribution is a fundamental component of identity diversity, serving as a critical indicator of the presence and equilibrium of various gender identities within a given population or dataset. An understanding of gender distribution holds immense significance for fostering inclusivity and equity across diverse contexts, including workplaces, educational institutions, and social environments.\n\nIn this analysis, we employ a structured approach to examine gender distribution. We categorize gender identities into predefined groups, each representing specific gender-related attributes or expressions. These categories help us comprehensively assess the gender composition within the dataset or population under scrutiny. Here's a breakdown of the terms used in the analysis:\n- No Gender: This category encompasses text that lacks significant gender-specific terms or maintains a balance between male and female terms, resulting in a neutral or 'no gender' classification.\n- Equal Gender: The 'Equal Gender' category signifies a balance between male and female terms in the analyzed text, indicating an equitable representation of both genders.\n- Female Positive Gender: Within this category, we include text that exhibits a higher prevalence of female-related terms.\n- Male Positive Gender: Similarly, the 'Male Positive Gender' category comprises text with a higher occurrence of male-related terms.\n- Female Strongly Positive Gender: This subcategory represents text with a significantly stronger presence of female-related terms, exceeding a 75% threshold for strong gender association.\n- Male Strongly Positive Gender: Analogous to the previous subcategory, 'Male Strongly Positive Gender' represents text with a significantly stronger presence of male-related terms, exceeding a 75% threshold for strong gender association.\n\nPlease note that the following categories are based on the analysis of text content and do not necessarily indicate the gender 
identities of individuals described within the text.",
|
4 |
+
"short_description": "This methodology uncovers gender distribution and its impact on inclusivity and equity across diverse contexts.",
|
5 |
"fx": "eval_gender_distribution"
|
6 |
},
|
7 |
"Gender Profession Bias (Lexical Evaluation)": {
|
8 |
+
"description": "Gender-profession bias occurs when certain gender identities are overrepresented or underrepresented in the training data, which can result in biased model outputs and reinforce stereotypes. Recognizing and addressing this bias is crucial for promoting fairness and equity in AI applications. Understanding the gender-profession distribution within these datasets is pivotal for creating more inclusive and accurate models, as these models have wide-ranging applications, from chatbots and automated content generation to language translation, and their outputs can have a profound impact on society. Addressing gender-profession bias is an essential step in fostering diversity, inclusivity, and fairness in AI technologies.\n\nThis methodology is designed to identify gender and profession-related information within text-based datasets. It specifically focuses on detecting instances where male and female pronouns are associated with professions in the text. This is achieved through the meticulous use of tailored lexicons and robust regular expressions, which are applied systematically to examine each sentence within the dataset while preserving the contextual information of these linguistic elements.\n\nBy implementing this method, we aim to promote the ethical and socially responsible use of LM-powered applications. It provides valuable insights into gender-profession associations present in unmodified textual data, contributing to a more equitable and informed use of language models.\n\nIn the ever-evolving landscape of technology and language models, this research offers a practical solution to unveil gender and profession dynamics within text data. Its application can bolster the inclusivity and ethical considerations of LM-powered applications, ensuring not only technical proficiency but also a deeper comprehension of the language and its societal implications within textual datasets.",
|
9 |
+
"short_description": "This methodology uncovers gender-profession bias in training data to ensure fairness and inclusivity in AI applications by systematically identifying gender-profession associations within text-based datasets.",
|
10 |
"fx": "eval_gender_profession"
|
11 |
},
|
12 |
"GenBiT (Microsoft Gender Bias Tool)": {
|
13 |
+
"description": "(Note: The sampling size is limited to 100 for this methodology due to computational constraints.)\n\n[GenBiT](https://www.microsoft.com/en-us/research/uploads/prod/2021/10/MSJAR_Genbit_Final_Version-616fd3a073758.pdf) is a versatile tool designed to address gender bias in language datasets by utilizing word co-occurrence statistical methods to measure bias. It introduces a novel approach to mitigating gender bias by combining contextual data augmentation, random sampling, sentence classification, and targeted gendered data filtering.\n- The primary goal is to reduce historical gender biases within conversational parallel multilingual datasets, ultimately enhancing the fairness and inclusiveness of machine learning model training and its subsequent applications.\n- What sets GenBiT apart is its adaptability to various forms of bias, not limited to gender alone. It can effectively address biases related to race, religion, or other dimensions, making it a valuable generic tool for bias mitigation in language datasets.\n- GenBiT's impact extends beyond bias reduction metrics; it has shown positive results in improving the performance of machine learning classifiers like Support Vector Machine(SVM). Augmented datasets produced by GenBiT yield significant enhancements in f1-score when compared to the original datasets, underlining its practical benefits in machine learning applications.",
|
14 |
+
"short_description": "This methodology highlights GenBiT's function in mitigating bias in language datasets by offering adaptability to various forms of bias, such as gender, race, religion, and other dimensions.",
|
15 |
"fx": "eval_genbit"
|
16 |
}
|
17 |
}
|
requirements.txt
CHANGED
@@ -5,4 +5,5 @@ pandas==2.0.3
|
|
5 |
spacy==3.6.1
|
6 |
genbit==2.2.0.0
|
7 |
plotly==5.16.1
|
8 |
-
datasets==2.14.5
|
|
|
|
5 |
spacy==3.6.1
|
6 |
genbit==2.2.0.0
|
7 |
plotly==5.16.1
|
8 |
+
datasets==2.14.5
|
9 |
+
avidtools==0.1.1.2
|
scripts/genbit.py
CHANGED
@@ -72,6 +72,5 @@ def eval_genbit(data):
|
|
72 |
)
|
73 |
|
74 |
result_plot = plot_genbit(result_json)
|
75 |
-
result_conclusion = ""
|
76 |
|
77 |
-
return result_df, result_plot
|
|
|
72 |
)
|
73 |
|
74 |
result_plot = plot_genbit(result_json)
|
|
|
75 |
|
76 |
+
return result_df, result_plot
|
scripts/gender_distribution.py
CHANGED
@@ -88,15 +88,11 @@ def eval_gender_distribution(data):
|
|
88 |
"Female Strongly Positive Gender",
|
89 |
]
|
90 |
|
91 |
-
|
92 |
data["gender_category"].value_counts().reindex(gender_labels, fill_value=0)
|
93 |
)
|
94 |
|
95 |
-
result_df = pd.DataFrame(
|
96 |
-
|
97 |
-
)
|
98 |
-
result_plot = plot_gender_category_counts(gender_labels, gender_counts)
|
99 |
-
|
100 |
-
result_conclusion = ""
|
101 |
|
102 |
-
return result_df, result_plot
|
|
|
88 |
"Female Strongly Positive Gender",
|
89 |
]
|
90 |
|
91 |
+
result_json = (
|
92 |
data["gender_category"].value_counts().reindex(gender_labels, fill_value=0)
|
93 |
)
|
94 |
|
95 |
+
result_df = pd.DataFrame({"Metric": result_json.index, "Value": result_json.values})
|
96 |
+
result_plot = plot_gender_category_counts(gender_labels, result_json)
|
|
|
|
|
|
|
|
|
97 |
|
98 |
+
return result_df, result_plot
|
scripts/gender_profession_bias.py
CHANGED
@@ -142,6 +142,4 @@ def eval_gender_profession(data):
|
|
142 |
.rename(columns={"index": "Metric", 0: "Value"})
|
143 |
)
|
144 |
|
145 |
-
|
146 |
-
|
147 |
-
return result_df, result_plot, result_conclusion
|
|
|
142 |
.rename(columns={"index": "Metric", 0: "Value"})
|
143 |
)
|
144 |
|
145 |
+
return result_df, result_plot
|
|
|
|