freyam committed on
Commit
8ab9329
·
1 Parent(s): 7192c24

Add sample size limit and AVID report

Browse files
app.py CHANGED
@@ -1,7 +1,9 @@
1
  import os
2
  import json
 
3
  import gradio as gr
4
  import pandas as pd
 
5
 
6
  from scripts.genbit import *
7
  from scripts.gender_profession_bias import *
@@ -10,8 +12,12 @@ from scripts.gender_distribution import *
10
  from datasets import load_dataset as hf_load_dataset
11
  from huggingface_hub import DatasetFilter, list_datasets
12
 
 
 
 
 
13
  MAX_THRESHOLD = 1000
14
- METHODOLOGIES = json.load(open("config/methodologies.json", "r"))
15
 
16
  DATASET = {
17
  "name": None,
@@ -21,10 +27,69 @@ DATASET = {
21
  "sampling_size": None,
22
  "column": None,
23
  "methodology": None,
 
24
  }
25
 
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  def evaluate():
 
 
 
28
  print(
29
  f"Dataset : {DATASET['name']}\n"
30
  f"Source : {DATASET['source']}\n"
@@ -32,9 +97,13 @@ def evaluate():
32
  f"Sampling Size : {DATASET['sampling_size']}\n"
33
  f"Column : {DATASET['column']}\n"
34
  f"Methodology : {DATASET['methodology']}\n"
 
 
35
  )
36
 
37
  try:
 
 
38
  data = DATASET["df"].copy()
39
  data = data[[DATASET["column"]]]
40
 
@@ -45,27 +114,37 @@ def evaluate():
45
  elif DATASET["sampling_method"] == "Random":
46
  data = data.sample(n=DATASET["sampling_size"], random_state=42)
47
 
48
- result_df, result_plot, result_description = globals()[
49
  METHODOLOGIES.get(DATASET["methodology"]).get("fx")
50
  ](data)
51
 
 
 
 
 
 
 
52
  return (
53
- gr.Markdown(f"### Result Summary\n\nlorem ipsum", visible=True),
54
  gr.Plot(result_plot, visible=True),
55
  gr.Dataframe(result_df, visible=True),
 
 
56
  )
57
  except Exception as e:
58
  print(e)
59
  return (
60
- gr.Markdown(visible=False),
61
  gr.Plot(visible=False),
62
  gr.Dataframe(visible=False),
 
 
63
  )
64
 
65
 
66
  def load_dataset(local_dataset, hf_dataset):
67
  DATASET["name"] = (
68
- local_dataset.name.split("/")[-1].split(".")[0] if local_dataset else hf_dataset
 
 
69
  )
70
  DATASET["source"] = "Local Dataset" if local_dataset else "HuggingFace Hub"
71
  DATASET["df"] = (
@@ -227,7 +306,6 @@ with BiasAware:
227
  )
228
 
229
  hf_dataset = gr.Textbox(visible=False)
230
- hf_dataset_search_results = gr.Radio(visible=False)
231
 
232
  with gr.Row():
233
  with gr.Column(scale=1):
@@ -261,16 +339,17 @@ with BiasAware:
261
  visible=True,
262
  )
263
 
264
- result_description = gr.Markdown(visible=False)
265
- result_plot = gr.Plot(show_label=False, container=False, visible=False)
266
  result_df = gr.DataFrame(visible=False)
267
 
268
- submit_to_avid_btn = gr.Button(
269
- value="Submit to AVID",
270
  interactive=False,
271
  variant="primary",
272
  )
273
 
 
 
274
  #
275
  # Event Handlers
276
  #
@@ -322,12 +401,12 @@ with BiasAware:
322
  hf_dataset.submit(
323
  fn=show_hf_dataset_search_results,
324
  inputs=[hf_dataset],
325
- outputs=[dataset_load_btn, hf_dataset_search_results],
326
  )
327
 
328
  dataset_load_btn.click(
329
  fn=load_dataset,
330
- inputs=[local_dataset, hf_dataset_search_results],
331
  outputs=[
332
  dataset_sampling_method,
333
  dataset_sampling_size,
@@ -365,14 +444,15 @@ with BiasAware:
365
  )
366
 
367
  evaluation_btn.click(
368
- fn=evaluate, inputs=None, outputs=[result_description, result_plot, result_df]
 
 
369
  )
370
 
371
- submit_to_avid_btn.click(
372
- fn=None,
373
- inputs=None,
374
- outputs=None,
375
  )
376
 
377
 
378
- BiasAware.launch()
 
 
1
  import os
2
  import json
3
+ import timeit
4
  import gradio as gr
5
  import pandas as pd
6
+ from datetime import date
7
 
8
  from scripts.genbit import *
9
  from scripts.gender_profession_bias import *
 
12
  from datasets import load_dataset as hf_load_dataset
13
  from huggingface_hub import DatasetFilter, list_datasets
14
 
15
+ from avidtools.datamodels.report import Report
16
+ from avidtools.datamodels.components import *
17
+ from avidtools.datamodels.enums import *
18
+
19
  MAX_THRESHOLD = 1000
20
+ METHODOLOGIES = json.load(open("config/methodologies.json", "r", encoding="utf8"))
21
 
22
  DATASET = {
23
  "name": None,
 
27
  "sampling_size": None,
28
  "column": None,
29
  "methodology": None,
30
+ "result_df": None,
31
  }
32
 
33
 
34
def generate_avid_report():
    """Build an AVID report for the most recent evaluation and return it as JSON.

    All inputs come from the module-level ``DATASET`` dict (``name``,
    ``source``, ``methodology`` and ``result_df``) and from ``METHODOLOGIES``,
    which supplies the methodology's ``short_description``.

    Returns:
        A visible ``gr.JSON`` component whose value is ``report.model_dump()``.

    Raises:
        KeyError: if ``DATASET["methodology"]`` is not a key of
            ``METHODOLOGIES`` or that entry has no ``short_description``.
    """
    dataset_id = DATASET["name"]
    methodology = DATASET["methodology"]
    # NOTE(review): assumes evaluate() has already stored a DataFrame in
    # DATASET["result_df"]; a None here would fail on .to_dict -- confirm the
    # button wired to this handler stays disabled until then.
    result_json = DATASET["result_df"].to_dict(orient="list")
    from_hub = DATASET["source"] == "HuggingFace Hub"

    report = Report()

    report.affects = Affects(
        developer=[],
        deployer=["Hugging Face"] if from_hub else [],
        artifacts=[Artifact(type=ArtifactTypeEnum.dataset, name=dataset_id)],
    )
    report.problemtype = Problemtype(
        classof=ClassEnum.na,
        type=TypeEnum.detection,
        description=LangValue(
            lang="eng", value="Dataset Bias Detection using BiasAware"
        ),
    )
    report.metrics = [
        Metric(
            name=methodology,
            detection_method=Detection(type=MethodEnum.test, name=methodology),
            results=result_json,
        )
    ]

    # Only Hub datasets have a public page to cite. The label used to be
    # built with ``.format(model_id=...)`` against a ``{dataset_id}``
    # placeholder, which raised KeyError for every Hub dataset; f-strings
    # remove the mismatch. Dataset repositories are served under the
    # ``/datasets/`` path on the Hub, so the URL includes that segment.
    if from_hub:
        report.references = [
            Reference(
                label=f"{dataset_id} on Hugging Face",
                url=f"https://huggingface.co/datasets/{dataset_id}",
            )
        ]
    else:
        report.references = []

    report.description = LangValue(
        lang="eng", value=METHODOLOGIES[methodology]["short_description"]
    )
    report.impact = Impact(
        avid=AvidTaxonomy(
            vuln_id="",
            risk_domain=["Ethics"],
            sep_view=[SepEnum.E0101],
            lifecycle_view=[LifecycleEnum.L03],
            taxonomy_version="0.2",
        )
    )
    report.reported_date = date.today()

    return gr.JSON(value=report.model_dump(), visible=True)
87
+
88
+
89
  def evaluate():
90
+ if DATASET["methodology"] == "GenBiT (Microsoft Gender Bias Tool)":
91
+ DATASET["sampling_size"] = min(DATASET["sampling_size"], 100)
92
+
93
  print(
94
  f"Dataset : {DATASET['name']}\n"
95
  f"Source : {DATASET['source']}\n"
 
97
  f"Sampling Size : {DATASET['sampling_size']}\n"
98
  f"Column : {DATASET['column']}\n"
99
  f"Methodology : {DATASET['methodology']}\n"
100
+ f"Time Taken : ",
101
+ end="",
102
  )
103
 
104
  try:
105
+ start = timeit.default_timer()
106
+
107
  data = DATASET["df"].copy()
108
  data = data[[DATASET["column"]]]
109
 
 
114
  elif DATASET["sampling_method"] == "Random":
115
  data = data.sample(n=DATASET["sampling_size"], random_state=42)
116
 
117
+ result_df, result_plot = globals()[
118
  METHODOLOGIES.get(DATASET["methodology"]).get("fx")
119
  ](data)
120
 
121
+ DATASET["result_df"] = result_df
122
+
123
+ stop = timeit.default_timer()
124
+
125
+ print(f"{stop - start:.2f} seconds")
126
+
127
  return (
 
128
  gr.Plot(result_plot, visible=True),
129
  gr.Dataframe(result_df, visible=True),
130
+ gr.Button(visible=True, interactive=True),
131
+ gr.JSON(visible=True),
132
  )
133
  except Exception as e:
134
  print(e)
135
  return (
 
136
  gr.Plot(visible=False),
137
  gr.Dataframe(visible=False),
138
+ gr.Button(visible=False),
139
+ gr.JSON(visible=False),
140
  )
141
 
142
 
143
  def load_dataset(local_dataset, hf_dataset):
144
  DATASET["name"] = (
145
+ os.path.splitext(os.path.basename(local_dataset.name))[0]
146
+ if local_dataset
147
+ else hf_dataset
148
  )
149
  DATASET["source"] = "Local Dataset" if local_dataset else "HuggingFace Hub"
150
  DATASET["df"] = (
 
306
  )
307
 
308
  hf_dataset = gr.Textbox(visible=False)
 
309
 
310
  with gr.Row():
311
  with gr.Column(scale=1):
 
339
  visible=True,
340
  )
341
 
342
+ result_plot = gr.Plot(show_label=False, container=False)
 
343
  result_df = gr.DataFrame(visible=False)
344
 
345
+ generate_avid_report_btn = gr.Button(
346
+ value="Generate AVID Report",
347
  interactive=False,
348
  variant="primary",
349
  )
350
 
351
+ avid_report = gr.JSON(label="AVID Report", visible=False)
352
+
353
  #
354
  # Event Handlers
355
  #
 
401
  hf_dataset.submit(
402
  fn=show_hf_dataset_search_results,
403
  inputs=[hf_dataset],
404
+ outputs=[dataset_load_btn],
405
  )
406
 
407
  dataset_load_btn.click(
408
  fn=load_dataset,
409
+ inputs=[local_dataset, hf_dataset],
410
  outputs=[
411
  dataset_sampling_method,
412
  dataset_sampling_size,
 
444
  )
445
 
446
  evaluation_btn.click(
447
+ fn=evaluate,
448
+ inputs=None,
449
+ outputs=[result_plot, result_df, generate_avid_report_btn, avid_report],
450
  )
451
 
452
+ generate_avid_report_btn.click(
453
+ fn=generate_avid_report, inputs=None, outputs=[avid_report]
 
 
454
  )
455
 
456
 
457
+ if __name__ == "__main__":
458
+ BiasAware.launch()
config/methodologies.json CHANGED
@@ -1,14 +1,17 @@
1
  {
2
  "Gender Distribution (Term Identity Diversity)": {
3
- "description": "Gender distribution is an essential aspect of identity diversity, representing the presence and balance of different gender identities within a given population or dataset. Understanding gender distribution is crucial for promoting inclusivity and equity in various contexts, such as workplaces, educational institutions, and social settings.\nIn this analysis, we examine gender distribution using a set of predefined gender identity categories. Each category represents a specific gender-related attribute or expression. Let's define the terms used in the analysis:\n- No Gender: This category likely refers to individuals who identify as non-binary, genderqueer, or gender-neutral, indicating that they do not align with traditional binary gender categories (male or female).\n- Equal Gender: This category may represent a balance between male and female genders, suggesting an equal representation of both in the dataset or population.\n- Female Positive Gender: This category likely includes individuals who identify as female or have a strong affiliation with femininity.\n- Male Positive Gender: Similarly, this category includes individuals who identify as male or have a strong affiliation with masculinity.\n- Female Strongly Positive Gender: This subcategory represents a more emphatic identification with female gender attributes, possibly indicating a stronger female gender identity.\n- Male Strongly Positive Gender: This subcategory mirrors the previous one but for male gender attributes, indicating a stronger male gender identity.",
 
4
  "fx": "eval_gender_distribution"
5
  },
6
  "Gender Profession Bias (Lexical Evaluation)": {
7
- "description": "This approach to addressing gender bias in language places a strong emphasis on a fundamental shift in detection and mitigation strategies.\n- Instead of solely relying on traditional frequency-based methods, this approach adopts a more nuanced perspective, prioritizing features within the text that consider contextual and semantic cues. It recognizes that gender bias extends beyond mere word frequency and delves into how language is structured and how it reinforces gender stereotypes.\n- Even with advanced models like Word Embedding and Contextual Word Embedding, which capture more complex language features, there's still a risk of inheriting biases from training data.\n- To tackle this, this approach advocates for a data-driven strategy, involving the collection and labeling of datasets encompassing various subtypes of bias, using a comprehensive taxonomy for precise categorization.",
 
8
  "fx": "eval_gender_profession"
9
  },
10
  "GenBiT (Microsoft Gender Bias Tool)": {
11
- "description": "[GenBiT](https://www.microsoft.com/en-us/research/uploads/prod/2021/10/MSJAR_Genbit_Final_Version-616fd3a073758.pdf) is a versatile tool designed to address gender bias in language datasets by utilizing word co-occurrence statistical methods to measure bias. It introduces a novel approach to mitigating gender bias by combining contextual data augmentation, random sampling, sentence classification, and targeted gendered data filtering.\n- The primary goal is to reduce historical gender biases within conversational parallel multilingual datasets, ultimately enhancing the fairness and inclusiveness of machine learning model training and its subsequent applications.\n- What sets GenBiT apart is its adaptability to various forms of bias, not limited to gender alone. It can effectively address biases related to race, religion, or other dimensions, making it a valuable generic tool for bias mitigation in language datasets.\n- GenBiT's impact extends beyond bias reduction metrics; it has shown positive results in improving the performance of machine learning classifiers like Support Vector Machine(SVM). Augmented datasets produced by GenBiT yield significant enhancements in f1-score when compared to the original datasets, underlining its practical benefits in machine learning applications.",
 
12
  "fx": "eval_genbit"
13
  }
14
  }
 
1
  {
2
  "Gender Distribution (Term Identity Diversity)": {
3
+ "description": "Gender distribution is a fundamental component of identity diversity, serving as a critical indicator of the presence and equilibrium of various gender identities within a given population or dataset. An understanding of gender distribution holds immense significance for fostering inclusivity and equity across diverse contexts, including workplaces, educational institutions, and social environments.\n\nIn this analysis, we employ a structured approach to examine gender distribution. We categorize gender identities into predefined groups, each representing specific gender-related attributes or expressions. These categories help us comprehensively assess the gender composition within the dataset or population under scrutiny. Here's a breakdown of the terms used in the analysis:\n- No Gender: This category encompasses text that lacks significant gender-specific terms or maintains a balance between male and female terms, resulting in a neutral or 'no gender' classification.\n- Equal Gender: The 'Equal Gender' category signifies a balance between male and female terms in the analyzed text, indicating an equitable representation of both genders.\n- Female Positive Gender: Within this category, we include text that exhibits a higher prevalence of female-related terms.\n- Male Positive Gender: Similarly, the 'Male Positive Gender' category comprises text with a higher occurrence of male-related terms.\n- Female Strongly Positive Gender: This subcategory represents text with a significantly stronger presence of female-related terms, exceeding a 75% threshold for strong gender association.\n- Male Strongly Positive Gender: Analogous to the previous subcategory, 'Male Strongly Positive Gender' represents text with a significantly stronger presence of male-related terms, exceeding a 75% threshold for strong gender association.\n\nPlease note that the above categories are based on the analysis of text content and do not necessarily indicate the gender identities of individuals described within the text.",
4
+ "short_description": "This methodology uncovers gender distribution and its impact on inclusivity and equity across diverse contexts.",
5
  "fx": "eval_gender_distribution"
6
  },
7
  "Gender Profession Bias (Lexical Evaluation)": {
8
+ "description": "Gender-profession bias occurs when certain gender identities are overrepresented or underrepresented in the training data, which can result in biased model outputs and reinforce stereotypes. Recognizing and addressing this bias is crucial for promoting fairness and equity in AI applications. Understanding the gender-profession distribution within these datasets is pivotal for creating more inclusive and accurate models, as these models have wide-ranging applications, from chatbots and automated content generation to language translation, and their outputs can have a profound impact on society. Addressing gender-profession bias is an essential step in fostering diversity, inclusivity, and fairness in AI technologies.\n\nThis methodology is designed to identify gender and profession-related information within text-based datasets. It specifically focuses on detecting instances where male and female pronouns are associated with professions in the text. This is achieved through the meticulous use of tailored lexicons and robust regular expressions, which are applied systematically to examine each sentence within the dataset while preserving the contextual information of these linguistic elements.\n\nBy implementing this method, we aim to promote the ethical and socially responsible use of LM-powered applications. It provides valuable insights into gender-profession associations present in unmodified textual data, contributing to a more equitable and informed use of language models.\n\nIn the ever-evolving landscape of technology and language models, this research offers a practical solution to unveil gender and profession dynamics within text data. Its application can bolster the inclusivity and ethical considerations of LM-powered applications, ensuring not only technical proficiency but also a deeper comprehension of the language and its societal implications within textual datasets.",
9
+ "short_description": "This methodology uncovers gender-profession bias in training data to ensure fairness and inclusivity in AI applications by systematically identifying gender-profession associations within text-based datasets.",
10
  "fx": "eval_gender_profession"
11
  },
12
  "GenBiT (Microsoft Gender Bias Tool)": {
13
+ "description": "(Note: The sampling size is limited to 100 for this methodology due to computational constraints.)\n\n[GenBiT](https://www.microsoft.com/en-us/research/uploads/prod/2021/10/MSJAR_Genbit_Final_Version-616fd3a073758.pdf) is a versatile tool designed to address gender bias in language datasets by utilizing word co-occurrence statistical methods to measure bias. It introduces a novel approach to mitigating gender bias by combining contextual data augmentation, random sampling, sentence classification, and targeted gendered data filtering.\n- The primary goal is to reduce historical gender biases within conversational parallel multilingual datasets, ultimately enhancing the fairness and inclusiveness of machine learning model training and its subsequent applications.\n- What sets GenBiT apart is its adaptability to various forms of bias, not limited to gender alone. It can effectively address biases related to race, religion, or other dimensions, making it a valuable generic tool for bias mitigation in language datasets.\n- GenBiT's impact extends beyond bias reduction metrics; it has shown positive results in improving the performance of machine learning classifiers like Support Vector Machine(SVM). Augmented datasets produced by GenBiT yield significant enhancements in f1-score when compared to the original datasets, underlining its practical benefits in machine learning applications.",
14
+ "short_description": "This methodology highlights GenBiT's function in mitigating bias in language datasets by offering adaptability to various forms of bias, such as gender, race, religion, and other dimensions.",
15
  "fx": "eval_genbit"
16
  }
17
  }
requirements.txt CHANGED
@@ -5,4 +5,5 @@ pandas==2.0.3
5
  spacy==3.6.1
6
  genbit==2.2.0.0
7
  plotly==5.16.1
8
- datasets==2.14.5
 
 
5
  spacy==3.6.1
6
  genbit==2.2.0.0
7
  plotly==5.16.1
8
+ datasets==2.14.5
9
+ avidtools==0.1.1.2
scripts/genbit.py CHANGED
@@ -72,6 +72,5 @@ def eval_genbit(data):
72
  )
73
 
74
  result_plot = plot_genbit(result_json)
75
- result_conclusion = ""
76
 
77
- return result_df, result_plot, result_conclusion
 
72
  )
73
 
74
  result_plot = plot_genbit(result_json)
 
75
 
76
+ return result_df, result_plot
scripts/gender_distribution.py CHANGED
@@ -88,15 +88,11 @@ def eval_gender_distribution(data):
88
  "Female Strongly Positive Gender",
89
  ]
90
 
91
- gender_counts = (
92
  data["gender_category"].value_counts().reindex(gender_labels, fill_value=0)
93
  )
94
 
95
- result_df = pd.DataFrame(
96
- {"Metric": gender_counts.index, "Value": gender_counts.values}
97
- )
98
- result_plot = plot_gender_category_counts(gender_labels, gender_counts)
99
-
100
- result_conclusion = ""
101
 
102
- return result_df, result_plot, result_conclusion
 
88
  "Female Strongly Positive Gender",
89
  ]
90
 
91
+ result_json = (
92
  data["gender_category"].value_counts().reindex(gender_labels, fill_value=0)
93
  )
94
 
95
+ result_df = pd.DataFrame({"Metric": result_json.index, "Value": result_json.values})
96
+ result_plot = plot_gender_category_counts(gender_labels, result_json)
 
 
 
 
97
 
98
+ return result_df, result_plot
scripts/gender_profession_bias.py CHANGED
@@ -142,6 +142,4 @@ def eval_gender_profession(data):
142
  .rename(columns={"index": "Metric", 0: "Value"})
143
  )
144
 
145
- result_conclusion = ""
146
-
147
- return result_df, result_plot, result_conclusion
 
142
  .rename(columns={"index": "Metric", 0: "Value"})
143
  )
144
 
145
+ return result_df, result_plot