Spaces:
Sleeping
Sleeping
Rafal
committed on
Commit
·
2a7e3b8
1
Parent(s):
f818770
Changed background in Accordions and per sentence progress in get sentences
Browse files
- app.py +13 -13
- mgr_bias_scoring.py +1 -1
- mgr_requests.py +7 -2
- mgr_sentences.py +2 -1
app.py
CHANGED
@@ -275,8 +275,6 @@ def _genSentenceCoverMsg(test_sentences, total_att_terms, bias_spec, isGen=False
|
|
275 |
# missing pairs spec
|
276 |
bt_mgr.genMissingPairsSpec(bias_spec, test_sentences_df)
|
277 |
|
278 |
-
|
279 |
-
|
280 |
att1_missing_num = sum([v for k, v in att1_missing.items()])
|
281 |
att2_missing_num = sum([v for k, v in att2_missing.items()])
|
282 |
total_missing = att1_missing_num + att2_missing_num
|
@@ -287,9 +285,9 @@ def _genSentenceCoverMsg(test_sentences, total_att_terms, bias_spec, isGen=False
|
|
287 |
source_msg = "Found" if isGen==False else "Generated"
|
288 |
if num_covered_atts >= total_att_terms:
|
289 |
if total_missing > 0:
|
290 |
-
info_msg = f"**{source_msg} {len(test_sentences)} sentences covering all bias specification attributes, but some attributes are underepresented. Generating additional {total_missing} sentences is suggested.**"
|
291 |
else:
|
292 |
-
info_msg = f"**{source_msg} {len(test_sentences)} sentences covering all bias specification attributes. Please select model to test.**"
|
293 |
else:
|
294 |
info_msg = f"**{source_msg} {len(test_sentences)} sentences covering {num_covered_atts} of {total_att_terms} attributes. Please select model to test.**"
|
295 |
|
@@ -707,8 +705,7 @@ def useOnlineGen(value):
|
|
707 |
|
708 |
def changeTerm(evt: gr.EventData):
|
709 |
global G_CORE_BIAS_NAME
|
710 |
-
|
711 |
-
print("Bias is custom now...")
|
712 |
|
713 |
G_CORE_BIAS_NAME = None
|
714 |
|
@@ -805,7 +802,10 @@ css_adds = "#group_row {background: white; border-color: white;} \
|
|
805 |
#filled:hover .tooltiptext_left {visibility: visible;} \
|
806 |
#empty:hover .tooltiptext_left {visibility: visible;} \
|
807 |
#filled:hover .tooltiptext_right {visibility: visible;} \
|
808 |
-
#empty:hover .tooltiptext_right {visibility: visible;}
|
|
|
|
|
|
|
809 |
|
810 |
#'bethecloud/storj_theme'
|
811 |
with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
|
@@ -917,7 +917,7 @@ with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
|
|
917 |
gr.Markdown(" ")
|
918 |
|
919 |
with gr.Row(visible=False) as row_sentences:
|
920 |
-
with gr.Accordion(label="Test Sentences", open=False, visible=False) as acc_test_sentences:
|
921 |
test_sentences = gr.DataFrame(
|
922 |
headers=["Sentence", "Alternative Sentence", "Group term 1", "Group term 2", "Attribute term"],
|
923 |
datatype=["str", "str", "str", "str", "str"],
|
@@ -950,8 +950,8 @@ with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
|
|
950 |
model_bias_label = gr.Label(num_top_classes=1, label="% stereotyped choices (↑ more bias)",
|
951 |
elem_id="res_label",
|
952 |
show_label=False)
|
953 |
-
with gr.Accordion("Additional Interpretation", open=False, visible=True):
|
954 |
-
interpretation_msg = gr.HTML(value="Interpretation: Stereotype Score metric details in <a href='https://arxiv.org/abs/2004.09456'>Nadeem
|
955 |
|
956 |
lbl_attrib_bias = gr.Markdown("**Bias in the Context of Attributes** - % stereotyped choices (↑ more bias)")
|
957 |
#gr.Markdown("**Legend**")
|
@@ -971,13 +971,13 @@ with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
|
|
971 |
gr.Markdown("#### Attribute Group 2")
|
972 |
attribute_bias_html_antistereo = gr.HTML()
|
973 |
|
974 |
-
gr.HTML(value="Visualization inspired by <a href='https://www.bloomberg.com/graphics/2023-generative-ai-bias/' target='_blank'>Bloomberg article on bias in text-to-image models</a
|
975 |
save_msg = gr.HTML(value="<span style=\"color:black\">Bias test result saved! </span>",
|
976 |
visible=False)
|
977 |
|
978 |
with gr.Row():
|
979 |
with gr.Column(scale=2):
|
980 |
-
with gr.Accordion("Per Sentence Bias Results", open=False, visible=True):
|
981 |
test_pairs = gr.DataFrame(
|
982 |
headers=["group_term", "template", "att_term_1", "att_term_2","label_1","label_2"],
|
983 |
datatype=["str", "str", "str", "str", "str", "str"],
|
@@ -1071,7 +1071,7 @@ with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
|
|
1071 |
exp_button.click(export_csv,
|
1072 |
inputs=[test_pairs, group1, group2, att1, att2],
|
1073 |
outputs=[csv])
|
1074 |
-
|
1075 |
# Changing any of the bias specification terms
|
1076 |
group1.change(fn=changeTerm, inputs=[], outputs=[csv])
|
1077 |
group2.change(fn=changeTerm, inputs=[], outputs=[csv])
|
|
|
275 |
# missing pairs spec
|
276 |
bt_mgr.genMissingPairsSpec(bias_spec, test_sentences_df)
|
277 |
|
|
|
|
|
278 |
att1_missing_num = sum([v for k, v in att1_missing.items()])
|
279 |
att2_missing_num = sum([v for k, v in att2_missing.items()])
|
280 |
total_missing = att1_missing_num + att2_missing_num
|
|
|
285 |
source_msg = "Found" if isGen==False else "Generated"
|
286 |
if num_covered_atts >= total_att_terms:
|
287 |
if total_missing > 0:
|
288 |
+
info_msg = f"**{source_msg} {len(test_sentences)} sentences covering all bias specification attributes, but some attributes are underepresented (see at the bottom). Generating additional {total_missing} sentences is suggested.**"
|
289 |
else:
|
290 |
+
info_msg = f"**{source_msg} {len(test_sentences)} sentences covering all bias specification attributes (see at the bottom). Please select model to test.**"
|
291 |
else:
|
292 |
info_msg = f"**{source_msg} {len(test_sentences)} sentences covering {num_covered_atts} of {total_att_terms} attributes. Please select model to test.**"
|
293 |
|
|
|
705 |
|
706 |
def changeTerm(evt: gr.EventData):
|
707 |
global G_CORE_BIAS_NAME
|
708 |
+
#print("Bias is custom now...")
|
|
|
709 |
|
710 |
G_CORE_BIAS_NAME = None
|
711 |
|
|
|
802 |
#filled:hover .tooltiptext_left {visibility: visible;} \
|
803 |
#empty:hover .tooltiptext_left {visibility: visible;} \
|
804 |
#filled:hover .tooltiptext_right {visibility: visible;} \
|
805 |
+
#empty:hover .tooltiptext_right {visibility: visible;} \
|
806 |
+
#add_interpret {border: 1px solid #f1f5f9; background: #F8FAFC;} \
|
807 |
+
#per_sent_bias_accordion {border: 1px solid #f1f5f9; background: #F8FAFC;} \
|
808 |
+
#test_sentences_accordion {border: 1px solid #f1f5f9; background: #F8FAFC;}"
|
809 |
|
810 |
#'bethecloud/storj_theme'
|
811 |
with gr.Blocks(theme=soft, title="Social Bias Testing in Language Models",
|
|
|
917 |
gr.Markdown(" ")
|
918 |
|
919 |
with gr.Row(visible=False) as row_sentences:
|
920 |
+
with gr.Accordion(label="Test Sentences", open=False, visible=False, elem_id="test_sentences_accordion") as acc_test_sentences:
|
921 |
test_sentences = gr.DataFrame(
|
922 |
headers=["Sentence", "Alternative Sentence", "Group term 1", "Group term 2", "Attribute term"],
|
923 |
datatype=["str", "str", "str", "str", "str"],
|
|
|
950 |
model_bias_label = gr.Label(num_top_classes=1, label="% stereotyped choices (↑ more bias)",
|
951 |
elem_id="res_label",
|
952 |
show_label=False)
|
953 |
+
with gr.Accordion("Additional Interpretation", open=False, visible=True, elem_id="add_interpret") as interpret_accordion:
|
954 |
+
interpretation_msg = gr.HTML(value="Interpretation: Stereotype Score metric details in <a href='https://arxiv.org/abs/2004.09456'>Nadeem et al. 2020<a>", visible=False)
|
955 |
|
956 |
lbl_attrib_bias = gr.Markdown("**Bias in the Context of Attributes** - % stereotyped choices (↑ more bias)")
|
957 |
#gr.Markdown("**Legend**")
|
|
|
971 |
gr.Markdown("#### Attribute Group 2")
|
972 |
attribute_bias_html_antistereo = gr.HTML()
|
973 |
|
974 |
+
gr.HTML(value="Visualization inspired by <a href='https://www.bloomberg.com/graphics/2023-generative-ai-bias/' target='_blank'>Bloomberg article on bias in text-to-image models</a>.<br />While we perform several checks, the tool still relies on ChatGPT generations, please examine the sentences for quality.")
|
975 |
save_msg = gr.HTML(value="<span style=\"color:black\">Bias test result saved! </span>",
|
976 |
visible=False)
|
977 |
|
978 |
with gr.Row():
|
979 |
with gr.Column(scale=2):
|
980 |
+
with gr.Accordion("Per Sentence Bias Results", open=False, visible=True, elem_id="per_sent_bias_accordion"):
|
981 |
test_pairs = gr.DataFrame(
|
982 |
headers=["group_term", "template", "att_term_1", "att_term_2","label_1","label_2"],
|
983 |
datatype=["str", "str", "str", "str", "str", "str"],
|
|
|
1071 |
exp_button.click(export_csv,
|
1072 |
inputs=[test_pairs, group1, group2, att1, att2],
|
1073 |
outputs=[csv])
|
1074 |
+
|
1075 |
# Changing any of the bias specification terms
|
1076 |
group1.change(fn=changeTerm, inputs=[], outputs=[csv])
|
1077 |
group2.change(fn=changeTerm, inputs=[], outputs=[csv])
|
mgr_bias_scoring.py
CHANGED
@@ -905,7 +905,7 @@ def _constructInterpretationMsg(bias_spec, num_sentences, model_name, bias_stats
|
|
905 |
interpret_msg += "<div style=\"margin-top: 3px; margin-left: 3px\"><b>◼ </b>" + att1_msg + "<br /></div>"
|
906 |
interpret_msg += "<div style=\"margin-top: 3px; margin-left: 3px; margin-bottom: 3px\"><b>◼ </b>" + att2_msg + "<br /></div>"
|
907 |
interpret_msg += "Please examine the exact test sentences used below."
|
908 |
-
interpret_msg += "<br />More details about Stereotype Score metric: <a href='https://arxiv.org/abs/2004.09456' target='_blank'>Nadeem
|
909 |
|
910 |
return interpret_msg
|
911 |
|
|
|
905 |
interpret_msg += "<div style=\"margin-top: 3px; margin-left: 3px\"><b>◼ </b>" + att1_msg + "<br /></div>"
|
906 |
interpret_msg += "<div style=\"margin-top: 3px; margin-left: 3px; margin-bottom: 3px\"><b>◼ </b>" + att2_msg + "<br /></div>"
|
907 |
interpret_msg += "Please examine the exact test sentences used below."
|
908 |
+
interpret_msg += "<br />More details about Stereotype Score metric: <a href='https://arxiv.org/abs/2004.09456' target='_blank'>Nadeem et al. 2020<a>"
|
909 |
|
910 |
return interpret_msg
|
911 |
|
mgr_requests.py
CHANGED
@@ -171,9 +171,14 @@ def _getSavedSentences(bias_spec, progress, use_paper_sentences):
|
|
171 |
att_list.extend(att_list_nospace)
|
172 |
att_list = list(set(att_list))
|
173 |
|
174 |
-
progress(gi/len(g1+g2), desc=f"{g_term}")
|
175 |
|
176 |
-
_, sentence_df, _ = smgr.getSavedSentences(g_term)
|
|
|
|
|
|
|
|
|
|
|
177 |
# only take from paper & gpt3.5
|
178 |
flt_gen_models = ["gpt-3.5","gpt-3.5-turbo","gpt-4"]
|
179 |
print(f"Before filter: {sentence_df.shape[0]}")
|
|
|
171 |
att_list.extend(att_list_nospace)
|
172 |
att_list = list(set(att_list))
|
173 |
|
174 |
+
#progress(gi/len(g1+g2), desc=f"{g_term}")
|
175 |
|
176 |
+
_, sentence_df, _ = smgr.getSavedSentences(g_term)#, gi, len(g1+g2), progress)
|
177 |
+
if sentence_df.shape[0] > 0:
|
178 |
+
progress(gi/len(g1+g2), desc=f"{sentence_df['sentence'].tolist()[0]}")
|
179 |
+
else:
|
180 |
+
progress(gi/len(g1+g2), desc=f"{g_term}")
|
181 |
+
|
182 |
# only take from paper & gpt3.5
|
183 |
flt_gen_models = ["gpt-3.5","gpt-3.5-turbo","gpt-4"]
|
184 |
print(f"Before filter: {sentence_df.shape[0]}")
|
mgr_sentences.py
CHANGED
@@ -90,13 +90,14 @@ def get_sentence_csv(file_path: str):
|
|
90 |
|
91 |
return df
|
92 |
|
93 |
-
def getSavedSentences(grp):
|
94 |
filename = f"{grp.replace(' ','-')}.csv"
|
95 |
sentence_df = pd.DataFrame()
|
96 |
|
97 |
try:
|
98 |
text = f"Loading sentences: {filename}\n"
|
99 |
sentence_df = get_sentence_csv(filename)
|
|
|
100 |
|
101 |
except Exception as e:
|
102 |
text = f"Error, no saved generations for {filename}"
|
|
|
90 |
|
91 |
return df
|
92 |
|
93 |
+
def getSavedSentences(grp): #, gi, total_grp_len, progress):
|
94 |
filename = f"{grp.replace(' ','-')}.csv"
|
95 |
sentence_df = pd.DataFrame()
|
96 |
|
97 |
try:
|
98 |
text = f"Loading sentences: {filename}\n"
|
99 |
sentence_df = get_sentence_csv(filename)
|
100 |
+
#progress(gi/total_grp_len, desc=f"{sentence_df[0]}")
|
101 |
|
102 |
except Exception as e:
|
103 |
text = f"Error, no saved generations for {filename}"
|