biasaware / scripts /genbit.py
freyam
Add sample size limit and AVID report
8ab9329
raw
history blame
2.06 kB
from genbit.genbit_metrics import GenBitMetrics
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
def plot_genbit(result_json):
    """Build a pie chart of the gender-definition-word percentages.

    Only the female, male and non-binary percentages are charted; no other
    entry of ``result_json`` is read.

    Args:
        result_json: Mapping of metric names to values that contains the
            three ``percentage_of_*_gender_definition_words`` entries for
            female, male and non-binary (presumably the dict returned by
            ``GenBitMetrics.get_metrics`` -- confirm against the caller).

    Returns:
        The Plotly Express pie figure titled "Combined Gender Definition
        Words Distribution", with percent and label text placed inside the
        slices.

    Raises:
        KeyError: If ``result_json`` is a dict and one of the three
            required keys is missing.
    """
    # Display label -> key looked up in ``result_json``.
    metric_keys = {
        "Female Gender": "percentage_of_female_gender_definition_words",
        "Male Gender": "percentage_of_male_gender_definition_words",
        "Non-Binary Gender": "percentage_of_non_binary_gender_definition_words",
    }

    distribution = pd.DataFrame(
        {
            "Metric": list(metric_keys),
            "Value": [result_json[key] for key in metric_keys.values()],
        }
    )

    fig = px.pie(
        distribution,
        names="Metric",
        values="Value",
        title="Combined Gender Definition Words Distribution",
    )
    fig.update_traces(textposition="inside", textinfo="percent+label")

    return fig
def eval_genbit(data):
    """Run GenBit over the first column of ``data`` and summarise the result.

    Args:
        data: Tabular object exposing ``.columns`` and column indexing whose
            first column holds the texts to analyse (presumably a pandas
            DataFrame of untokenized strings -- confirm against the caller).

    Returns:
        A ``(result_df, result_plot)`` tuple: ``result_df`` is the metrics
        dict turned into a DataFrame with the metric names in a "Metric"
        column (and column ``0`` renamed to "Value"), and ``result_plot`` is
        the figure returned by ``plot_genbit`` for the same metrics.
    """
    # Fixed analysis settings: English lexicon, 5-token context window.
    scorer = GenBitMetrics(
        language_code="en",
        context_window=5,
        distance_weight=0.95,
        percentile_cutoff=80,
    )

    # Only the first column is analysed; any other columns are ignored.
    first_column = data.columns[0]
    texts = data[first_column].to_list()
    scorer.add_data(texts, tokenized=False)

    metrics = scorer.get_metrics(output_word_list=False)

    # One row per metric: the dict keys become the index, then a regular column.
    metrics_frame = pd.DataFrame.from_dict(metrics, orient="index")
    metrics_frame = metrics_frame.reset_index()
    metrics_frame = metrics_frame.rename(columns={"index": "Metric", 0: "Value"})

    figure = plot_genbit(metrics)
    return metrics_frame, figure