File size: 2,061 Bytes
e0db39e
0946447
d1a2df2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e0db39e
 
 
 
 
 
 
6d2d9db
e0db39e
 
0946447
e0db39e
0946447
 
 
 
 
 
d1a2df2
0946447
8ab9329
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from genbit.genbit_metrics import GenBitMetrics
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots


def plot_genbit(result_json):
    """Build a pie chart of the female / male / non-binary definition-word shares.

    Args:
        result_json: Mapping of metric names to values. Only the three
            ``percentage_of_{female,male,non_binary}_gender_definition_words``
            entries are read.

    Returns:
        The figure created by ``px.pie``, with each slice showing its
        percentage and label inside the slice.

    Raises:
        KeyError: If ``result_json`` is a dict and one of the three metrics
            listed above is missing.
    """
    # Slice label -> key of the metric that supplies its value.
    # The trans/cis metrics are deliberately not read: they were never part of
    # the returned figure, so requiring them would only add a failure mode.
    slice_sources = {
        "Female Gender": "percentage_of_female_gender_definition_words",
        "Male Gender": "percentage_of_male_gender_definition_words",
        "Non-Binary Gender": "percentage_of_non_binary_gender_definition_words",
    }

    distribution = pd.DataFrame(
        {
            "Metric": list(slice_sources),
            "Value": [result_json[key] for key in slice_sources.values()],
        }
    )

    fig = px.pie(
        distribution,
        names="Metric",
        values="Value",
        title="Combined Gender Definition Words Distribution",
    )
    fig.update_traces(textposition="inside", textinfo="percent+label")

    return fig


def eval_genbit(data):
    """Compute GenBit metrics for the first column of ``data`` and chart them.

    Args:
        data: Object exposing ``columns`` and column indexing whose first
            column is converted with ``to_list()`` and passed to GenBit as
            untokenized input (presumably a pandas DataFrame of text; confirm
            against the caller).

    Returns:
        A ``(result_df, result_plot)`` tuple: the metrics dictionary turned
        into a DataFrame whose index is exposed as a ``"Metric"`` column (and
        whose column ``0`` is renamed to ``"Value"``), and the figure returned
        by ``plot_genbit`` for the same metrics.
    """
    first_column = data.columns[0]
    samples = data[first_column].to_list()

    scorer = GenBitMetrics(
        language_code="en",
        context_window=5,
        distance_weight=0.95,
        percentile_cutoff=80,
    )
    scorer.add_data(samples, tokenized=False)
    metrics = scorer.get_metrics(output_word_list=False)

    # One row per metric: the dictionary keys become the index, which is then
    # moved into a regular column and given a readable header.
    metrics_frame = pd.DataFrame.from_dict(metrics, orient="index")
    metrics_frame = metrics_frame.reset_index()
    metrics_frame = metrics_frame.rename(columns={"index": "Metric", 0: "Value"})

    figure = plot_genbit(metrics)

    return metrics_frame, figure