File size: 1,930 Bytes
e0db39e 6d2d9db e0db39e 6d2d9db e0db39e 6d2d9db e0db39e 6d2d9db e0db39e 6d2d9db e0db39e 6d2d9db e0db39e 6d2d9db e0db39e 6d2d9db e0db39e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
import re
import json
# Load the gender term lexicons once at import time. The JSON document is
# looked up later with the keys "male_lexicons" and "female_lexicons".
# A context manager guarantees the file handle is closed after parsing
# instead of being left for the garbage collector.
with open("config/gender_lexicons.json", "r", encoding="utf-8") as _lexicon_file:
    gender_lexicons = json.load(_lexicon_file)
def count_gender_terms(text, gender_terms):
    """Count whole-word occurrences of any of ``gender_terms`` in ``text``.

    Args:
        text: Value to search. It is converted with ``str()`` before matching,
            so non-string values are searched by their string form.
        gender_terms: Iterable of string terms. Each term is matched literally
            (regex metacharacters are escaped) and case-sensitively.

    Returns:
        The number of non-overlapping whole-word matches. ``0`` when
        ``gender_terms`` is ``None``, empty, or contains only empty strings.
    """
    # Escape every term so punctuation such as "." is matched literally, and
    # drop empty strings: an empty alternative matches at every word boundary.
    escaped_terms = [re.escape(term) for term in (gender_terms or ()) if term]
    if not escaped_terms:
        # Without this guard the pattern would degenerate to r"\b()\b", which
        # matches the empty string at each word boundary and inflates the count.
        return 0
    pattern = r"\b(?:{})\b".format("|".join(escaped_terms))
    return len(re.findall(pattern, str(text)))
def get_gender_tag(count_m_term, count_f_term):
    """Classify a text by the balance of male and female term counts.

    Args:
        count_m_term: Number of male terms found.
        count_f_term: Number of female terms found.

    Returns:
        One of:
        - ``"No Gender"`` when both counts are zero.
        - ``"Equal Gender"`` when both counts are equal and non-zero.
        - ``"Male Strongly Positive Gender"`` / ``"Female Strongly Positive
          Gender"`` when that side holds at least 75% of all terms.
        - ``"Male Positive Gender"`` / ``"Female Positive Gender"`` when that
          side holds more than 50% but less than 75% of all terms.
    """
    total_terms = count_m_term + count_f_term
    if total_terms == 0:
        return "No Gender"

    # A tie must be checked before the proportion thresholds; otherwise a
    # 50/50 split satisfies ">= 50" for the male side and is never reported
    # as equal.
    if count_m_term == count_f_term:
        return "Equal Gender"

    if count_m_term > count_f_term:
        m_proportion = (count_m_term / total_terms) * 100
        if m_proportion >= 75:
            return "Male Strongly Positive Gender"
        return "Male Positive Gender"

    f_proportion = (count_f_term / total_terms) * 100
    if f_proportion >= 75:
        return "Female Strongly Positive Gender"
    return "Female Positive Gender"
def get_pg_spg(sample_df):
    """Summarise how many rows fall into each gender category.

    Reads the ``"gender_cat"`` column of ``sample_df`` and tallies its values.

    Returns:
        A dict keyed by a fixed list of category labels, in a fixed order,
        whose values are the row counts rendered as strings. Labels that do
        not occur in the column map to ``"0"``.
    """
    tallies = sample_df["gender_cat"].value_counts()

    summary = {}
    for category in (
        "Gender",
        "No Gender",
        "Equal Gender",
        "Female Positive Gender",
        "Male Positive Gender",
        "Female Strongly Positive Gender",
        "Male Strongly Positive Gender",
    ):
        # Counts are stringified; categories absent from the data default to 0.
        summary[category] = str(tallies.get(category, 0))
    return summary
def eval_gender_divide(data):
    """Tag each row of ``data`` by gender-term balance and summarise the tags.

    The first column of ``data`` is treated as the text to analyse.

    Side effects:
        ``data`` is modified in place: its first column is lower-cased and
        stripped, and the columns ``"count_male_term"``,
        ``"count_female_term"`` and ``"gender_cat"`` are added or overwritten.

    Args:
        data: pandas DataFrame whose first column holds string values.

    Returns:
        The dict produced by ``get_pg_spg`` for the tagged frame.
    """
    male_terms = gender_lexicons.get("male_lexicons")
    female_terms = gender_lexicons.get("female_lexicons")

    # Resolve the text column once, before new columns are appended.
    text_column = data.columns[0]
    data[text_column] = data[text_column].str.lower().str.strip()

    # Both counts must be computed on the same text. Searching the whole row
    # would search its string form, which also contains column names and the
    # previously computed counts.
    data["count_male_term"] = data[text_column].apply(
        lambda text: count_gender_terms(text, male_terms)
    )
    data["count_female_term"] = data[text_column].apply(
        lambda text: count_gender_terms(text, female_terms)
    )

    # Pair the two count columns directly instead of a row-wise apply, which
    # also keeps the assignment well-defined for an empty frame.
    data["gender_cat"] = [
        get_gender_tag(male_count, female_count)
        for male_count, female_count in zip(
            data["count_male_term"], data["count_female_term"]
        )
    ]

    return get_pg_spg(data)
|