Spaces:
Runtime error
Runtime error
from flask import Flask, send_from_directory | |
from flask import request | |
import random | |
import json | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
import pickle | |
import os | |
from sklearn.metrics import mean_absolute_error | |
from sklearn.metrics import mean_squared_error | |
from sklearn.metrics import confusion_matrix | |
import math | |
import altair as alt | |
import matplotlib.pyplot as plt | |
import time | |
import audit_utils as utils | |
# NOTE(review): no route decorators are visible on any handler in this copy of
# the file -- confirm that the handlers below are actually registered on `app`.
app = Flask(__name__)


# Path for our main Svelte page
def base():
    """Send `index.html` from the compiled Svelte `public` directory."""
    public_dir = 'indie_label_svelte/public'
    return send_from_directory(public_dir, 'index.html')
# Path for all the static files (compiled JS/CSS, etc.)
def home(path):
    """Send the file at `path` from the compiled Svelte `public` directory."""
    public_dir = 'indie_label_svelte/public'
    return send_from_directory(public_dir, path)
########################################
# ROUTE: /AUDIT_SETTINGS
comments_grouped_full_topic_cat = pd.read_pickle("data/comments_grouped_full_topic_cat2_persp.pkl")


def _to_select_options(labels):
    """Format labels as the value/text records used by the Svelecte UI element."""
    return [{"value": i, "text": label} for i, label in enumerate(labels)]


def audit_settings():
    """Build the JSON settings payload for the audit page.

    Reads the `user` and `scaffold_method` query parameters, looks up the
    user's models and the available topics through `utils`, and returns a JSON
    string with the model lists, metric names, breakdown categories and the
    grouped topic options.
    """
    # Fetch page content
    user = request.args.get("user")
    scaffold_method = request.args.get("scaffold_method")

    user_models = utils.get_all_model_names(user)
    base_model = f"model_{user}"
    group_prefix = f"{base_model}_group_"
    grp_models = [m for m in user_models if m.startswith(group_prefix)]

    clusters = utils.get_unique_topics()
    if len(user_models) > 2 and scaffold_method != "tutorial" and user != "DemoUser":
        # Highlight topics that have been tuned.
        # The topic name is what follows the "model_<user>_" prefix. Slice the
        # prefix off instead of using str.lstrip: lstrip removes any leading
        # characters found in its argument, so it could also eat the start of
        # the topic name (e.g. a leading digit that also occurs in the user).
        topic_prefix = f"{base_model}_"
        tuned_clusters = [
            m[len(topic_prefix):] if m.startswith(topic_prefix) else m
            for m in user_models
            if m != base_model and not m.startswith(group_prefix)
        ]
        other_clusters = [c for c in clusters if c not in tuned_clusters]
        clusters_options = [
            {
                "label": "Topics with tuned models",
                "options": _to_select_options(tuned_clusters),
            },
            {
                "label": "All other topics",
                "options": _to_select_options(other_clusters),
            },
        ]
    else:
        clusters_options = [
            {
                "label": "All auto-generated topics",
                "options": _to_select_options(clusters),
            },
        ]

    if scaffold_method == "personal_cluster":
        # Guard against a user without any model instead of raising IndexError.
        cluster_model = user_models[0] if len(user_models) > 0 else ""
        personal_cluster_file = f"./data/personal_cluster_dfs/{cluster_model}.pkl"
        if cluster_model != "" and os.path.isfile(personal_cluster_file):
            print("audit_settings", personal_cluster_file, cluster_model)
            topics_under_top, topics_over_top = utils.get_personal_clusters(cluster_model)
            pers_cluster = topics_under_top + topics_over_top
            # Personalized clusters are listed ahead of the other groups.
            clusters_options.insert(0, {
                "label": "Personalized clusters",
                "options": _to_select_options(pers_cluster),
            })

    clusters_for_tuning = utils.get_large_clusters(min_n=150)
    context = {
        "personalized_models": user_models,
        "personalized_model_grp": grp_models,
        "perf_metrics": ["Average rating difference", "Mean Absolute Error (MAE)", "Root Mean Squared Error (RMSE)", "Mean Squared Error (MSE)"],
        "breakdown_categories": ['Topic', 'Toxicity Category', 'Toxicity Severity'],
        "clusters": clusters_options,
        "clusters_for_tuning": _to_select_options(clusters_for_tuning),
    }
    return json.dumps(context)
########################################
# ROUTE: /GET_USERS
def get_users():
    """Return the keys of the pickled users-to-models mapping as JSON."""
    # Fetch page content
    with open(f"./data/users_to_models.pkl", "rb") as pkl_file:
        users_to_models = pickle.load(pkl_file)
    return json.dumps({"users": list(users_to_models.keys())})
########################################
# ROUTE: /GET_AUDIT
def get_audit():
    """Return the overall performance result for a personalized model as JSON.

    Raises:
        Exception: if `breakdown_sort` is neither "difference" nor "default".
    """
    params = request.args
    pers_model = params.get("pers_model")
    breakdown_axis = params.get("breakdown_axis")
    breakdown_sort = params.get("breakdown_sort")
    # Parsed (so a malformed value still fails here) but not used further down.
    n_topics = int(params.get("n_topics"))
    error_type = params.get("error_type")
    cur_user = params.get("cur_user")

    requested_vis = params.get("topic_vis_method")
    # The literal string "null" selects the default method.
    topic_vis_method = "median" if requested_vis == "null" else requested_vis

    if breakdown_sort not in ("difference", "default"):
        raise Exception("Invalid breakdown_sort value")

    overall_perf = utils.show_overall_perf(
        variant=pers_model,
        error_type=error_type,
        cur_user=cur_user,
        breakdown_axis=breakdown_axis,
        topic_vis_method=topic_vis_method,
    )
    return json.dumps({"overall_perf": overall_perf})
########################################
# ROUTE: /GET_CLUSTER_RESULTS
def get_cluster_results():
    """Return the overview plot and comment table for a set of comments as JSON.

    The comments are selected from the model's cached predictions by topic
    (`search_type == "cluster"`), by similarity to the given item ids
    ("neighbors") or by substring match ("keyword"); ids passed in
    `topic_df_ids` are always kept. When `scaffold_method` is
    "personal_cluster" and a personal-cluster file exists for the model, the
    rows of that file matching `cluster` are used instead.

    Returns a JSON string. When no comment matches, every field of the payload
    is None except `topic_df_ids`, which is an empty list.
    """
    args = request.args
    pers_model = args.get("pers_model")
    n_examples = int(args.get("n_examples"))
    cluster = args.get("cluster")
    search_type = args.get("search_type")
    keyword = args.get("keyword")
    error_type = args.get("error_type")
    use_model = args.get("use_model") == "true"
    scaffold_method = args.get("scaffold_method")

    # Comma-separated item ids; a missing parameter means "no ids" rather than
    # an IndexError.
    raw_ids = args.get("topic_df_ids", "")
    topic_df_ids = [int(val) for val in raw_ids.split(",") if val != ""]

    # The client sends the string "null" when no neighbor count is set.
    n_neighbors = args.get("n_neighbors")
    if n_neighbors is not None and n_neighbors != "null":
        n_neighbors = int(n_neighbors)
    neighbor_threshold = 0.6

    # NOTE(review): pers_model and cluster come straight from the query string
    # and end up in file paths that are unpickled below -- confirm that they
    # are validated upstream, since unpickling an attacker-chosen file is unsafe.

    # If user has a tuned model for this cluster, use that
    cluster_model_file = f"./data/trained_models/{pers_model}_{cluster}.pkl"
    if os.path.isfile(cluster_model_file):
        pers_model = f"{pers_model}_{cluster}"
    print(f"get_cluster_results using model {pers_model}")

    personal_cluster_file = f"./data/personal_cluster_dfs/{pers_model}.pkl"
    use_personal_cluster = (scaffold_method == "personal_cluster") and os.path.isfile(personal_cluster_file)

    if use_personal_cluster:
        # Handle personal clusters
        with open(personal_cluster_file, "rb") as f:
            topic_df = pickle.load(f)
        topic_df = topic_df[(topic_df["topic"] == cluster)]
    else:
        # Regular handling
        # NOTE(review): indentation was lost in this copy of the file; the
        # search_type filters are assumed to belong to this branch -- confirm.
        with open(f"data/preds_dfs/{pers_model}.pkl", "rb") as f:
            topic_df = pickle.load(f)
        in_selected_ids = topic_df["item_id"].isin(topic_df_ids)
        if search_type == "cluster":
            # Display examples with comment, your pred, and other users' pred
            topic_df = topic_df[(topic_df["topic"] == cluster) | in_selected_ids]
        elif search_type == "neighbors":
            neighbor_ids = utils.get_match(topic_df_ids, K=n_neighbors, threshold=neighbor_threshold, debug=False)
            topic_df = topic_df[(topic_df["item_id"].isin(neighbor_ids)) | in_selected_ids]
        elif search_type == "keyword":
            topic_df = topic_df[(topic_df["comment"].str.contains(keyword, case=False, regex=False)) | in_selected_ids]

    topic_df = topic_df.drop_duplicates()
    print("len topic_df", len(topic_df))

    # Handle empty results
    if len(topic_df) == 0:
        results = {
            "user_perf_rounded": None,
            "user_direction": None,
            "other_perf_rounded": None,
            "other_direction": None,
            "n_other_users": None,
            "cluster_examples": None,
            "odds_ratio": None,
            "odds_ratio_explanation": None,
            "topic_df_ids": [],
            "cluster_overview_plot_json": None,
            "cluster_comments": None,
        }
        # Serialize like the non-empty path so both return the same kind of body.
        return json.dumps(results)

    topic_df_ids = topic_df["item_id"].unique().tolist()

    if use_personal_cluster:
        cluster_overview_plot_json, sampled_df = utils.plot_overall_vis_cluster(topic_df, error_type=error_type, n_comments=500)
    else:
        # Regular
        cluster_overview_plot_json, sampled_df = utils.get_cluster_overview_plot(topic_df, error_type=error_type, use_model=use_model)

    # New version of cluster comment table
    cluster_comments = utils.get_cluster_comments(sampled_df, error_type=error_type, num_examples=n_examples, use_model=use_model)

    results = {
        "topic_df_ids": topic_df_ids,
        "cluster_overview_plot_json": json.loads(cluster_overview_plot_json),
        "cluster_comments": cluster_comments,
    }
    return json.dumps(results)
########################################
# ROUTE: /GET_GROUP_SIZE
def get_group_size():
    """Return, as JSON, the group size reported for the selected demographics."""
    # Fetch info for initial labeling component
    params = request.args
    sel_gender = params.get("sel_gender")
    sel_pol = params.get("sel_pol")
    sel_relig = params.get("sel_relig")
    race_param = params.get("sel_race")
    sel_lgbtq = params.get("sel_lgbtq")

    # A non-empty race selection is a comma-separated list.
    sel_race = race_param.split(",") if race_param != "" else race_param

    _grp_df, group_size = utils.get_workers_in_group(sel_gender, sel_race, sel_relig, sel_pol, sel_lgbtq)
    return json.dumps({"group_size": group_size})
########################################
# ROUTE: /GET_GROUP_MODEL
def get_group_model():
    """Train a model from the labels of a demographic group and return its MAE.

    Reads the demographic selections, `model_name` and `user` from the query
    string, collects the group's labels through `utils`, trains a model under
    a name derived from the selections, and returns a JSON string with
    `group_size` and `mae`.
    """
    # Fetch info for initial labeling component
    params = request.args
    model_name = params.get("model_name")
    user = params.get("user")
    sel_gender = params.get("sel_gender")
    sel_pol = params.get("sel_pol")
    sel_relig = params.get("sel_relig")
    sel_lgbtq = params.get("sel_lgbtq")
    sel_race_orig = params.get("sel_race")
    # A non-empty race selection is a comma-separated list.
    sel_race = sel_race_orig.split(",") if sel_race_orig != "" else ""

    start = time.time()
    grp_df, group_size = utils.get_workers_in_group(sel_gender, sel_race, sel_relig, sel_pol, sel_lgbtq)
    grp_ids = grp_df["worker_id"].tolist()

    ratings_grp = utils.get_grp_model_labels(
        comments_df=comments_grouped_full_topic_cat,
        n_label_per_bin=BIN_DISTRIB,
        score_bins=SCORE_BINS,
        grp_ids=grp_ids,
    )

    # Modify model name
    model_name = f"{model_name}_group_gender{sel_gender}_relig{sel_relig}_pol{sel_pol}_race{sel_race_orig}_lgbtq_{sel_lgbtq}"
    # NOTE(review): model_name is built from query parameters and used as a
    # directory name -- confirm that it is validated upstream.
    label_dir = f"./data/labels/{model_name}"
    # Create directory for labels if it doesn't yet exist. makedirs with
    # exist_ok also creates a missing parent and tolerates a concurrent request
    # creating the directory between a check and the creation.
    os.makedirs(label_dir, exist_ok=True)
    # Number of label files already stored for this model.
    last_label_i = sum(
        1
        for name in os.listdir(label_dir)
        if name.endswith('.pkl') and os.path.isfile(os.path.join(label_dir, name))
    )

    # Train group model
    mae, _mse, _rmse, _avg_diff, _ratings_prev = utils.train_updated_model(model_name, last_label_i, ratings_grp, user)

    duration = time.time() - start
    print("Time to train/cache:", duration)

    context = {
        "group_size": group_size,
        "mae": mae,
    }
    return json.dumps(context)
########################################
# ROUTE: /GET_LABELING
def get_labeling():
    """Return the labeling-page settings for a user as JSON."""
    # Fetch info for initial labeling component
    user = request.args.get("user")

    # Format for Svelecte UI element
    tuning_options = []
    for i, cluster in enumerate(utils.get_large_clusters(min_n=150)):
        tuning_options.append({"value": i, "text": cluster})

    return json.dumps({
        "personalized_models": utils.get_all_model_names(user),
        "model_name_suggestion": f"model_{user}",
        "clusters_for_tuning": tuning_options,
    })
########################################
# ROUTE: /GET_COMMENTS_TO_LABEL
N_LABEL_PER_BIN = 8  # 8 * 5 = 40 comments
BIN_DISTRIB = [4, 8, 16, 8, 4]
SCORE_BINS = [(0.0, 0.5), (0.5, 1.5), (1.5, 2.5), (2.5, 3.5), (3.5, 4.01)]


def get_comments_to_label():
    """Return up to `n` randomly ordered comments to label, as JSON."""
    n = int(request.args.get("n"))

    # Fetch examples to label
    candidate_ids = utils.create_example_sets(
        comments_df=comments_grouped_full_topic_cat,
        n_label_per_bin=BIN_DISTRIB,
        score_bins=SCORE_BINS,
        keyword=None
    )
    random.shuffle(candidate_ids)  # randomize to not prime users

    ids_to_comments = utils.get_ids_to_comments()
    to_label = []
    for comment_id in candidate_ids[:n]:
        to_label.append(ids_to_comments[comment_id])

    return json.dumps({"to_label": to_label})
########################################
# ROUTE: /GET_COMMENTS_TO_LABEL_TOPIC
N_LABEL_PER_BIN_TOPIC = 2  # 2 * 5 = 10 comments


def get_comments_to_label_topic():
    """Return randomly ordered comments to label for one topic, as JSON."""
    topic = request.args.get("topic")

    # Fetch examples to label. BIN_DISTRIB is what is passed here;
    # N_LABEL_PER_BIN_TOPIC is not.
    candidate_ids = utils.create_example_sets(
        comments_df=comments_grouped_full_topic_cat,
        n_label_per_bin=BIN_DISTRIB,
        score_bins=SCORE_BINS,
        keyword=None,
        topic=topic,
    )
    random.shuffle(candidate_ids)  # randomize to not prime users

    ids_to_comments = utils.get_ids_to_comments()
    to_label = []
    for comment_id in candidate_ids:
        to_label.append(ids_to_comments[comment_id])

    return json.dumps({"to_label": to_label})
########################################
# ROUTE: /GET_PERSONALIZED_MODEL
def get_personalized_model():
    """Fetch ("view") or train ("train") a personalized model and report on it.

    Reads `model_name`, `ratings` (a JSON string), `mode` and `user` from the
    query string. Returns a JSON string with the rounded error metrics, the
    MAE status, the elapsed time, the previous ratings and the performance plot.

    Raises:
        Exception: if `mode` is neither "view" nor "train", or if `mode` is
            "view" and the model does not exist.
    """
    model_name = request.args.get("model_name")
    ratings_json = request.args.get("ratings")
    mode = request.args.get("mode")
    user = request.args.get("user")

    # Reject an unknown mode up front; otherwise neither branch below runs and
    # the function fails later with an unbound `mae`.
    if mode not in ("view", "train"):
        raise Exception("Invalid mode value")

    ratings = json.loads(ratings_json)
    print(ratings)
    start = time.time()

    # NOTE(review): model_name comes from the query string and is used as a
    # directory name -- confirm that it is validated upstream.
    label_dir = f"./data/labels/{model_name}"
    # Create directory for labels if it doesn't yet exist. makedirs with
    # exist_ok also creates a missing parent and tolerates a concurrent request
    # creating the directory between a check and the creation.
    os.makedirs(label_dir, exist_ok=True)
    # Number of label files already stored for this model.
    last_label_i = sum(
        1
        for name in os.listdir(label_dir)
        if name.endswith('.pkl') and os.path.isfile(os.path.join(label_dir, name))
    )

    # Handle existing or new model cases
    if mode == "view":
        # Fetch prior model performance
        if model_name not in utils.get_all_model_names():
            raise Exception(f"Model {model_name} does not exist")
        mae, mse, rmse, avg_diff, ratings_prev = utils.fetch_existing_data(model_name, last_label_i)
    else:
        # Train model and cache predictions using new labels
        print("get_personalized_model train")
        mae, mse, rmse, avg_diff, ratings_prev = utils.train_updated_model(model_name, last_label_i, ratings, user)

    duration = time.time() - start
    print("Time to train/cache:", duration)

    perf_plot, mae_status = utils.plot_train_perf_results(model_name, mae)
    perf_plot_json = perf_plot.to_json()

    def round_metric(x):
        # Convert to a builtin float so json.dumps accepts any numpy scalar type.
        return float(np.round(abs(x), 3))

    results = {
        "model_name": model_name,
        "mae": round_metric(mae),
        "mae_status": mae_status,
        "mse": round_metric(mse),
        "rmse": round_metric(rmse),
        "avg_diff": round_metric(avg_diff),
        "duration": duration,
        "ratings_prev": ratings_prev,
        "perf_plot_json": json.loads(perf_plot_json),
    }
    return json.dumps(results)
########################################
# ROUTE: /GET_PERSONALIZED_MODEL_TOPIC
def get_personalized_model_topic():
    """Train a topic-specific model named `<model_name>_<topic>`.

    Reads `model_name`, `ratings` (a JSON string), `user` and `topic` from the
    query string. Returns a JSON string with a success marker, the previous
    ratings and the new model name.
    """
    model_name = request.args.get("model_name")
    ratings_json = request.args.get("ratings")
    user = request.args.get("user")
    ratings = json.loads(ratings_json)
    topic = request.args.get("topic")
    print(ratings)
    start = time.time()

    # Modify model name
    model_name = f"{model_name}_{topic}"

    # NOTE(review): model_name is built from query parameters and used as a
    # directory name -- confirm that it is validated upstream.
    label_dir = f"./data/labels/{model_name}"
    # Create directory for labels if it doesn't yet exist. makedirs with
    # exist_ok also creates a missing parent and tolerates a concurrent request
    # creating the directory between a check and the creation.
    os.makedirs(label_dir, exist_ok=True)
    # Number of label files already stored for this model.
    last_label_i = sum(
        1
        for name in os.listdir(label_dir)
        if name.endswith('.pkl') and os.path.isfile(os.path.join(label_dir, name))
    )

    # Train model and cache predictions using new labels
    print("get_personalized_model_topic train")
    # Only the previous ratings are reported back; the metrics are not used here.
    _mae, _mse, _rmse, _avg_diff, ratings_prev = utils.train_updated_model(model_name, last_label_i, ratings, user, topic=topic)

    duration = time.time() - start
    print("Time to train/cache:", duration)

    results = {
        "success": "success",
        "ratings_prev": ratings_prev,
        "new_model_name": model_name,
    }
    return json.dumps(results)
########################################
# ROUTE: /GET_REPORTS
def get_reports():
    """Return the current user's reports for a scaffold method as JSON.

    Reports saved for this user and scaffold method are loaded from disk when
    present; otherwise a fresh set is built for the scaffold method, falling
    back to a single empty report for an unrecognized method.
    """
    params = request.args
    cur_user = params.get("cur_user")
    scaffold_method = params.get("scaffold_method")
    model = params.get("model")
    topic_vis_method = params.get("topic_vis_method")
    if topic_vis_method == "null":
        topic_vis_method = "fp_fn"

    # Load reports for current user from stored files
    report_dir = f"./data/user_reports"
    user_file = os.path.join(report_dir, f"{cur_user}_{scaffold_method}.pkl")

    if os.path.isfile(user_file):
        # Load from pickle file
        with open(user_file, "rb") as pkl_file:
            reports = pickle.load(pkl_file)
    elif scaffold_method == "fixed":
        reports = get_fixed_scaffold()
    elif scaffold_method in ("personal", "personal_group", "personal_test"):
        reports = get_personal_scaffold(model, topic_vis_method)
    elif scaffold_method == "personal_cluster":
        reports = get_personal_cluster_scaffold(model)
    elif scaffold_method == "prompts":
        reports = get_prompts_scaffold()
    elif scaffold_method == "tutorial":
        reports = get_tutorial_scaffold()
    else:
        # Prepare empty report
        blank_report = {
            "title": "",
            "error_type": "",
            "evidence": [],
            "text_entry": "",
            "complete_status": False,
        }
        reports = [blank_report]

    return json.dumps({"reports": reports})
def get_fixed_scaffold():
    """Return the five predefined, not-yet-completed topic reports."""
    under = "System is under-sensitive"
    over = "System is over-sensitive"
    fixed_topics = [
        ("Topic: 6_jews_jew_jewish_rabbi", under),
        ("Topic: 73_troll_trolls_trolling_spammers", over),
        ("Topic: 66_mexicans_mexico_mexican_spanish", under),
        ("Topic: 89_cowards_coward_cowardly_brave", over),
        ("Topic: 63_disgusting_gross_toxic_thicc", under),
    ]
    # Each report gets its own evidence list.
    return [
        {
            "title": title,
            "error_type": error_type,
            "evidence": [],
            "text_entry": "",
            "complete_status": False,
        }
        for title, error_type in fixed_topics
    ]
def get_empty_report(title, error_type):
    """Return a blank report titled "Topic: <title>" with the given error type."""
    report = {}
    report["title"] = f"Topic: {title}"
    report["error_type"] = error_type
    report["evidence"] = []
    report["text_entry"] = ""
    report["complete_status"] = False
    return report
def get_tutorial_scaffold():
    """Return the single predefined report used for the tutorial scaffold."""
    tutorial_report = {
        "title": "Topic: 79_idiot_dumb_stupid_dumber",
        "error_type": "System is over-sensitive",
        "evidence": [],
        "text_entry": "",
        "complete_status": False,
    }
    return [tutorial_report]
def get_personal_cluster_scaffold(model):
    """Return shuffled blank reports for the model's personal clusters.

    Topics from the first list returned by `utils.get_personal_clusters` are
    labeled under-sensitive, topics from the second list over-sensitive.
    """
    under_topics, over_topics = utils.get_personal_clusters(model)

    reports = []
    for topic in under_topics:
        reports.append(get_empty_report(topic, "System is under-sensitive"))
    for topic in over_topics:
        reports.append(get_empty_report(topic, "System is over-sensitive"))

    random.shuffle(reports)
    return reports
def get_topic_errors(df, topic_vis_method, threshold=2):
    """Compute one error value per topic.

    Within each topic the `pred` column is treated as the reference value and
    the `rating` column as the value being evaluated.

    Args:
        df: DataFrame with `topic_`, `pred` and `rating` columns.
        topic_vis_method: one of "mae", "mse", "avg_diff" (mean of
            `pred - rating`), "fp_proportion" or "fn_proportion".
        threshold: values below it count as class 0, all others as class 1;
            only used by the two proportion methods.

    Returns:
        Dict mapping each unique `topic_` value to its error.

    Raises:
        ValueError: if `topic_vis_method` is not one of the supported names.
    """
    supported_methods = ("mae", "mse", "avg_diff", "fp_proportion", "fn_proportion")
    # Fail clearly instead of leaving the per-topic error unbound, or silently
    # reusing the previous topic's value.
    if topic_vis_method not in supported_methods:
        raise ValueError(f"Invalid topic_vis_method value: {topic_vis_method}")

    topic_errors = {}
    for topic in df["topic_"].unique().tolist():
        t_df = df[df["topic_"] == topic]
        y_true = t_df["pred"].to_numpy()
        y_pred = t_df["rating"].to_numpy()
        if topic_vis_method == "mae":
            t_err = mean_absolute_error(y_true, y_pred)
        elif topic_vis_method == "mse":
            t_err = mean_squared_error(y_true, y_pred)
        elif topic_vis_method == "avg_diff":
            t_err = np.mean(y_true - y_pred)
        else:
            # Binarize both columns at the threshold and count the mismatches
            # directly, so no exception has to be swallowed when a topic
            # contains only one class.
            true_pos = np.array([not (value < threshold) for value in y_true], dtype=bool)
            pred_pos = np.array([not (value < threshold) for value in y_pred], dtype=bool)
            if topic_vis_method == "fp_proportion":
                # False positive: reference is class 0, evaluated value is class 1.
                n_errors = int((~true_pos & pred_pos).sum())
            else:
                # False negative: reference is class 1, evaluated value is class 0.
                n_errors = int((true_pos & ~pred_pos).sum())
            total = float(len(y_true))
            t_err = n_errors / total if total else 0.0
        topic_errors[topic] = t_err
    return topic_errors
def _top_topic_reports(df, sort_col, error_type, n, positive_only=False):
    """Return blank reports for the `n` rows of `df` with the largest `sort_col`."""
    top = df.sort_values(by=[sort_col], ascending=False).head(n)
    if positive_only:
        # Drop topics whose error value is not above zero.
        top = top[top[sort_col] > 0]
    return [get_empty_report(topic, error_type) for topic in top["topic_"].tolist()]


def get_personal_scaffold(model, topic_vis_method, n_topics=200, n=5):
    """Build blank reports for the topics where the model and system differ most.

    Args:
        model: name of the model whose cached predictions are loaded.
        topic_vis_method: "median", "mean", "fp_fn", or a method name accepted
            by `get_topic_errors`.
        n_topics: only rows whose `topic_id_` is below this value are used.
        n: number of topics reported per error direction (the generic branch
            reports up to `2 * n` topics in total).

    Returns:
        List of blank report dicts; shuffled for "median", "mean" and "fp_fn".
    """
    under_sensitive = "System is under-sensitive"
    over_sensitive = "System is over-sensitive"
    threshold = utils.get_toxic_threshold()

    # Get topics with greatest amount of error
    # NOTE(review): `model` reaches this path from a request parameter and the
    # file is unpickled -- confirm that it is validated upstream.
    with open(f"./data/preds_dfs/{model}.pkl", "rb") as f:
        preds_df = pickle.load(f)
    preds_df_mod = preds_df.merge(utils.get_comments_grouped_full_topic_cat(), on="item_id", how="left", suffixes=('_', '_avg'))
    preds_df_mod = preds_df_mod[preds_df_mod["user_id"] == "A"].sort_values(by=["item_id"]).reset_index()
    # Copy so that the column assignments below write to an independent frame
    # rather than to a filtered slice.
    preds_df_mod = preds_df_mod[preds_df_mod["topic_id_"] < n_topics].copy()

    # numeric_only=True keeps the aggregation to numeric columns; pandas >= 2.0
    # raises on non-numeric columns instead of silently dropping them.
    # NOTE(review): assumes "rating" and "pred" are numeric columns -- confirm.
    grouped = preds_df_mod.groupby(["topic_", "user_id"])
    if topic_vis_method == "median":
        df = grouped.median(numeric_only=True).reset_index()
    elif topic_vis_method == "mean":
        df = grouped.mean(numeric_only=True).reset_index()
    else:
        # Get error for each topic and attach it to every row of that topic.
        # NOTE(review): get_topic_errors runs with its default threshold here,
        # not the `threshold` fetched above -- confirm this is intended.
        error_cols = ["fn_proportion", "fp_proportion"] if topic_vis_method == "fp_fn" else [topic_vis_method]
        for error_col in error_cols:
            topic_errors = get_topic_errors(preds_df_mod, error_col)
            preds_df_mod[error_col] = [topic_errors[topic] for topic in preds_df_mod["topic_"].tolist()]
        df = preds_df_mod.groupby(["topic_", "user_id"]).mean(numeric_only=True).reset_index()

    # Get system error
    df = df[(df["topic_"] != "53_maiareficco_kallystas_dyisisitmanila_tractorsazi") & (df["topic_"] != "79_idiot_dumb_stupid_dumber")].copy()

    if topic_vis_method == "median" or topic_vis_method == "mean":
        sys_user_pairs = list(zip(df["rating"].tolist(), df["pred"].tolist()))
        df["error_magnitude"] = [utils.get_error_magnitude(sys, user, threshold) for sys, user in sys_user_pairs]
        df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in sys_user_pairs]

        # Surface largest errors first, separately for each error direction.
        report_under = _top_topic_reports(df[df["error_type"] == under_sensitive], "error_magnitude", under_sensitive, n)
        report_over = _top_topic_reports(df[df["error_type"] == over_sensitive], "error_magnitude", over_sensitive, n)

        # Set up reports
        reports = (report_under + report_over)
        random.shuffle(reports)
    elif topic_vis_method == "fp_fn":
        report_under = _top_topic_reports(df, "fn_proportion", under_sensitive, n, positive_only=True)
        report_over = _top_topic_reports(df, "fp_proportion", over_sensitive, n, positive_only=True)
        reports = (report_under + report_over)
        random.shuffle(reports)
    else:
        df = df.sort_values(by=[topic_vis_method], ascending=False).head(n * 2)
        error_types = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["rating"].tolist(), df["pred"].tolist())]
        reports = [get_empty_report(topic, error_type) for topic, error_type in zip(df["topic_"].tolist(), error_types)]

    return reports
def get_prompts_scaffold():
    """Return the five predefined prompt reports, none of them completed."""
    prompts = [
        (
            "Are there terms that are used in your identity group or community that tend to be flagged incorrectly as toxic?",
            "System is over-sensitive",
        ),
        (
            "Are there terms that are used in your identity group or community that tend to be flagged incorrectly as non-toxic?",
            "System is under-sensitive",
        ),
        (
            "Are there certain ways that your community tends to be targeted by outsiders?",
            "",
        ),
        (
            "Are there other communities whose content should be very similar to your community's? Verify that this content is treated similarly by the system.",
            "",
        ),
        (
            "Are there ways that you've seen individuals in your community actively try to thwart the rules of automated content moderation systems? Check whether these strategies work here.",
            "",
        ),
    ]
    # Each report gets its own evidence list.
    return [
        {
            "title": title,
            "error_type": error_type,
            "evidence": [],
            "text_entry": "",
            "complete_status": False,
        }
        for title, error_type in prompts
    ]
########################################
# ROUTE: /SAVE_REPORTS
def save_reports():
    """Pickle the submitted reports for the current user and scaffold method."""
    params = request.args
    cur_user = params.get("cur_user")
    reports = json.loads(params.get("reports"))
    scaffold_method = params.get("scaffold_method")

    # Save reports for current user to stored files
    report_dir = f"./data/user_reports"
    report_path = os.path.join(report_dir, f"{cur_user}_{scaffold_method}.pkl")

    # Save to pickle file
    with open(report_path, "wb") as pkl_file:
        pickle.dump(reports, pkl_file)

    return json.dumps({"status": "success"})
########################################
# ROUTE: /GET_EXPLORE_EXAMPLES
def get_explore_examples():
    """Return a random sample of comments with the system's decision, as JSON."""
    threshold = utils.get_toxic_threshold()
    n_examples = int(request.args.get("n_examples"))

    # Get sample of examples
    df = utils.get_comments_grouped_full_topic_cat().sample(n=n_examples)
    system_ratings = df["rating"].tolist()
    df["system_decision"] = [utils.get_decision(rating, threshold) for rating in system_ratings]
    # get cell colors
    df["system_color"] = [utils.get_user_color(rating, threshold) for rating in system_ratings]

    # The records are embedded as a JSON string inside the outer JSON payload.
    return json.dumps({"examples": df.to_json(orient="records")})
########################################
# ROUTE: /GET_RESULTS
def get_results():
    """Return the saved reports of the requested users as JSON.

    For every requested user outside the ignore list, the stored report files
    for the "personal", "personal_group" and "prompts" scaffold methods are
    loaded when they exist.
    """
    users = request.args.get("users")
    if users != "":
        users = users.split(",")

    IGNORE_LIST = ["DemoUser"]
    report_dir = f"./data/user_reports"

    # For each user, get personal and prompt results
    collected = []
    for user in users:
        if user in IGNORE_LIST:
            continue
        user_results = {"user": user}
        for scaffold_method in ["personal", "personal_group", "prompts"]:
            # Get results
            user_file = os.path.join(report_dir, f"{user}_{scaffold_method}.pkl")
            if not os.path.isfile(user_file):
                continue
            with open(user_file, "rb") as pkl_file:
                user_results[scaffold_method] = pickle.load(pkl_file)
        collected.append(user_results)

    return json.dumps({"results": collected})
# Start the Flask development server when the module is run as a script.
if __name__ == "__main__":
    app.run(port=5001, debug=True)