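"""Leaderboard utilities for fake/real detection results.

Merges line-delimited JSON result files, maps string labels to binary
targets (1 = fake, 0 = real), and builds score tables: per-duration
accuracy, per-algorithm recall, and per-algorithm/per-label TPR with
overall TNR and macro-F1.
"""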
import os
from glob import glob

import pandas as pd
from sklearn.metrics import accuracy_score, recall_score, f1_score

def get_merged_df(results_path, skip_samples=True):
    """Merge all line-delimited JSON result files under `results_path`."""
    results = glob(os.path.join(results_path, "*.json"))
    dfs = []
    for r in results:
        # Skip per-sample debug dumps; only keep aggregated result files.
        if skip_samples and 'sample_result' in r:
            continue
        df = pd.read_json(r, lines=True)
        # 'algorithm' may legitimately be NaN (e.g. for real samples), so
        # exclude it from the missing-value check.
        if df.drop(['algorithm'], axis=1).isna().values.any():
            print(f"Missing values in {r}")
        else:
            dfs.append(df)
    full_df = pd.concat(dfs, ignore_index=True)
    return full_df

def map_df(full_df):
    """Map string labels/predictions to binary targets (1 = fake, 0 = real)."""
    gnd_truth_mapping = {'full fake': 1,
                         'half fake': 1,
                         'mostly fake': 1,
                         'real': 0}
    pred_mapping = {'fake': 1, 'real': 0}

    full_df['gnd_truth'] = full_df['label'].map(gnd_truth_mapping)
    full_df['pred'] = full_df['type'].map(pred_mapping)
    return full_df

def get_duration_scores(df):
    """Score real samples bucketed by duration (seconds)."""
    columns = ['Under 26 s', '55 s', '125 s', 'Overall']
    samples_tested = []
    acc_scores = []

    for c in columns:
        # Every mask restricts to real samples (gnd_truth == 0), so the
        # reported "Accuracy" is the true-negative rate per duration bucket.
        if c == 'Overall':
            mask = df.gnd_truth == 0
        elif c == 'Under 26 s':
            mask = (df.gnd_truth == 0) & (df.duration < 26)
        elif c == '55 s':
            mask = (df.gnd_truth == 0) & (df.duration >= 26) & (df.duration < 56)
        elif c == '125 s':
            mask = (df.gnd_truth == 0) & (df.duration >= 56) & (df.duration < 126)
        else:
            raise ValueError(f"Unknown duration bucket: {c}")
        sel_df = df[mask]

        samples_tested.append(len(sel_df))
        acc_scores.append(round(accuracy_score(sel_df.gnd_truth.values, sel_df.pred.values), 3))

    lb = pd.DataFrame({"Sample": columns, "Num Samples": samples_tested, "Accuracy": acc_scores})
    return lb

def get_algorithm_scores_v1(df):
    """Per-algorithm recall: each algorithm's fakes pooled with all real samples."""
    columns = list(df[df.label != 'real'].algorithm.unique())
    samples_tested = []
    rec_scores = []

    for c in columns:
        # Keep this algorithm's fakes plus every real sample; recall only
        # depends on the positive (fake) rows, so the reals do not affect it.
        mask = (df.algorithm == c) | (df.label == 'real')
        sel_df = df[mask]

        samples_tested.append(len(sel_df[sel_df.label != 'real']))
        rec_scores.append(round(recall_score(sel_df.gnd_truth.values, sel_df.pred.values), 3))

    lb = pd.DataFrame({"Sample": columns, "Num Samples": samples_tested, "Recall": rec_scores})
    return lb

def get_algorithm_scores_v2(df):
    """Per-algorithm and per-fakeness-level TPR, plus overall TPR/TNR/macro-F1."""
    columns = list(df[df.label != 'real'].algorithm.unique())
    columns2 = list(df[df.label != 'real'].label.unique())
    samples_tested = []
    tpr_scores = []
    # TNR and F1 are only defined for the final "overall" row; pad the
    # per-algorithm and per-label rows with NaN.
    tnr_scores = [float('nan')] * (len(columns) + len(columns2))
    f1_scores = [float('nan')] * (len(columns) + len(columns2))

    # TPR per generation algorithm (these rows are all fake, so recall == TPR).
    for c in columns:
        sel_df = df[df.algorithm == c]
        samples_tested.append(len(sel_df))
        tpr_scores.append(round(recall_score(sel_df.gnd_truth.values, sel_df.pred.values, pos_label=1), 3))

    # TPR per fakeness level ('full fake', 'half fake', 'mostly fake').
    for c in columns2:
        sel_df = df[df.label == c]
        samples_tested.append(len(sel_df))
        tpr_scores.append(round(recall_score(sel_df.gnd_truth.values, sel_df.pred.values, pos_label=1), 3))

    # Overall TPR over every fake sample.
    sel_df = df[df.label != "real"]
    tpr_scores.append(round(recall_score(sel_df.gnd_truth.values, sel_df.pred.values, pos_label=1), 3))

    # Overall TNR over every real sample (recall of the negative class).
    sel_df = df[df.label == "real"]
    tnr_scores.append(round(recall_score(sel_df.gnd_truth.values, sel_df.pred.values, pos_label=0), 3))

    # Macro-F1 over the full dataset; the overall row counts all samples.
    samples_tested.append(len(df))
    f1_scores.append(round(f1_score(df.gnd_truth.values, df.pred.values, average="macro"), 3))

    lb = pd.DataFrame({"Sample": columns + columns2 + ["overall"], "Num Samples": samples_tested,
                       "TPR": tpr_scores, "TNR": tnr_scores, "F1": f1_scores})
    return lb
    
def build_leaderboard(results_path='results'):
    """Merge results, map labels, and compute the v2 leaderboard table."""
    full_df = get_merged_df(results_path)
    full_df_mapped = map_df(full_df)
    leaderboard = get_algorithm_scores_v2(full_df_mapped)
    return leaderboard
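
# Example usage: a minimal sketch assuming a local `results/` directory of
# line-delimited JSON files with `label`, `type`, `algorithm` (and, for the
# duration table, `duration`) fields, as consumed above.
if __name__ == "__main__":
    full_df = map_df(get_merged_df("results"))
    print(get_algorithm_scores_v2(full_df).to_string(index=False))
    print(get_duration_scores(full_df).to_string(index=False))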