import os
from glob import glob

import pandas as pd
from sklearn.metrics import accuracy_score, recall_score

def get_merged_df(results_path, skip_samples=True):
  """Load every result JSONL file under results_path into one DataFrame."""
  results = glob(os.path.join(results_path, "*.json"))
  dfs = []
  for r in results:
    # Optionally skip files whose name contains 'sample_result'.
    if skip_samples and 'sample_result' in r:
      continue
    df = pd.read_json(r, lines=True)
    # Reject files with missing values anywhere except the 'algorithm'
    # column, which may legitimately be empty (e.g. for real samples).
    if df.drop(['algorithm'], axis=1).isna().values.any():
      print(f"Missing values in {r}")
    else:
      dfs.append(df)
  full_df = pd.concat(dfs, ignore_index=True)
  return full_df
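
# For reference, a minimal sketch of the record layout get_merged_df expects.
# The field names come from how the columns are used elsewhere in this file
# ('label', 'type', 'algorithm', 'duration'); the concrete values below are
# illustrative assumptions, not real results.
#
#   {"label": "full fake", "type": "fake", "algorithm": "some-vc-model", "duration": 42.0}
#   {"label": "real",      "type": "real", "algorithm": null,            "duration": 58.3}
#
# Each results/*.json file is JSON Lines: one such record per line.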

def map_df(full_df):
  """Map string labels to binary ground-truth and prediction columns."""
  # Both fully and partially fake samples count as positives.
  gnd_truth_mapping = {'full fake': 1,
                       'half fake': 1,
                       'real': 0}
  pred_mapping = {'fake': 1, 'real': 0}

  full_df['gnd_truth'] = full_df['label'].map(gnd_truth_mapping)
  full_df['pred'] = full_df['type'].map(pred_mapping)
  return full_df

def get_duration_scores(df):
  """Score real samples bucketed by clip duration (in seconds)."""
  columns = ['Under 26 s', '55 s', '125 s', 'Overall']
  samples_tested = []
  acc_scores = []

  for c in columns:
    # Every bucket is restricted to real samples (gnd_truth == 0), so the
    # reported accuracy is the fraction of real clips predicted as real.
    # Bucket labels are upper bounds: '55 s' covers 26-55 s, '125 s' 56-125 s.
    if c == 'Overall':
      mask = df.gnd_truth == 0
    elif c == 'Under 26 s':
      mask = (df.gnd_truth == 0) & (df.duration < 26)
    elif c == '55 s':
      mask = (df.gnd_truth == 0) & (df.duration >= 26) & (df.duration < 56)
    elif c == '125 s':
      mask = (df.gnd_truth == 0) & (df.duration >= 56) & (df.duration < 126)
    else:
      raise ValueError(f"Unknown duration bucket: {c}")
    sel_df = df[mask]

    samples_tested.append(len(sel_df))
    acc_scores.append(round(accuracy_score(sel_df.gnd_truth.values, sel_df.pred.values), 3))

  lb = pd.DataFrame({"Sample": columns, "Num Samples": samples_tested, "Accuracy": acc_scores})
  return lb

def get_algorithm_scores(df):
  """Score each fake-generation algorithm against the pool of real samples."""
  columns = list(df[df.label != 'real'].algorithm.unique())
  samples_tested = []
  rec_scores = []

  for c in columns:
    # Evaluate this algorithm's fakes together with all real samples;
    # recall then measures the fraction of its fakes that were detected.
    mask = (df.algorithm == c) | (df.label == 'real')
    sel_df = df[mask]

    samples_tested.append(len(sel_df[sel_df.label != 'real']))
    rec_scores.append(round(recall_score(sel_df.gnd_truth.values, sel_df.pred.values), 3))

  lb = pd.DataFrame({"Sample": columns, "Num Samples": samples_tested, "Recall": rec_scores})
  return lb

def build_leaderboard(results_path='results'):
  """Merge all result files and build the per-algorithm recall leaderboard."""
  full_df = get_merged_df(results_path)
  full_df_mapped = map_df(full_df)
  leaderboard = get_algorithm_scores(full_df_mapped)
  return leaderboard
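
# Minimal usage sketch: load and map the results, then print both score
# tables. The 'results' directory is the default path used above; running
# this assumes such a directory with JSONL result files actually exists.
if __name__ == "__main__":
  full_df = map_df(get_merged_df('results'))
  print(get_duration_scores(full_df).to_string(index=False))
  print(get_algorithm_scores(full_df).to_string(index=False))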