bishmoy committed on
Commit
2290cee
·
verified ·
1 Parent(s): 5feca9c

Added initial code for building the leaderboard (lb)

Browse files
Files changed (1) hide show
  1. leaderboard.py +63 -0
leaderboard.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from glob import glob
2
+ from sklearn.metrics import accuracy_score
3
+ import os
4
+ import pandas as pd
5
+
6
def get_merged_df(results_path, skip_samples = False):
    """Load every ``*.json`` result file in a directory into one DataFrame.

    Each file is parsed as JSON Lines (one JSON object per line). A file
    that contains any missing value is reported on stdout and left out of
    the merged result.

    Args:
        results_path: Directory that is searched (non-recursively) for
            ``*.json`` files.
        skip_samples: When true, files whose *file name* contains
            ``'sample_result'`` are ignored.

    Returns:
        A DataFrame holding the rows of all accepted files, with a fresh
        0..N-1 index.

    Raises:
        ValueError: If no file contributed any rows (nothing matched, or
            every match was skipped or rejected).
    """
    # Sort so the row order of the merged frame does not depend on the
    # order in which the OS happens to list the directory.
    result_files = sorted(glob(os.path.join(results_path, "*.json")))

    dfs = []
    for r in result_files:
        # Test only the file name: matching against the full path would
        # drop every file when a parent directory contains 'sample_result'.
        if skip_samples and 'sample_result' in os.path.basename(r):
            continue
        df = pd.read_json(r, lines = True)
        if df.isna().values.any():
            print(f"Missing values in {r}")
        else:
            dfs.append(df)

    if not dfs:
        # pd.concat([]) fails with an opaque "No objects to concatenate";
        # raise the same exception type with an actionable message instead.
        raise ValueError(f"No usable result files found in {results_path!r}")

    # ignore_index avoids repeated per-file row labels in the merged frame.
    return pd.concat(dfs, ignore_index=True)
19
+
20
def map_df(full_df):
    """Add integer ``label`` and ``pred`` columns to ``full_df`` in place.

    ``label`` is derived from the ``ground_truth`` column ('full fake' and
    'half fake' become 0, 'real' becomes 1). ``pred`` is derived from the
    ``type`` column ('fake' becomes 0, 'real' becomes 1). Values absent
    from a mapping come out as NaN, as ``Series.map`` does for a dict.

    Args:
        full_df: DataFrame with ``ground_truth`` and ``type`` columns.

    Returns:
        The same DataFrame object that was passed in, now carrying the two
        extra columns.
    """
    # Both kinds of fake collapse onto the single class 0.
    truth_codes = dict.fromkeys(('full fake', 'half fake'), 0)
    truth_codes['real'] = 1
    prediction_codes = {'fake': 0, 'real': 1}

    # (new column, source column, value -> code table)
    conversions = (
        ('label', 'ground_truth', truth_codes),
        ('pred', 'type', prediction_codes),
    )
    for target, source, codes in conversions:
        full_df[target] = full_df[source].map(codes)

    return full_df
32
+
33
def get_scores(df):
    """Build the leaderboard table of accuracy per duration bucket.

    Only rows whose ``label`` equals 0 are scored. They are split by
    ``duration`` into three buckets, plus an 'Overall' row covering every
    label-0 row regardless of duration.

    Args:
        df: DataFrame with ``label``, ``pred`` and ``duration`` columns.

    Returns:
        A DataFrame with one row per bucket and the columns ``Sample``
        (bucket name), ``Num Samples`` (rows in the bucket) and
        ``Accuracy`` (``accuracy_score`` rounded to 3 decimals).
    """
    is_label_zero = df.label == 0
    length = df.duration

    # Ordered (bucket name, row selector) pairs; order fixes the row order
    # of the returned table.
    buckets = [
        ('Under 25s', is_label_zero & (length < 26)),
        ('26s - 55s', is_label_zero & (length >= 26) & (length < 56)),
        ('56s - 125s', is_label_zero & (length >= 56) & (length < 126)),
        ('Overall', is_label_zero),
    ]

    names = []
    counts = []
    accuracies = []
    for bucket_name, selector in buckets:
        subset = df[selector]
        score = accuracy_score(subset.label.values, subset.pred.values)
        names.append(bucket_name)
        counts.append(len(subset))
        accuracies.append(round(score, 3))

    return pd.DataFrame({
        "Sample": names,
        "Num Samples": counts,
        "Accuracy": accuracies,
    })
58
+
59
def build_leaderboard(results_path = 'results'):
    """Run the full pipeline: load results, map labels, score buckets.

    Args:
        results_path: Directory containing the ``*.json`` result files;
            defaults to ``'results'``.

    Returns:
        The leaderboard DataFrame produced by ``get_scores``.
    """
    # load -> encode labels/predictions -> per-bucket accuracy
    return get_scores(map_df(get_merged_df(results_path)))