Spaces:
Running
Running
updated scores algorithm
Browse files- utils_v2.py +39 -32
utils_v2.py
CHANGED
@@ -66,17 +66,9 @@ def load_data(base_dir=SCORE_BASE_DIR):
|
|
66 |
all_data.append(data)
|
67 |
return all_data
|
68 |
|
69 |
-
def
|
70 |
-
"""This function
|
71 |
-
|
72 |
-
"""
|
73 |
-
def get_avg(sum_score, leng):
|
74 |
-
avg = sum_score / leng if leng > 0 else 0.0
|
75 |
-
avg = round(avg, 2) # Round to 2 decimal places
|
76 |
-
return avg
|
77 |
-
|
78 |
-
avg_scores = {}
|
79 |
-
overall_scores_summary = {} # Stores the scores sum and length for each modality and all datasets
|
80 |
for modality, datasets_list in DATASETS.items(): # Ex.: ('image', {'I-CLS': [...], 'I-QA': [...]})
|
81 |
overall_scores_summary[modality] = (0.0, 0) # Initialize the sum and count for each modality
|
82 |
for sub_task, datasets in datasets_list.items(): # Ex.: ('I-CLS', ['VOC2007', 'N24News', ...])
|
@@ -87,26 +79,42 @@ def calculate_score(raw_scores=None):
|
|
87 |
metric = SPECIAL_METRICS.get(dataset, 'hit@1')
|
88 |
if isinstance(score, dict):
|
89 |
score = score.get(metric, 0.0)
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
def generate_model_row(data):
|
112 |
metadata = data['metadata']
|
@@ -127,7 +135,6 @@ def get_df():
|
|
127 |
df = df.sort_values(by='Overall', ascending=False).reset_index(drop=True)
|
128 |
df['Rank'] = range(1, len(df) + 1)
|
129 |
df = create_hyperlinked_names(df)
|
130 |
-
|
131 |
return df
|
132 |
|
133 |
def refresh_data():
|
|
|
66 |
all_data.append(data)
|
67 |
return all_data
|
68 |
|
69 |
+
def load_scores(raw_scores=None):
|
70 |
+
"""This function loads the raw scores from the user provided scores summary and flattens them into a single dictionary."""
|
71 |
+
all_scores = {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
for modality, datasets_list in DATASETS.items(): # Ex.: ('image', {'I-CLS': [...], 'I-QA': [...]})
|
73 |
overall_scores_summary[modality] = (0.0, 0) # Initialize the sum and count for each modality
|
74 |
for sub_task, datasets in datasets_list.items(): # Ex.: ('I-CLS', ['VOC2007', 'N24News', ...])
|
|
|
79 |
metric = SPECIAL_METRICS.get(dataset, 'hit@1')
|
80 |
if isinstance(score, dict):
|
81 |
score = score.get(metric, 0.0)
|
82 |
+
single_dataset_score = {'dataset': dataset, 'score': score}
|
83 |
+
all_scores.update(single_dataset_score)
|
84 |
+
return all_scores
|
85 |
+
|
86 |
+
def calculate_score(raw_scores=None):
    """Aggregate per-dataset scores into overall, per-modality and per-sub-task averages.

    Args:
        raw_scores: Forwarded unchanged to ``load_scores``.

    Returns:
        The mapping returned by ``load_scores``, updated in place with an
        ``'Overall'`` entry, one ``'<Modality>-Overall'`` entry per item of
        ``MODALITIES``, and one entry per sub-task key found in ``DATASETS``.
        Every average is rounded to 2 decimal places.
    """
    def _rounded_mean(total, count):
        # A group with no members averages to 0.0 instead of dividing by zero.
        if count > 0:
            return round(total / count, 2)
        return 0.0

    # NOTE(review): presumably maps dataset name -> numeric score; confirm
    # against load_scores, since sum() below requires every value to be numeric.
    dataset_scores = load_scores(raw_scores)

    # Overall: every loaded value is summed, but the divisor is the size of
    # ALL_DATASETS rather than the number of scores actually present.
    averages = {
        'Overall': _rounded_mean(sum(dataset_scores.values()), len(ALL_DATASETS)),
    }

    # Per-modality averages; a dataset without a loaded score contributes 0.0.
    for modality in MODALITIES:
        members = ALL_DATASETS_SPLITS.get(modality, [])
        modality_total = sum(dataset_scores.get(name, 0.0) for name in members)
        averages[f"{modality.capitalize()}-Overall"] = _rounded_mean(
            modality_total, len(members)
        )

    # Per-sub-task averages; only the sub-task groupings are needed here,
    # not the modality they are filed under.
    for sub_tasks in DATASETS.values():
        for sub_task, members in sub_tasks.items():
            sub_task_total = sum(dataset_scores.get(name, 0.0) for name in members)
            averages[sub_task] = _rounded_mean(sub_task_total, len(members))

    dataset_scores.update(averages)
    return dataset_scores
|
118 |
|
119 |
def generate_model_row(data):
|
120 |
metadata = data['metadata']
|
|
|
135 |
df = df.sort_values(by='Overall', ascending=False).reset_index(drop=True)
|
136 |
df['Rank'] = range(1, len(df) + 1)
|
137 |
df = create_hyperlinked_names(df)
|
|
|
138 |
return df
|
139 |
|
140 |
def refresh_data():
|