Clémentine committed on
Commit
51195ac
·
1 Parent(s): 2ff76cf

reordered file saving step + added new catch

Browse files
Files changed (1) hide show
  1. app.py +25 -19
app.py CHANGED
@@ -34,7 +34,8 @@ ref_level_len = {"validation": {1: 53, 2: 86, 3: 26}, "test": {1: 93, 2: 159, 3:
34
 
35
  os.makedirs("scored", exist_ok=True)
36
 
37
- LOCAL_DEBUG = False
 
38
 
39
  # Display the results
40
  eval_results = load_dataset(RESULTS_DATASET, YEAR_VERSION, token=TOKEN, download_mode="force_redownload", verification_mode=VerificationMode.NO_CHECKS, trust_remote_code=True)
@@ -111,7 +112,7 @@ def add_new_eval(
111
  if path_to_file is None:
112
  return format_warning("Please attach a file.")
113
 
114
- # Save submitted file
115
  if LOCAL_DEBUG:
116
  print("mock uploaded submission")
117
  else:
@@ -123,7 +124,23 @@ def add_new_eval(
123
  token=TOKEN
124
  )
125
 
126
- # Compute score
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  file_path = path_to_file.name
128
  scores = {"all": 0, 1: 0, 2: 0, 3: 0}
129
  num_questions = {"all": 0, 1: 0, 2: 0, 3: 0}
@@ -169,7 +186,7 @@ def add_new_eval(
169
  if any([num_questions[level] != ref_level_len[val_or_test][level] for level in [1, 2, 3]]):
170
  return format_error(f"Your submission has {num_questions[1]} questions for level 1, {num_questions[2]} for level 2, and {num_questions[3]} for level 3, but it should have {ref_level_len[val_or_test][1]}, {ref_level_len[val_or_test][2]}, and {ref_level_len[val_or_test][3]} respectively. Please check your submission.")
171
 
172
- # Save scored file
173
  if LOCAL_DEBUG:
174
  print("mock uploaded scored submission")
175
  else:
@@ -191,7 +208,7 @@ def add_new_eval(
191
  token=TOKEN
192
  )
193
 
194
- # Actual submission
195
  eval_entry = {
196
  "model": model,
197
  "model_family": model_family,
@@ -206,6 +223,9 @@ def add_new_eval(
206
  }
207
  if num_questions[1] + num_questions[2] + num_questions[3] != ref_scores_len[val_or_test]:
208
  return format_error(f"Your submission has {len(scores['all'])} questions for the {val_or_test} set, but it should have {ref_scores_len[val_or_test]}. Please check your submission.")
 
 
 
209
 
210
  # Testing for duplicates - to see if we want to add something like it as it would allow people to try to see the content of other submissions
211
  #eval_entry_no_date = {k: v for k, v in eval_entry if k != "date"}
@@ -220,20 +240,6 @@ def add_new_eval(
220
  else:
221
  eval_results.push_to_hub(RESULTS_DATASET, config_name = YEAR_VERSION, token=TOKEN)
222
 
223
- contact_info = {
224
- "model": model,
225
- "model_family": model_family,
226
- "url": url,
227
- "organisation": organisation,
228
- "username": profile.username,
229
- "mail": mail,
230
- "date": datetime.datetime.today().strftime('%Y-%m-%d')
231
- }
232
- contact_infos[val_or_test]= contact_infos[val_or_test].add_item(contact_info)
233
- if LOCAL_DEBUG:
234
- print("mock uploaded contact info")
235
- else:
236
- contact_infos.push_to_hub(CONTACT_DATASET, config_name = YEAR_VERSION, token=TOKEN)
237
 
238
  return format_log(f"Model {model} submitted by {organisation} successfully.\nPlease wait a few hours and refresh the leaderboard to see your score displayed.")
239
 
 
34
 
35
  os.makedirs("scored", exist_ok=True)
36
 
37
+ # Should be False on spaces and True outside
38
+ LOCAL_DEBUG = not (os.environ.get("system") == "spaces")
39
 
40
  # Display the results
41
  eval_results = load_dataset(RESULTS_DATASET, YEAR_VERSION, token=TOKEN, download_mode="force_redownload", verification_mode=VerificationMode.NO_CHECKS, trust_remote_code=True)
 
112
  if path_to_file is None:
113
  return format_warning("Please attach a file.")
114
 
115
+ # SAVE UNSCORED SUBMISSION
116
  if LOCAL_DEBUG:
117
  print("mock uploaded submission")
118
  else:
 
124
  token=TOKEN
125
  )
126
 
127
+ # SAVE CONTACT
128
+ contact_info = {
129
+ "model": model,
130
+ "model_family": model_family,
131
+ "url": url,
132
+ "organisation": organisation,
133
+ "username": profile.username,
134
+ "mail": mail,
135
+ "date": datetime.datetime.today().strftime('%Y-%m-%d')
136
+ }
137
+ contact_infos[val_or_test]= contact_infos[val_or_test].add_item(contact_info)
138
+ if LOCAL_DEBUG:
139
+ print("mock uploaded contact info")
140
+ else:
141
+ contact_infos.push_to_hub(CONTACT_DATASET, config_name = YEAR_VERSION, token=TOKEN)
142
+
143
+ # SCORE SUBMISSION
144
  file_path = path_to_file.name
145
  scores = {"all": 0, 1: 0, 2: 0, 3: 0}
146
  num_questions = {"all": 0, 1: 0, 2: 0, 3: 0}
 
186
  if any([num_questions[level] != ref_level_len[val_or_test][level] for level in [1, 2, 3]]):
187
  return format_error(f"Your submission has {num_questions[1]} questions for level 1, {num_questions[2]} for level 2, and {num_questions[3]} for level 3, but it should have {ref_level_len[val_or_test][1]}, {ref_level_len[val_or_test][2]}, and {ref_level_len[val_or_test][3]} respectively. Please check your submission.")
188
 
189
+ # SAVE SCORED SUBMISSION
190
  if LOCAL_DEBUG:
191
  print("mock uploaded scored submission")
192
  else:
 
208
  token=TOKEN
209
  )
210
 
211
+ # SAVE TO LEADERBOARD DATA
212
  eval_entry = {
213
  "model": model,
214
  "model_family": model_family,
 
223
  }
224
  if num_questions[1] + num_questions[2] + num_questions[3] != ref_scores_len[val_or_test]:
225
  return format_error(f"Your submission has {len(scores['all'])} questions for the {val_or_test} set, but it should have {ref_scores_len[val_or_test]}. Please check your submission.")
226
+ # Catching spam submissions of 100%
227
+ if all((eval_entry[k] == 1 for k in ["score_level1", "score_level2", "score_level3"])):
228
+ return format_error(f"There was a problem with your submission. Please open a discussion.")
229
 
230
  # Testing for duplicates - to see if we want to add something like it as it would allow people to try to see the content of other submissions
231
  #eval_entry_no_date = {k: v for k, v in eval_entry if k != "date"}
 
240
  else:
241
  eval_results.push_to_hub(RESULTS_DATASET, config_name = YEAR_VERSION, token=TOKEN)
242
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
 
244
  return format_log(f"Model {model} submitted by {organisation} successfully.\nPlease wait a few hours and refresh the leaderboard to see your score displayed.")
245