Commit 7ded1c5 · Parent(s): 6e793d7

auth and fetch user details

Files changed:
- README.md +1 -0
- app.py +107 -75
- src/submission/submit.py +62 -26
README.md
CHANGED
@@ -4,6 +4,7 @@ emoji: 🥇
 colorFrom: green
 colorTo: indigo
 sdk: gradio
+hf_oauth: true
 app_file: app.py
 pinned: true
 license: apache-2.0
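Setting `hf_oauth: true` in the Space metadata is what the rest of this commit builds on: it attaches a "Sign in with Hugging Face" OAuth app to the Space and, per the Spaces OAuth documentation, exposes OAuth environment variables to the running app. A minimal sketch of what the app can expect at runtime, assuming the documented variable names; `fetch_sub_claim` in src/submission/submit.py below relies on `OPENID_PROVIDER_URL`:

import os

# Present only when the Space declares `hf_oauth: true` (assumption based on
# the Hugging Face Spaces OAuth docs; the example values are illustrative).
provider_url = os.getenv("OPENID_PROVIDER_URL")  # e.g. "https://huggingface.co"
client_id = os.getenv("OAUTH_CLIENT_ID")
scopes = os.getenv("OAUTH_SCOPES")               # e.g. "openid profile"
print(provider_url, client_id, scopes)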
app.py
CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
 from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns
+from huggingface_hub import whoami
 
 from src.about import CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, EVALUATION_QUEUE_TEXT, INTRODUCTION_TEXT, TITLE
 from src.datamodel.data import F1Data
@@ -11,7 +12,7 @@ from src.display.utils import AutoEvalColumn, ModelType, fields
 from src.envs import API, CODE_PROBLEMS_REPO, REPO_ID, RESULTS_REPO, SUBMISSIONS_REPO
 from src.logger import get_logger
 from src.populate import get_leaderboard_df
-from src.submission.submit import add_new_solutions
+from src.submission.submit import add_new_solutions, fetch_sub_claim
 from src.validation.validate import MAX_INPUT_LENGTH, MIN_INPUT_LENGTH, is_submission_file_valid, is_valid
 
 logger = get_logger(__name__)
@@ -77,6 +78,75 @@ def init_leaderboard(dataframe: pd.DataFrame):
     )
 
 
+def add_solution_cbk(
+    system_name: str,
+    org: str,
+    sys_type: str,
+    submission_path: str,
+    profile: gr.OAuthProfile | None,
+    token: gr.OAuthToken | None,
+):
+    logger.info("Fetching user details for submission")
+    if profile is None or token is None:
+        return styled_error("Please sign in with Hugging Face before submitting.")
+
+    # Display handle and display name (good for the UI)
+    handle = profile.username
+    logger.info(f"User handle: {handle}")
+    display_name = profile.name or handle
+    logger.info(f"Displaying name: {display_name}")
+
+    # Optional: verify handle/orgs via Hub API
+    info = whoami(token.token)  # {'name': 'user', 'orgs': [{'name': 'org1'}, ...], ...}
+    logger.info(f"User info: {info}")
+
+    # Stable account id (for dedup, enforcement, joins)
+    claims = fetch_sub_claim(token) or {}
+    logger.info(f"Claims: {claims}")
+    stable_id = claims.get("sub")
+    logger.info(f"User stable ID: {stable_id}")
+
+    try:
+        # Validating the submission file.
+        if len(submission_path) == 0:
+            return styled_error("Please upload JSONL submission file.")
+
+        if not is_submission_file_valid(
+            submission_path,
+            is_warmup_dataset=(SPLIT == "warmup"),
+        ):
+            return styled_error("Failed to read JSONL submission file. Please try again later.")
+
+        # Validating all user-supplied arguments.
+        for val, val_name in [
+            (system_name, "System name"),
+            (org, "Organisation name"),
+            (sys_type, "System type"),
+        ]:
+            if len(val) == 0:
+                return styled_error(f"Please fill in the '{val_name}' field.")
+
+            if not is_valid(val):
+                return styled_error(
+                    f"{val_name} is invalid! Must only contain characters [a-zA-Z0-9], spaces, "
+                    + "or the special characters '-' and '.', and be of length between "
+                    + f"{MIN_INPUT_LENGTH} and {MAX_INPUT_LENGTH}."
+                )
+    except Exception:
+        logger.warning("Failed to process user submission", exc_info=True)
+        return styled_error("An error occurred. Please try again later.")  # Intentionally vague.
+
+    return add_new_solutions(
+        lbdb,
+        system_name,
+        org,
+        sys_type,
+        submission_path,
+        is_warmup_dataset=(SPLIT == "warmup"),
+        ensure_all_present=ENSURE_ALL_PRESENT,
+    )
+
+
 blocks = gr.Blocks(css=custom_css)
 with blocks:
     gr.Image(
@@ -126,81 +196,43 @@ with blocks:
     with gr.Row():
         gr.Markdown("# ✉️✨ Submit your solutions here!", elem_classes="markdown-text")
 
-    with gr.Row():
-        with gr.Column():
-            system_name_textbox = gr.Textbox(label=AutoEvalColumn.system.name)
-            org_textbox = gr.Textbox(label=AutoEvalColumn.organization.name)
-            sys_type_dropdown = gr.Dropdown(
-                choices=[t.to_str() for t in ModelType],
-                label=AutoEvalColumn.system_type.name,
-                multiselect=False,
-                value=ModelType.LLM.to_str(),
-                interactive=True,
-            )
-
-    submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"])
-
-    logger.info("Submit button")
-    submit_button = gr.Button("Submit")
-    submission_result = gr.Markdown()
-
-    def add_solution_cbk(
-        system_name: str,
-        org: str,
-        sys_type: str,
-        submission_path: str,
-    ):
-
-        try:
-            # Validating the submission file.
-            if len(submission_path) == 0:
-                return styled_error("Please upload JSONL submission file.")
-
-            if not is_submission_file_valid(
-                submission_path,
-                is_warmup_dataset=(SPLIT == "warmup"),
-            ):
-                return styled_error("Failed to read JSONL submission file. Please try again later.")
-
-            # Validating all user-supplied arguments.
-            for val, val_name in [
-                (system_name, "System name"),
-                (org, "Organisation name"),
-                (sys_type, "System type"),
-            ]:
-                if len(val) == 0:
-                    return styled_error(f"Please fill in the '{val_name}' field.")
-
-                if not is_valid(val):
-                    return styled_error(
-                        f"{val_name} is invalid! Must only contain characters [a-zA-Z0-9], spaces, "
-                        + "or the special characters '-' and '.', and be of length between "
-                        + f"{MIN_INPUT_LENGTH} and {MAX_INPUT_LENGTH}."
-                    )
-        except Exception:
-            logger.warning("Failed to process user submission", exc_info=True)
-            return styled_error("An error occurred. Please try again later.")  # Intentionally vague.
-
-        return add_new_solutions(
-            lbdb,
-            system_name,
-            org,
-            sys_type,
-            submission_path,
-            is_warmup_dataset=(SPLIT == "warmup"),
-            ensure_all_present=ENSURE_ALL_PRESENT,
-        )
+    # Shown when logged OUT
+    login_box = gr.Group(visible=True)
+    with login_box:
+        gr.Markdown("### Submit your results\nPlease sign in to continue.")
+        gr.LoginButton()
 
-    submit_button.click(
-        add_solution_cbk,
-        [
-            system_name_textbox,
-            org_textbox,
-            sys_type_dropdown,
-            submission_file,
-        ],
-        submission_result,
-    )
+    # Shown when logged IN
+    submit_panel = gr.Group(visible=False)
+    with submit_panel:
+        with gr.Row():
+            with gr.Column():
+                system_name_textbox = gr.Textbox(label=AutoEvalColumn.system.name)
+                org_textbox = gr.Textbox(label=AutoEvalColumn.organization.name)
+                sys_type_dropdown = gr.Dropdown(
+                    choices=[t.to_str() for t in ModelType],
+                    label=AutoEvalColumn.system_type.name,
+                    multiselect=False,
+                    value=ModelType.LLM.to_str(),
+                    interactive=True,
+                )
+
+        submission_file = gr.File(label="JSONL solutions file", file_types=[".jsonl"])
+
+        logger.info("Submit button")
+        submit_button = gr.Button("Submit")
+        submission_result = gr.Markdown()
+
+        submit_button.click(
+            add_solution_cbk,
+            [
+                system_name_textbox,
+                org_textbox,
+                sys_type_dropdown,
+                submission_file,
+            ],
+            submission_result,
+        )
 
     with gr.Row():
         logger.info("Citation")
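Note that `submit_button.click` lists only four input components, while the new `add_solution_cbk` takes six parameters. With OAuth enabled on the Space, Gradio fills in any parameters annotated as `gr.OAuthProfile | None` or `gr.OAuthToken | None` from the current session rather than from the inputs list, and they arrive as `None` when the visitor is not signed in. A minimal standalone sketch of that injection pattern (the component and function names here are illustrative, not from this commit):

import gradio as gr

def greet(name: str, profile: gr.OAuthProfile | None) -> str:
    # `profile` is injected by Gradio from the OAuth session; only `name`
    # is supplied via the inputs list of the click event below.
    who = profile.username if profile else "anonymous"
    return f"Hello {name}, signed in as {who}."

with gr.Blocks() as demo:
    gr.LoginButton()                     # starts the Hugging Face OAuth flow
    name_box = gr.Textbox(label="Name")
    out = gr.Markdown()
    gr.Button("Greet").click(greet, [name_box], out)  # profile is not listed

demo.launch()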
src/submission/submit.py
CHANGED
@@ -1,9 +1,12 @@
 import time
 from datetime import datetime, timezone
+import os
+import requests
 
 import pandas as pd
 from datasets import Dataset
 from pandas.api.types import is_integer_dtype
+import gradio as gr
 
 from src.datamodel.data import F1Data
 from src.display.formatting import styled_error, styled_message
@@ -15,31 +18,6 @@ from src.validation.validate import is_submission_file_valid, is_valid
 logger = get_logger(__name__)
 
 
-def _validate_all_submissions_present(
-    lbdb: F1Data,
-    pd_ds: pd.DataFrame,
-):
-    logger.info(f"Validating DS size {len(pd_ds)} columns {pd_ds.columns} set {set(pd_ds.columns)}")
-    expected_cols = ["problem_id", "solution"]
-
-    if set(pd_ds.columns) != set(expected_cols):
-        return ValueError(f"Expected attributes: {expected_cols}, Got: {pd_ds.columns.tolist()}")
-
-    if not is_integer_dtype(pd_ds["problem_id"]):
-        return ValueError("problem_id must be str convertible to int")
-
-    if any(type(v) is not str for v in pd_ds["solution"]):
-        return ValueError("solution must be of type str")
-
-    submitted_ids = set(pd_ds.problem_id.astype(str))
-    if submitted_ids != lbdb.code_problem_ids:
-        missing = lbdb.code_problem_ids - submitted_ids
-        unknown = submitted_ids - lbdb.code_problem_ids
-        raise ValueError(f"Mismatched problem IDs: {len(missing)} missing, {len(unknown)} unknown")
-    if len(pd_ds) > len(lbdb.code_problem_ids):
-        return ValueError("Duplicate problem IDs exist in uploaded file")
-
-
 def add_new_solutions(
     lbdb: F1Data,
     system_name: str,
@@ -70,7 +48,7 @@ def add_new_solutions(
             "An error occurred. Please try again later."
         )  # Use same message as external error. Avoid infoleak.
 
-    submission_id = f"{
+    submission_id = f"{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}_{system_name}_{org}_{sys_type}"
 
     # Seems good, creating the eval.
     logger.info(f"Adding new submission: {submission_id}")
@@ -99,3 +77,61 @@
         "Your request has been submitted to the evaluation queue!\n"
         + "Results may take up to 24 hours to be processed and shown in the leaderboard."
     )
+
+
+def _validate_all_submissions_present(
+    lbdb: F1Data,
+    pd_ds: pd.DataFrame,
+):
+    logger.info(f"Validating DS size {len(pd_ds)} columns {pd_ds.columns} set {set(pd_ds.columns)}")
+    expected_cols = ["problem_id", "solution"]
+
+    if set(pd_ds.columns) != set(expected_cols):
+        return ValueError(f"Expected attributes: {expected_cols}, Got: {pd_ds.columns.tolist()}")
+
+    if not is_integer_dtype(pd_ds["problem_id"]):
+        return ValueError("problem_id must be str convertible to int")
+
+    if any(type(v) is not str for v in pd_ds["solution"]):
+        return ValueError("solution must be of type str")
+
+    submitted_ids = set(pd_ds.problem_id.astype(str))
+    if submitted_ids != lbdb.code_problem_ids:
+        missing = lbdb.code_problem_ids - submitted_ids
+        unknown = submitted_ids - lbdb.code_problem_ids
+        raise ValueError(f"Mismatched problem IDs: {len(missing)} missing, {len(unknown)} unknown")
+    if len(pd_ds) > len(lbdb.code_problem_ids):
+        return ValueError("Duplicate problem IDs exist in uploaded file")
+
+
+def fetch_sub_claim(oauth_token: gr.OAuthToken | None) -> dict | None:
+    if oauth_token is None:
+        return None
+    provider = os.getenv("OPENID_PROVIDER_URL")
+    if not provider:
+        return None
+    try:
+        oidc_meta = requests.get(f"{provider}/.well-known/openid-configuration", timeout=5).json()
+        userinfo_ep = oidc_meta["userinfo_endpoint"]
+        claims = requests.get(userinfo_ep, headers={"Authorization": f"Bearer {oauth_token.token}"}, timeout=5).json()
+        # Typical fields: sub (stable id), preferred_username, name, picture
+        return {
+            "sub": claims.get("sub"),
+            "preferred_username": claims.get("preferred_username"),
+            "name": claims.get("name"),
+        }
+    except Exception:
+        return None
+
+
+# --- on-load gate: show/hide submit panel based on login state ---
+def gate_submission(profile: gr.OAuthProfile | None):
+    """
+    Returns:
+    - login_box visibility (True if logged OUT)
+    - submit_panel visibility (True if logged IN)
+    - status line
+    """
+    if profile is None:
+        return gr.update(visible=True), gr.update(visible=False), "You're not signed in."
+    return gr.update(visible=False), gr.update(visible=True), f"Signed in as @{profile.username}"
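`gate_submission` is the on-load gate its comment describes, but the hunks shown above stop before the `blocks.load(...)` call in app.py that would wire it to `login_box` and `submit_panel`. A sketch of how that wiring would typically look, assuming a `login_status` Markdown component for the third return value (that component is hypothetical, not part of this commit):

# Inside the `with blocks:` context in app.py (sketch; this wiring is not shown in the diff).
login_status = gr.Markdown()  # hypothetical target for gate_submission's status line

blocks.load(
    gate_submission,  # gr.OAuthProfile is injected per session, so no inputs are needed
    inputs=None,
    outputs=[login_box, submit_panel, login_status],
)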