Commit 5c5f47c
Parent(s): b89cf4a
Test commit

Changed files:
- app.py +19 -10
- src/about.py +5 -4
- src/display/utils.py +1 -1
- src/leaderboard/read_evals.py +19 -21
- src/populate.py +1 -1
- src/submission/check_validity.py +19 -11
- src/submission/submit.py +6 -8

app.py
CHANGED
@@ -1,7 +1,6 @@
 import gradio as gr
-from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
-import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
+from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns
 from huggingface_hub import snapshot_download

 from src.about import (
@@ -20,9 +19,9 @@ from src.display.utils import (
     EVAL_TYPES,
     AutoEvalColumn,
     ModelType,
-    fields,
+    Precision,
     WeightType,
-    Precision
+    fields,
 )
 from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
@@ -32,18 +31,29 @@ from src.submission.submit import add_new_eval
 def restart_space():
     API.restart_space(repo_id=REPO_ID)

+
 ### Space initialisation
 try:
     print(EVAL_REQUESTS_PATH)
     snapshot_download(
-        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
+        repo_id=QUEUE_REPO,
+        local_dir=EVAL_REQUESTS_PATH,
+        repo_type="dataset",
+        tqdm_class=None,
+        etag_timeout=30,
+        token=TOKEN,
     )
 except Exception:
     restart_space()
 try:
     print(EVAL_RESULTS_PATH)
     snapshot_download(
-        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
+        repo_id=RESULTS_REPO,
+        local_dir=EVAL_RESULTS_PATH,
+        repo_type="dataset",
+        tqdm_class=None,
+        etag_timeout=30,
+        token=TOKEN,
     )
 except Exception:
     restart_space()
@@ -57,6 +67,7 @@ LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS,
     pending_eval_queue_df,
 ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)

+
 def init_leaderboard(dataframe):
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
@@ -80,9 +91,7 @@ def init_leaderboard(dataframe):
                 max=150,
                 label="Select the number of parameters (B)",
             ),
-            ColumnFilter(
-                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
-            ),
+            ColumnFilter(AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True),
         ],
         bool_checkboxgroup_label="Hide models",
         interactive=False,
@@ -201,4 +210,4 @@ with demo:
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()
+demo.queue(default_concurrency_limit=40).launch()

src/about.py
CHANGED
@@ -1,6 +1,7 @@
 from dataclasses import dataclass
 from enum import Enum

+
 @dataclass
 class Task:
     benchmark: str
@@ -11,7 +12,7 @@ class Task:
 # Select your tasks here
 # ---------------------------------------------------
 class Tasks(Enum):
-    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
+    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
     task0 = Task("category_mean", "history", "History")
     task1 = Task("category_mean", "grammar", "Grammar")
     task2 = Task("category_mean", "logic", "Logic")
@@ -19,9 +20,9 @@ class Tasks(Enum):
     task4 = Task("category_mean", "spelling", "Spelling")
     task5 = Task("category_mean", "Vocabulary", "Vocabulary")

-NUM_FEWSHOT = 0 # Change with your few shot
-# ---------------------------------------------------

+NUM_FEWSHOT = 0 # Change with your few shot
+# ---------------------------------------------------


 # Your leaderboard name
@@ -33,7 +34,7 @@ This leaderboard evaluates the performance of models on the HunBench benchmark.
 """

 # Which evaluations are you running? how can people reproduce what you have?
-LLM_BENCHMARKS_TEXT = f"""
+LLM_BENCHMARKS_TEXT = """
 ## How it works
 TODO
 ## Reproducibility

src/display/utils.py
CHANGED
@@ -27,7 +27,7 @@ auto_eval_column_dict = []
 auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
 # Scores
-auto_eval_column_dict.append(["
+auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
 # Model information

src/leaderboard/read_evals.py
CHANGED
@@ -1,6 +1,5 @@
 import glob
 import json
-import math
 import os
 from dataclasses import dataclass

@@ -8,28 +7,28 @@ import dateutil
 import numpy as np

 from src.display.formatting import make_clickable_model
-from src.display.utils import AutoEvalColumn, ModelType,
+from src.display.utils import AutoEvalColumn, ModelType, Precision, Tasks, WeightType
 from src.submission.check_validity import is_model_on_hub


 @dataclass
 class EvalResult:
-    """Represents one full evaluation. Built from a combination of the result and request file for a given run.
-    """
-    eval_name: str
-    full_model: str
-    org: str
+    """Represents one full evaluation. Built from a combination of the result and request file for a given run."""
+
+    eval_name: str # org_model_precision (uid)
+    full_model: str # org/model (path on hub)
+    org: str
     model: str
-    revision: str
+    revision: str # commit hash, "" if main
     results: dict
     precision: Precision = Precision.Unknown
-    model_type: ModelType = ModelType.Unknown
-    weight_type: WeightType = WeightType.Original
-    architecture: str = "Unknown"
+    model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
+    weight_type: WeightType = WeightType.Original # Original or Adapter
+    architecture: str = "Unknown"
     license: str = "?"
     likes: int = 0
     num_params: int = 0
-    date: str = ""
+    date: str = "" # submission date of request file
     still_on_hub: bool = False

     @classmethod
@@ -85,10 +84,10 @@ class EvalResult:
             org=org,
             model=model,
             results=results,
-            precision=precision,
-            revision=
+            precision=precision,
+            revision=config.get("model_sha", ""),
             still_on_hub=still_on_hub,
-            architecture=architecture
+            architecture=architecture,
         )

     def update_with_request_file(self, requests_path):
@@ -105,7 +104,9 @@ class EvalResult:
             self.num_params = request.get("params", 0)
             self.date = request.get("submitted_time", "")
         except Exception:
-            print(f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}")
+            print(
+                f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}"
+            )

     def to_dict(self):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
@@ -146,10 +147,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
     for tmp_request_file in request_files:
         with open(tmp_request_file, "r") as f:
             req_content = json.load(f)
-            if (
-                req_content["status"] in ["FINISHED"]
-                and req_content["precision"] == precision.split(".")[-1]
-            ):
+            if req_content["status"] in ["FINISHED"] and req_content["precision"] == precision.split(".")[-1]:
                 request_file = tmp_request_file
     return request_file

@@ -188,7 +186,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
     results = []
     for v in eval_results.values():
         try:
-            v.to_dict()
+            v.to_dict() # we test if the dict version is complete
             results.append(v)
         except KeyError: # not all eval values present
             continue

src/populate.py
CHANGED
@@ -14,7 +14,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     all_data_json = [v.to_dict() for v in raw_data]

     df = pd.DataFrame.from_records(all_data_json)
-    df = df.sort_values(by=[AutoEvalColumn.
+    df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
     df = df[cols].round(decimals=2)

     # filter out if any of the benchmarks have not been produced

src/submission/check_validity.py
CHANGED
@@ -1,8 +1,6 @@
 import json
 import os
-import re
 from collections import defaultdict
-from datetime import datetime, timedelta, timezone

 import huggingface_hub
 from huggingface_hub import ModelCard
@@ -10,6 +8,7 @@ from huggingface_hub.hf_api import ModelInfo
 from transformers import AutoConfig
 from transformers.models.auto.tokenization_auto import AutoTokenizer

+
 def check_model_card(repo_id: str) -> tuple[bool, str]:
     """Checks if the model card and license exist and have been filled"""
     try:
@@ -31,31 +30,38 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:

     return True, ""

-def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
+
+def is_model_on_hub(
+    model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False
+) -> tuple[bool, str]:
     """Checks if the model model_name is on the hub, and whether it (and its tokenizer) can be loaded with AutoClasses."""
     try:
-        config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
+        config = AutoConfig.from_pretrained(
+            model_name, revision=revision, trust_remote_code=trust_remote_code, token=token
+        )
         if test_tokenizer:
             try:
-                tk = AutoTokenizer.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
+                tk = AutoTokenizer.from_pretrained(
+                    model_name, revision=revision, trust_remote_code=trust_remote_code, token=token
+                )
             except ValueError as e:
+                return (False, f"uses a tokenizer which is not in a transformers release: {e}", None)
+            except Exception:
                 return (
                     False,
-                    f"uses a tokenizer which is not in a transformers release: {e}",
-                    None
+                    "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?",
+                    None,
                 )
-            except Exception as e:
-                return (False, "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?", None)
         return True, None, config

     except ValueError:
         return (
             False,
             "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.",
-            None
+            None,
         )

-    except Exception
+    except Exception:
         return False, "was not found on hub!", None


@@ -70,10 +76,12 @@ def get_model_size(model_info: ModelInfo, precision: str):
     model_size = size_factor * model_size
     return model_size

+
 def get_model_arch(model_info: ModelInfo):
     """Gets the model architecture from the configuration"""
     return model_info.config.get("architectures", "Unknown")

+
 def already_submitted_models(requested_models_dir: str) -> set[str]:
     """Gather a list of already submitted models to avoid duplicates"""
     depth = 1

src/submission/submit.py
CHANGED
@@ -3,17 +3,13 @@ import os
 from datetime import datetime, timezone

 from src.display.formatting import styled_error, styled_message, styled_warning
-from src.envs import API, EVAL_REQUESTS_PATH,
-from src.submission.check_validity import (
-    already_submitted_models,
-    check_model_card,
-    get_model_size,
-    is_model_on_hub,
-)
+from src.envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, TOKEN
+from src.submission.check_validity import already_submitted_models, check_model_card, get_model_size, is_model_on_hub

 REQUESTED_MODELS = None
 USERS_TO_SUBMISSION_DATES = None

+
 def add_new_eval(
     model: str,
     base_model: str,
@@ -45,7 +41,9 @@ def add_new_eval(

     # Is the model on the hub?
     if weight_type in ["Delta", "Adapter"]:
-        base_model_on_hub, error, _ = is_model_on_hub(model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True)
+        base_model_on_hub, error, _ = is_model_on_hub(
+            model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True
+        )
         if not base_model_on_hub:
             return styled_error(f'Base model "{base_model}" {error}')