Spaces:
Runtime error
Runtime error
File size: 4,474 Bytes
9b2e755 0c7ef71 28ee758 0c7ef71 05bda40 f04f90e 28ee758 0c7ef71 28ee758 0c7ef71 28ee758 0c7ef71 28ee758 0c7ef71 9b2e755 8d502c8 9b2e755 0c7ef71 9b2e755 0c7ef71 8d502c8 0c7ef71 9b2e755 0c7ef71 9b2e755 05bda40 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
from huggingface_hub import ModelFilter, snapshot_download
from huggingface_hub import ModelCard
import json
import os
import time
from src.submission.check_validity import is_model_on_hub, check_model_card, get_model_tags
from src.envs import QUEUE_REPO, EVAL_REQUESTS_PATH, DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH, DYNAMIC_INFO_FILE_PATH, API, H4_TOKEN
def update_one_model(model_id, data, models_on_the_hub):
    """Refresh the stored hub metadata for a single model.

    Args:
        model_id: Hub identifier ("org/name") of the model to refresh.
        data: Existing metadata dict for this model (may be empty); mutated
            in place.
        models_on_the_hub: Mapping of model id -> hub ModelInfo-like object
            (must expose .likes, .downloads, .created_at, .card_data).

    Returns:
        The updated ``data`` dict.
    """
    # Model no longer on the hub at all: zero out its stats and bail early.
    if model_id not in models_on_the_hub:
        data['still_on_hub'] = False
        data['likes'] = 0
        data['downloads'] = 0
        data['created_at'] = ""
        # Fix: also clear tags — otherwise a stale list from a previous run
        # survives on an entry we just marked as deleted.
        data['tags'] = []
        return data

    # Grabbing model parameters
    model_cfg = models_on_the_hub[model_id]
    data['likes'] = model_cfg.likes
    data['downloads'] = model_cfg.downloads
    data['created_at'] = str(model_cfg.created_at)
    data['license'] = model_cfg.card_data.license if model_cfg.card_data is not None else ""

    # Grabbing model details
    model_name = model_id
    if model_cfg.card_data is not None and model_cfg.card_data.base_model is not None:
        if isinstance(model_cfg.card_data.base_model, str):
            model_name = model_cfg.card_data.base_model  # for adapters, we look at the parent model
    still_on_hub, _, _ = is_model_on_hub(
        model_name=model_name, revision=data.get("revision"), trust_remote_code=True, test_tokenizer=False, token=H4_TOKEN
    )

    # If the model doesn't have a model card or a license, we consider it's deleted.
    model_card = None  # fix: ensure the name is always bound before use below
    if still_on_hub:
        try:
            status, _, model_card = check_model_card(model_id)
            if status is False:
                still_on_hub = False
        except Exception:
            # Best effort: a card-fetch failure is treated as "deleted".
            model_card = None
            still_on_hub = False
    data['still_on_hub'] = still_on_hub
    data["tags"] = get_model_tags(model_card, model_id) if still_on_hub else []
    return data
def update_models(file_path, models_on_the_hub):
    """Update every model entry in the JSON file at ``file_path``.

    Loads the per-model metadata dict, refreshes each entry against the
    ``models_on_the_hub`` mapping via :func:`update_one_model`, adds entries
    for any eval-request files created since the file was last written, and
    saves the result back in place.

    Args:
        file_path: Path to the JSON file mapping model id -> metadata dict.
        models_on_the_hub: Mapping of model id -> hub ModelInfo-like object.
    """
    # Fix: a set gives O(1) membership checks below (was a list: O(n) each).
    seen_models = set()
    with open(file_path, "r") as f:
        model_infos = json.load(f)
    for model_id in model_infos.keys():
        seen_models.add(model_id)
        model_infos[model_id] = update_one_model(
            model_id=model_id,
            data=model_infos[model_id],
            models_on_the_hub=models_on_the_hub,
        )

    # If new requests files have been created since we started all this
    # we grab them
    all_models = []
    try:
        for ix, (root, _, files) in enumerate(os.walk(EVAL_REQUESTS_PATH)):
            if ix == 0:
                continue  # skip the walk root itself; requests live in org subfolders
            for file in files:
                if "eval_request" in file:
                    # Request files are named <model>_eval_request_*.json inside
                    # an <org> folder, so the model id is "<org>/<model>".
                    path = root.split("/")[-1] + "/" + file.split("_eval_request")[0]
                    all_models.append(path)
    except Exception as e:
        # Best effort: a failed directory walk should not abort the refresh.
        print(e)

    for model_id in all_models:
        if model_id not in seen_models:
            model_infos[model_id] = update_one_model(
                model_id=model_id,
                data={},
                models_on_the_hub=models_on_the_hub,
            )

    with open(file_path, 'w') as f:
        json.dump(model_infos, f, indent=2)
def update_dynamic_files():
    """Refresh the dynamic-info dataset with current hub metadata.

    Downloads the dynamic-info snapshot, lists every text-generation model on
    the hub, updates existing entries (and adds entries for newly created
    eval-request files) in the local request file, then pushes the updated
    file back to the dataset repo.
    """
    snapshot_download(
        repo_id=DYNAMIC_INFO_REPO, local_dir=DYNAMIC_INFO_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
    )
    print("UPDATE_DYNAMIC: Loaded snapshot")

    # Get models
    start = time.time()
    models = list(API.list_models(
        filter=ModelFilter(task="text-generation"),
        full=False,
        cardData=True,
        fetch_config=True,
    ))
    id_to_model = {model.id: model for model in models}
    print(f"UPDATE_DYNAMIC: Downloaded list of models in {time.time() - start:.2f} seconds")

    start = time.time()
    update_models(DYNAMIC_INFO_FILE_PATH, id_to_model)
    print(f"UPDATE_DYNAMIC: updated in {time.time() - start:.2f} seconds")

    API.upload_file(
        path_or_fileobj=DYNAMIC_INFO_FILE_PATH,
        path_in_repo=DYNAMIC_INFO_FILE_PATH.split("/")[-1],
        repo_id=DYNAMIC_INFO_REPO,
        repo_type="dataset",
        # Fix: no placeholders here, so the f-prefix was superfluous.
        commit_message="Daily request file update.",
    )
    print("UPDATE_DYNAMIC: pushed to hub")
|