Nathan Habib committed on
Commit
0799cf8
·
1 Parent(s): eed1ccd

use cache for model metadata

Browse files
src/display_models/get_model_metadata.py CHANGED
@@ -2,6 +2,7 @@ import glob
2
  import json
3
  import os
4
  import re
 
5
  from typing import List
6
 
7
  import huggingface_hub
@@ -16,20 +17,36 @@ api = HfApi(token=os.environ.get("H4_TOKEN", None))
16
 
17
 
18
  def get_model_infos_from_hub(leaderboard_data: List[dict]):
 
 
 
 
 
 
 
19
  for model_data in tqdm(leaderboard_data):
20
  model_name = model_data["model_name_for_query"]
21
- try:
22
- model_info = api.model_info(model_name)
23
- except huggingface_hub.utils._errors.RepositoryNotFoundError:
24
- print("Repo not found!", model_name)
25
- model_data[AutoEvalColumn.license.name] = None
26
- model_data[AutoEvalColumn.likes.name] = None
27
- model_data[AutoEvalColumn.params.name] = get_model_size(model_name, None)
28
- continue
 
 
 
 
 
29
 
30
  model_data[AutoEvalColumn.license.name] = get_model_license(model_info)
31
  model_data[AutoEvalColumn.likes.name] = get_model_likes(model_info)
32
  model_data[AutoEvalColumn.params.name] = get_model_size(model_name, model_info)
 
 
 
 
33
 
34
 
35
  def get_model_license(model_info):
 
2
  import json
3
  import os
4
  import re
5
+ import pickle
6
  from typing import List
7
 
8
  import huggingface_hub
 
17
 
18
 
19
  def get_model_infos_from_hub(leaderboard_data: List[dict]):
20
+ # load cache from disk
21
+ try:
22
+ with open("model_info_cache.pkl", "rb") as f:
23
+ model_info_cache = pickle.load(f)
24
+ except EOFError:
25
+ model_info_cache = {}
26
+
27
  for model_data in tqdm(leaderboard_data):
28
  model_name = model_data["model_name_for_query"]
29
+
30
+ if model_name in model_info_cache:
31
+ model_info = model_info_cache[model_name]
32
+ else:
33
+ try:
34
+ model_info = api.model_info(model_name)
35
+ model_info_cache[model_name] = model_info
36
+ except huggingface_hub.utils._errors.RepositoryNotFoundError:
37
+ print("Repo not found!", model_name)
38
+ model_data[AutoEvalColumn.license.name] = None
39
+ model_data[AutoEvalColumn.likes.name] = None
40
+ model_data[AutoEvalColumn.params.name] = get_model_size(model_name, None)
41
+ continue
42
 
43
  model_data[AutoEvalColumn.license.name] = get_model_license(model_info)
44
  model_data[AutoEvalColumn.likes.name] = get_model_likes(model_info)
45
  model_data[AutoEvalColumn.params.name] = get_model_size(model_name, model_info)
46
+
47
+ # save cache to disk in pickle format
48
+ with open("model_info_cache.pkl", "wb") as f:
49
+ pickle.dump(model_info_cache, f)
50
 
51
 
52
  def get_model_license(model_info):