davanstrien HF Staff committed on
Commit
04c52df
·
1 Parent(s): 94aadae

add datasets and models to ranking

Browse files
Files changed (1) hide show
  1. app.py +332 -76
app.py CHANGED
@@ -1,44 +1,118 @@
1
  import gradio as gr
2
- from huggingface_hub import list_spaces
3
  from cachetools import TTLCache, cached
4
  from toolz import groupby, valmap
5
 
6
- # from diskcache import Cache
7
- # import platform
8
 
9
- # is_macos = platform.system() == "Darwin"
 
 
10
 
11
- # if is_macos:
12
- # cache = Cache("cache")
13
 
14
- # def cached_decorator(func):
15
- # return cache.memoize(typed=True, expire=1)(func)
 
 
16
 
17
- # else:
18
- # ttl_cache = TTLCache(maxsize=100, ttl=60 * 10)
19
- # cached_decorator = cached(cache=ttl_cache)
 
 
 
 
 
 
20
 
21
 
22
- # @cached_decorator
23
  @cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
24
- def get_spaces():
25
- return list(list_spaces(full=True))
26
 
27
 
28
  get_spaces() # to warm up the cache
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
 
 
 
 
 
 
 
 
 
 
 
 
31
  def create_space_to_like_dict():
32
  spaces = get_spaces()
33
  return {space.id: space.likes for space in spaces}
34
 
35
 
36
- def create_org_to_like_dict():
 
37
  spaces = get_spaces()
38
  grouped = groupby(lambda x: x.author, spaces)
39
  return valmap(lambda x: sum(s.likes for s in x), grouped)
40
 
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def relative_rank(my_dict, target_key, filter_zero=False):
43
  if filter_zero:
44
  my_dict = {k: v for k, v in my_dict.items() if v != 0}
@@ -50,14 +124,13 @@ def relative_rank(my_dict, target_key, filter_zero=False):
50
 
51
  position = [key for key, _ in sorted_items].index(target_key)
52
  num_lower = len(sorted_items) - position - 1
53
-
54
  num_higher = position
55
  return {
56
  "rank": (num_higher + 1) / len(my_dict) * 100,
57
  "num_higher": num_higher,
58
  "num_lower": num_lower,
59
  "value": my_dict[target_key],
60
- "position": position + 1,
61
  }
62
 
63
 
@@ -68,110 +141,293 @@ def relative_rank_for_space(space_id, filter_zero=False):
68
 
69
 
70
  @cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
71
- def relative_rank_for_org(org_id, filter_zero=False):
72
- org_to_like_dict = create_org_to_like_dict()
 
 
 
 
 
 
 
 
 
 
 
 
73
  return relative_rank(org_to_like_dict, org_id, filter_zero=filter_zero)
74
 
75
 
76
  @cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
77
- def rank_space(space_id):
78
- return relative_rank_for_space(space_id)
 
79
 
80
 
81
- def rank_space_and_org(space_or_org_id, filter_zero):
 
 
 
 
 
 
 
 
 
 
 
82
  filter_zero = filter_zero == "yes"
83
- split = space_or_org_id.split("/")
84
- if len(split) == 2:
85
- space_rank = relative_rank_for_space(space_or_org_id, filter_zero=filter_zero)
86
- result = "## ⭐️ Space Likes Rankings ⭐️\n"
87
- result += f"""Here are the rankings by likes for [`{space_or_org_id}`](https://huggingface.co/spaces/{space_or_org_id}) across all Spaces \n"""
88
- result += f"""- You have {space_rank['value']:,} likes for this Space.\n"""
89
- result += f"""- Your Space is ranked {space_rank['position']:,}.\n"""
90
- result += f"""- Space [{space_or_org_id}](https://huggingface.co/spaces/{space_or_org_id}) is ranked {space_rank['rank']:.2f}%\n"""
91
- result += f"""- You have {space_rank['num_higher']:,} Spaces above and {space_rank['num_lower']:,} Spaces below in the ranking of Space likes\n\n"""
92
- result += """### ✨ Remember likes aren't everything!✨\n"""
93
- result += """Some Spaces go very viral whilst other Spaces may be very useful for a smaller audience. If you think your Space is useful, please add it to this [thread](https://huggingface.co/spaces/librarian-bots/ranker/discussions/3) of awesome Spaces.
94
- We'll look out for awesome Spaces added to this thread to promote more widely!"""
95
- return result
96
-
97
- if len(split) == 1:
98
- org_rank = relative_rank_for_org(space_or_org_id, filter_zero=filter_zero)
99
- result = "## ⭐️ Org/User Space Likes Rankings ⭐️\n"
100
- result += "Here are the rankings for the org/user across all of their spaces \n"
101
- result += f"""- You have {org_rank['value']:,} likes for this org/user.\n"""
102
- result += f"""- Your org/user is ranked {org_rank['position']:,}\n"""
103
- result += f"""- You have {org_rank['num_higher']:,} orgs/users above and {org_rank['num_lower']:,} orgs/users below in the ranking of Space likes \n\n"""
104
- result += f"""- Organization or user [{space_or_org_id}](https://huggingface.co/{space_or_org_id}) is ranked in the top {org_rank['rank']:.2f}% \n\n"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  result += f"""You can find all your Spaces sorted by likes [here](https://huggingface.co/{space_or_org_id}?sort_spaces=likes#spaces)\n"""
106
- result += """### ✨ Remember likes aren't everything!✨\n"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  result += """Some Spaces go very viral whilst other Spaces may be very useful for a smaller audience. If you think your Space is useful, please add it to this [thread](https://huggingface.co/spaces/librarian-bots/ranker/discussions/3) of awesome Spaces.
108
- We'll look out for awesome Spaces added to this thread to promote more widely!"""
109
- return result
110
 
111
 
112
- def get_top_n_orgs_and_users(top_n=100):
113
- orgs_to_likes = create_org_to_like_dict()
 
114
  sorted_items = sorted(orgs_to_likes.items(), key=lambda item: item[1], reverse=True)
115
  sorted_items = sorted_items[:top_n]
116
  return sorted_items
117
 
118
 
119
- def plot_top_n_orgs_and_users(top_n=100):
120
- top_n = get_top_n_orgs_and_users(top_n)
121
- return "".join(
122
- f"\n{i+1}. [{org}](https://huggingface.co/{org}) with {likes:,} likes"
123
- for i, (org, likes) in enumerate(top_n)
124
- )
125
 
126
 
127
- def get_top_n_spaces(top_n=100):
128
- orgs_to_likes = create_space_to_like_dict()
 
129
  sorted_items = sorted(orgs_to_likes.items(), key=lambda item: item[1], reverse=True)
130
  sorted_items = sorted_items[:top_n]
131
  return sorted_items
132
 
133
 
134
- def plot_top_n_spaces(top_n=100):
135
- top_n = get_top_n_spaces(top_n)
136
- return "".join(
137
- f"\n{i+1}. [{space}](https://huggingface.co/spaces/{space}) with"
138
- f" {likes:,} likes"
139
- for i, (space, likes) in enumerate(top_n)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
 
143
  with gr.Blocks() as demo:
144
  gr.HTML("<h1 style='text-align: center;'> &#127942; HuggyRanker &#127942; </h1>")
145
  gr.HTML(
146
- """<p style='text-align: center;'>Rank a single Space or all of the Spaces created by an organization or user by likes</p>"""
147
  )
148
  gr.HTML(
149
  """<p style="text-align: center;"><i>Remember likes aren't everything!</i></p>"""
150
  )
151
  gr.Markdown(
152
- """## Rank Specific Spaces or Orgs
153
- Provide this app with a Space ID or a Username/Organization name to rank by likes."""
154
  )
155
  with gr.Row():
156
  space_id = gr.Textbox(
157
  "librarian-bots", max_lines=1, label="Space or user/organization ID"
158
  )
159
- filter_zero = gr.Radio(
160
  choices=["no", "yes"],
161
- label="Filter out spaces with 0 likes in the ranking?",
162
  value="yes",
163
  )
 
 
 
 
 
 
164
  run_btn = gr.Button("Show ranking for this Space or org/user!", label="Rank Space")
165
- # gr.Markdown("### Results")
166
  result = gr.Markdown()
167
- run_btn.click(rank_space_and_org, inputs=[space_id, filter_zero], outputs=result)
 
 
 
 
168
  gr.Markdown("## Leaderboard of Top 100 Spaces and Orgs/Users by Likes")
 
 
 
 
 
169
  with gr.Row():
170
  with gr.Accordion("Show rankings for Orgs and Users", open=False):
171
- gr.Markdown("""## πŸ₯‡ Top 100 Orgs and Users by Likes πŸ₯‡""")
172
- ranking_board = gr.Markdown(plot_top_n_orgs_and_users())
173
- with gr.Accordion("Show rankings for Spaces", open=False):
174
- gr.Markdown("""## πŸ… Top 100 Spaces by Likes πŸ…""")
175
- ranking_board = gr.Markdown(plot_top_n_spaces())
176
-
 
 
 
177
  demo.queue(concurrency_count=4).launch()
 
1
  import gradio as gr
2
+ from huggingface_hub import list_spaces, list_models, list_datasets
3
  from cachetools import TTLCache, cached
4
  from toolz import groupby, valmap
5
 
6
+ import platform
7
+ from enum import Enum
8
 
9
# True when the app is running on macOS.
is_macos = "Darwin" == platform.system()
# Forwarded as the `limit=` argument of the Hub listing calls: capped on macOS,
# uncapped (None) everywhere else.
# NOTE(review): presumably macOS == local development machine — confirm.
LIMIT = None if not is_macos else 1_000_000
# NOTE(review): looks like a display name for repos whose author is None.
NONE_AUTHOR = "HuggingFace Team"  # TODO deal with this
12
 
 
 
13
 
14
class HubRepoType(Enum):
    """The three kinds of Hub repository this app can rank.

    Each member's value is the lowercase string used for that kind elsewhere
    in this file (e.g. the "Type of repo" radio choices).
    """

    # Model repositories.
    MODEL = "model"
    # Dataset repositories.
    DATASET = "dataset"
    # Space (app) repositories.
    SPACE = "space"
18
 
19
+
20
@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def get_spaces():
    """Return all Spaces listed by the Hub as a list.

    Requests full metadata and at most ``LIMIT`` entries. The result is
    memoised in a TTL cache (``ttl=60 * 30``).
    """
    space_listing = list_spaces(full=True, limit=LIMIT)
    return list(space_listing)
23
+
24
+
25
@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def get_models():
    """Return all models listed by the Hub as a list.

    Requests full metadata and at most ``LIMIT`` entries. The result is
    memoised in a TTL cache (``ttl=60 * 30``).
    """
    model_listing = list_models(full=True, limit=LIMIT)
    # list() consumes the listing directly; no explicit iter() is needed.
    return list(model_listing)
28
 
29
 
 
30
@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def get_datasets():
    """Return all datasets listed by the Hub as a list.

    Requests full metadata and at most ``LIMIT`` entries. The result is
    memoised in a TTL cache (``ttl=60 * 30``).
    """
    dataset_listing = list_datasets(full=True, limit=LIMIT)
    # list() consumes the listing directly; no explicit iter() is needed.
    return list(dataset_listing)
33
 
34
 
35
# Warm up all three listing caches at import time.
get_spaces()
get_models()
get_datasets()
38
+
39
+
40
@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def valid_dataset_ids():
    """Return the set of ids of every dataset returned by ``get_datasets()``."""
    all_datasets = get_datasets()
    return {entry.id for entry in all_datasets}
43
+
44
+
45
@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def valid_model_ids():
    """Return the set of ids of every model returned by ``get_models()``."""
    all_models = get_models()
    return {entry.id for entry in all_models}
48
 
49
 
50
@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def valid_space_ids():
    """Return the set of ids of every Space returned by ``get_spaces()``."""
    all_spaces = get_spaces()
    return {entry.id for entry in all_spaces}
53
+
54
+
55
# Import-time snapshots of the known repo ids for each repo kind.
# NOTE(review): these are assigned once at module load, so they do not follow
# later refreshes of the TTL-cached valid_*_ids() functions.
VALID_DATASET_IDS = valid_dataset_ids()
VALID_MODEL_IDS = valid_model_ids()
VALID_SPACE_IDS = valid_space_ids()
58
+
59
+
60
@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def create_space_to_like_dict():
    """Return a ``{space id: likes}`` dict covering every Space from ``get_spaces()``."""
    likes_by_space = {}
    for space in get_spaces():
        likes_by_space[space.id] = space.likes
    return likes_by_space
64
 
65
 
66
@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def create_org_to_space_like_dict():
    """Return ``{author: total likes}`` summed over each author's Spaces."""

    def total_likes(author_spaces):
        # Sum the likes of all Spaces belonging to a single author.
        return sum(space.likes for space in author_spaces)

    spaces_by_author = groupby(lambda space: space.author, get_spaces())
    return valmap(total_likes, spaces_by_author)
71
 
72
 
73
@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def create_model_to_like_dict(metric_kind):
    """Return ``{model id: metric value}`` for every model from ``get_models()``.

    Args:
        metric_kind: ``"likes"`` or ``"downloads"``; selects which model
            attribute is used as the value.

    Raises:
        ValueError: if ``metric_kind`` is neither ``"likes"`` nor ``"downloads"``.
    """
    all_models = get_models()
    if metric_kind not in ("likes", "downloads"):
        raise ValueError(f"Unsupported metric_kind: {metric_kind}")
    # metric_kind is now known to be the name of the attribute to read.
    return {entry.id: getattr(entry, metric_kind) for entry in all_models}
81
+
82
+
83
@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def create_org_to_model_metrics(metric_kind="likes"):
    """Return ``{author: total metric}`` summed over each author's models.

    Models whose ``author`` is ``None`` are ignored.

    Args:
        metric_kind: ``"likes"`` or ``"downloads"``; the model attribute to sum.

    Raises:
        ValueError: if ``metric_kind`` is neither ``"likes"`` nor ``"downloads"``.
    """
    # The previous `if metric_kind:` test was true for every non-empty string,
    # so "downloads" silently produced like totals. Compare explicitly instead.
    if metric_kind not in ("likes", "downloads"):
        raise ValueError(f"Unsupported metric_kind: {metric_kind}")
    authored_models = [model for model in get_models() if model.author is not None]
    models_by_author = groupby(lambda model: model.author, authored_models)

    def total(author_models):
        # `or 0` guards against a metric the Hub reported as None.
        return sum(getattr(model, metric_kind) or 0 for model in author_models)

    return valmap(total, models_by_author)
93
+
94
+
95
@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def create_dataset_to_like_dict(metric_kind="likes"):
    """Return ``{dataset id: metric value}`` for every dataset from ``get_datasets()``.

    Args:
        metric_kind: ``"likes"`` or ``"downloads"``; selects which dataset
            attribute is used as the value.

    Raises:
        ValueError: if ``metric_kind`` is neither ``"likes"`` nor ``"downloads"``.
            (Previously this fell off the end and returned ``None``, which
            only failed later when the caller used the result as a dict.)
    """
    datasets = get_datasets()
    if metric_kind == "likes":
        return {dataset.id: dataset.likes for dataset in datasets}
    if metric_kind == "downloads":
        return {dataset.id: dataset.downloads for dataset in datasets}
    # Same error contract as create_model_to_like_dict.
    raise ValueError(f"Unsupported metric_kind: {metric_kind}")
102
+
103
+
104
@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def create_org_to_dataset_metrics(metric_kind="likes"):
    """Return ``{author: total metric}`` summed over each author's datasets.

    Datasets whose ``author`` is ``None`` are ignored.

    Args:
        metric_kind: ``"likes"`` or ``"downloads"``; the dataset attribute to sum.

    Raises:
        ValueError: if ``metric_kind`` is neither ``"likes"`` nor ``"downloads"``.
    """
    # The previous `if metric_kind:` test was true for every non-empty string,
    # so "downloads" silently produced like totals. Compare explicitly instead.
    if metric_kind not in ("likes", "downloads"):
        raise ValueError(f"Unsupported metric_kind: {metric_kind}")
    authored = [dataset for dataset in get_datasets() if dataset.author is not None]
    datasets_by_author = groupby(lambda dataset: dataset.author, authored)

    def total(author_datasets):
        # `or 0` guards against a metric the Hub reported as None.
        return sum(getattr(dataset, metric_kind) or 0 for dataset in author_datasets)

    return valmap(total, datasets_by_author)
114
+
115
+
116
  def relative_rank(my_dict, target_key, filter_zero=False):
117
  if filter_zero:
118
  my_dict = {k: v for k, v in my_dict.items() if v != 0}
 
124
 
125
  position = [key for key, _ in sorted_items].index(target_key)
126
  num_lower = len(sorted_items) - position - 1
 
127
  num_higher = position
128
  return {
129
  "rank": (num_higher + 1) / len(my_dict) * 100,
130
  "num_higher": num_higher,
131
  "num_lower": num_lower,
132
  "value": my_dict[target_key],
133
+ "position": num_higher + 1,
134
  }
135
 
136
 
 
141
 
142
 
143
@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def relative_rank_for_model(model_id, metric_kind="likes", filter_zero=False):
    """Rank ``model_id`` among all models by ``metric_kind``.

    Builds the per-model metric dict and delegates to ``relative_rank``;
    ``filter_zero`` is forwarded unchanged.
    """
    return relative_rank(
        create_model_to_like_dict(metric_kind),
        model_id,
        filter_zero=filter_zero,
    )
147
+
148
+
149
@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def relative_rank_for_dataset(dataset_id, metric_kind="likes", filter_zero=False):
    """Rank ``dataset_id`` among all datasets by ``metric_kind``.

    Builds the per-dataset metric dict and delegates to ``relative_rank``;
    ``filter_zero`` is forwarded unchanged.
    """
    return relative_rank(
        create_dataset_to_like_dict(metric_kind),
        dataset_id,
        filter_zero=filter_zero,
    )
153
+
154
+
155
@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def relative_space_rank_for_org(org_id, filter_zero=False):
    """Rank ``org_id`` among all authors by total Space likes.

    Delegates to ``relative_rank``; ``filter_zero`` is forwarded unchanged.
    """
    likes_by_org = create_org_to_space_like_dict()
    return relative_rank(likes_by_org, org_id, filter_zero=filter_zero)
159
 
160
 
161
@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def relative_model_rank_for_org(org_id, metric_kind="likes", filter_zero=False):
    """Rank ``org_id`` among all authors by their summed model metric.

    Delegates to ``relative_rank``; ``filter_zero`` is forwarded unchanged.
    """
    return relative_rank(
        create_org_to_model_metrics(metric_kind),
        org_id,
        filter_zero=filter_zero,
    )
165
 
166
 
167
@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def relative_dataset_rank_for_org(org_id, metric_kind="likes", filter_zero=False):
    """Rank ``org_id`` among all authors by their summed dataset metric.

    Delegates to ``relative_rank``; ``filter_zero`` is forwarded unchanged.
    """
    return relative_rank(
        create_org_to_dataset_metrics(metric_kind),
        org_id,
        filter_zero=filter_zero,
    )
171
+
172
+
173
+ # @cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
174
+ # def rank_space(space_id):
175
+ # return relative_rank_for_space(space_id)
176
+
177
+
178
def rank_space_and_org(space_or_org_id, kind, filter_zero):
    """Produce the ranking report for a repo id or an org/user name.

    Args:
        space_or_org_id: ``"owner/name"`` repo id, a single-segment repo id,
            or an org/user name.
        kind: ``"space"``, ``"model"`` or ``"dataset"``.
        filter_zero: the string ``"yes"`` enables zero-filtering; anything
            else disables it.

    Returns:
        The Markdown report built by ``_rank_single_repo`` or ``_rank_by_org``.

    Raises:
        ValueError: if the id has more than two ``/``-separated segments.
    """
    filter_zero = filter_zero == "yes"
    # Same number as len(space_or_org_id.split("/")).
    segment_count = space_or_org_id.count("/") + 1

    # "owner/name" is always an individual repository.
    if segment_count == 2:
        return _rank_single_repo(space_or_org_id, kind, filter_zero)

    # Anything other than one or two segments is not a recognised id shape.
    if segment_count != 1:
        raise ValueError(
            f"Unexpected combination of space_or_org_id '{space_or_org_id}' and kind"
            f" '{kind}'"
        )

    # A single segment is a repo only if it is a known model/dataset id;
    # otherwise it is treated as an org/user name.
    single_segment_ids = {"model": VALID_MODEL_IDS, "dataset": VALID_DATASET_IDS}
    if space_or_org_id in single_segment_ids.get(kind, ()):
        return _rank_single_repo(space_or_org_id, kind, filter_zero)
    return _rank_by_org(space_or_org_id, kind, filter_zero)
200
+
201
+
202
+ def _rank_by_org(space_or_org_id, kind, filter_zero):
203
+ if kind == "space":
204
+ org_rank = relative_space_rank_for_org(space_or_org_id, filter_zero=filter_zero)
205
+ elif kind == "model":
206
+ org_rank = relative_model_rank_for_org(space_or_org_id, filter_zero=filter_zero)
207
+ elif kind == "dataset":
208
+ org_rank = relative_dataset_rank_for_org(
209
+ space_or_org_id, filter_zero=filter_zero
210
+ )
211
+ result = (
212
+ f"## ⭐️ Org/User {kind.title()} Likes Rankings ⭐️\n"
213
+ + f"Here are the rankings for the org/user across all of their {kind}s \n"
214
+ )
215
+ result += f"""- You have {org_rank['value']:,} likes for this org/user.\n"""
216
+ result += f"""- Your org/user is ranked {org_rank['position']:,}\n"""
217
+ result += f"""- You have {org_rank['num_higher']:,} orgs/users above and {org_rank['num_lower']:,} orgs/users below in the ranking of {kind} likes \n\n"""
218
+ result += f"""- Organization or user [{space_or_org_id}](https://huggingface.co/{space_or_org_id}) is ranked in the top {org_rank['rank']:.2f}% \n\n"""
219
+ if kind == "space":
220
  result += f"""You can find all your Spaces sorted by likes [here](https://huggingface.co/{space_or_org_id}?sort_spaces=likes#spaces)\n"""
221
+ if kind == "model":
222
+ result += f"""You can find all your Models sorted by likes [here](https://huggingface.co/{space_or_org_id}?sort_models=likes#models)\n"""
223
+ if kind == "dataset":
224
+ result += f"""You can find all your Datasets sorted by likes [here](https://huggingface.co/{space_or_org_id}?sort_datasets=likes#datasets)\n"""
225
+ return _create_footer_message(result, kind)
226
+
227
+
228
+ def _rank_single_repo(space_or_org_id, kind, filter_zero):
229
+ if kind == "space":
230
+ repo_rank = relative_rank_for_space(space_or_org_id, filter_zero=filter_zero)
231
+ elif kind == "model":
232
+ repo_rank = relative_rank_for_model(space_or_org_id, filter_zero=filter_zero)
233
+ elif kind == "dataset":
234
+ repo_rank = relative_rank_for_dataset(space_or_org_id, filter_zero=filter_zero)
235
+ result = f"## ⭐️ {kind.title()} Likes Rankings ⭐️\n"
236
+ result += f"""Here are the rankings by likes for [`{space_or_org_id}`](https://huggingface.co/spaces/{space_or_org_id}) across all {kind}s \n"""
237
+ result += f"""- You have {repo_rank['value']:,} likes for this {kind}.\n"""
238
+ result += f"""- Your {kind} is ranked {repo_rank['position']:,}.\n"""
239
+ if kind == "space":
240
+ result += f"""- Space [{space_or_org_id}](https://huggingface.co/spaces/{space_or_org_id}) is ranked {repo_rank['rank']:.2f}%\n"""
241
+ if kind == "model":
242
+ result += f"""- Model [{space_or_org_id}](https://huggingface.co/model/{space_or_org_id}) is ranked {repo_rank['rank']:.2f}%\n"""
243
+ if kind == "dataset":
244
+ result += f"""- Dataset [{space_or_org_id}](https://huggingface.co/dataset/{space_or_org_id}) is ranked {repo_rank['rank']:.2f}%\n"""
245
+ result += f"""- You have {repo_rank['num_higher']:,} {kind}s above and {repo_rank['num_lower']:,} {kind}s below in the ranking of {kind}s likes\n\n"""
246
+ return _create_footer_message(result, kind)
247
+
248
+
249
+ def _create_footer_message(result, kind):
250
+ result += """### ✨ Remember likes aren't everything!✨\n"""
251
+ if kind == "space":
252
  result += """Some Spaces go very viral whilst other Spaces may be very useful for a smaller audience. If you think your Space is useful, please add it to this [thread](https://huggingface.co/spaces/librarian-bots/ranker/discussions/3) of awesome Spaces.
253
+ We'll look out for awesome Spaces added to this thread to promote more widely!"""
254
+ return result
255
 
256
 
257
def get_top_n_orgs_and_users_spaces(top_n=100):
    """Return the first ``top_n`` ``(author, total Space likes)`` pairs, highest first."""
    likes_by_org = create_org_to_space_like_dict()
    ranked = sorted(likes_by_org.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[:top_n]
263
 
264
 
265
def get_top_n_orgs_and_users_models(metric, top_n=100):
    """Return the first ``top_n`` ``(author, summed model metric)`` pairs, highest first.

    ``metric`` is forwarded to ``create_org_to_model_metrics``.
    """
    totals_by_org = create_org_to_model_metrics(metric)
    ranked = sorted(totals_by_org.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[:top_n]
271
 
272
 
273
def get_top_n_orgs_and_users_datasets(metric, top_n=100):
    """Return the first ``top_n`` ``(author, summed dataset metric)`` pairs, highest first.

    ``metric`` is forwarded to ``create_org_to_dataset_metrics``.
    """
    totals_by_org = create_org_to_dataset_metrics(metric)
    ranked = sorted(totals_by_org.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[:top_n]
279
 
280
 
281
def plot_top_n_orgs_and_users(kind, metric="likes", top_n=100):
    """Render the org/user leaderboard for one repo kind as Markdown.

    Args:
        kind: ``"space"``, ``"model"`` or ``"dataset"``.
        metric: metric forwarded to the model/dataset leaderboard helpers.
            Spaces are always ranked by likes, so it is ignored for them.
        top_n: number of leaderboard rows requested.

    Returns:
        A Markdown heading followed by one numbered line per org/user.

    Raises:
        ValueError: if ``kind`` is not one of the three supported kinds
            (previously the function silently returned ``None``).
    """
    if kind not in ("space", "model", "dataset"):
        raise ValueError(f"Unsupported repo kind: {kind!r}")

    if kind == "space":
        metric = "likes"  # the Space helper has no metric parameter
        ranked = get_top_n_orgs_and_users_spaces(top_n)
    elif kind == "model":
        ranked = get_top_n_orgs_and_users_models(metric, top_n=top_n)
    else:
        ranked = get_top_n_orgs_and_users_datasets(metric, top_n=top_n)

    # The heading and row label follow top_n and metric instead of the old
    # hard-coded "100" and "likes"; with the defaults the text is unchanged.
    header = f"## 🏅 Top {top_n} Orgs and Users by {kind.title()} {metric.title()} 🏅"
    body = "".join(
        f"\n{position}. [{org}](https://huggingface.co/{org}) with {total:,} {metric}"
        for position, (org, total) in enumerate(ranked, start=1)
    )
    return header + body
307
+
308
+
309
def get_top_n_spaces(top_n=100):
    """Return the first ``top_n`` ``(space id, likes)`` pairs, highest likes first."""
    likes_by_space = create_space_to_like_dict()
    ranked = sorted(likes_by_space.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[:top_n]
317
+
318
+
319
@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def get_top_n_models(metric_kind, top_n=100):
    """Return the first ``top_n`` ``(model id, metric value)`` pairs, highest first.

    ``metric_kind`` is forwarded to ``create_model_to_like_dict``.
    """
    metric_by_model = create_model_to_like_dict(metric_kind)
    ranked = sorted(metric_by_model.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[:top_n]
328
+
329
+
330
@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def get_top_n_datasets(metric, top_n=100):
    """Return the first ``top_n`` ``(dataset id, metric value)`` pairs, highest first.

    ``metric`` is forwarded to ``create_dataset_to_like_dict``.
    """
    metric_by_dataset = create_dataset_to_like_dict(metric)
    ranked = sorted(metric_by_dataset.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[:top_n]
339
+
340
+
341
def _plot_top_n_hub_repos(kind: HubRepoType, metric="likes", top_n=100):
    """Render the per-repository leaderboard for one repo kind as Markdown.

    Args:
        kind: which kind of Hub repository to list.
        metric: metric forwarded to the model/dataset helpers. Spaces are
            always ranked by likes, so it is ignored for them.
        top_n: number of leaderboard rows requested.

    Returns:
        A Markdown heading followed by one numbered, linked line per repo.

    Raises:
        ValueError: if ``kind`` is not a ``HubRepoType`` member
            (previously the function silently returned ``None``).
    """
    # Hub routes: Spaces live under /spaces/, datasets under /datasets/, and
    # models directly under the site root. The old /model/ and /dataset/
    # paths are not Hub routes.
    if kind == HubRepoType.SPACE:
        metric = "likes"  # get_top_n_spaces has no metric parameter
        ranked = get_top_n_spaces(top_n)
        url_prefix = "https://huggingface.co/spaces/"
    elif kind == HubRepoType.MODEL:
        ranked = get_top_n_models(metric, top_n)
        url_prefix = "https://huggingface.co/"
    elif kind == HubRepoType.DATASET:
        ranked = get_top_n_datasets(metric, top_n)
        url_prefix = "https://huggingface.co/datasets/"
    else:
        raise ValueError(f"Unsupported repo kind: {kind!r}")

    # With the default arguments the heading text matches the old one.
    header = (
        f"## 🏅 Top {top_n} {kind.value.title()} repositories by"
        f" {metric.title()} 🏅"
    )
    body = "".join(
        f"\n{position}. [{repo_id}]({url_prefix}{repo_id}) with {value:,} {metric}"
        for position, (repo_id, value) in enumerate(ranked, start=1)
    )
    return header + body
369
+
370
+
371
def plot_top_n_hub_repos(kind, metric_kind="likes", top_n=100):
    """Render the per-repository leaderboard for a repo kind given as a string.

    Args:
        kind: ``"space"``, ``"model"`` or ``"dataset"``.
        metric_kind: metric forwarded to ``_plot_top_n_hub_repos``.
        top_n: number of leaderboard rows requested.

    Returns:
        The Markdown produced by ``_plot_top_n_hub_repos``.

    Raises:
        ValueError: if ``kind`` is not the value of a ``HubRepoType`` member
            (previously the function silently returned ``None``).
    """
    # Enum lookup by value replaces the if/elif chain.
    repo_type = HubRepoType(kind)
    # Keyword arguments only: the old space branch passed top_n positionally,
    # where it landed in the `metric` slot and the requested top_n was lost.
    return _plot_top_n_hub_repos(repo_type, metric=metric_kind, top_n=top_n)
380
 
381
 
382
# NOTE(review): the nesting of the layout below was reconstructed from a
# flattened diff view — confirm row/accordion membership against app.py.
with gr.Blocks() as demo:
    # --- Page header -------------------------------------------------------
    gr.HTML("<h1 style='text-align: center;'> &#127942; HuggyRanker &#127942; </h1>")
    gr.HTML(
        "<p style='text-align: center;'>Rank a single repository or all of the repositories created by an organization or user by likes</p>"
    )
    gr.HTML(
        """<p style="text-align: center;"><i>Remember likes aren't everything!</i></p>"""
    )
    gr.Markdown(
        "## Rank Specific Hub repositories or rank an organization or user by likes\n"
        "Provide this app with a Hub ID e.g. `librarian-bots/ranker` or a Username/Organization name e.g. `librarian-bots` to rank by likes."
    )

    # --- Inputs for ranking one repo or org/user ---------------------------
    with gr.Row():
        space_id = gr.Textbox(
            "librarian-bots",
            max_lines=1,
            label="Space or user/organization ID",
        )
        filter_zero_likes = gr.Radio(
            choices=["no", "yes"],
            label="Filter out repositories with 0 likes in the ranking?",
            value="yes",
        )
        repo_type = gr.Radio(
            choices=["space", "model", "dataset"],
            label="Type of repo",
            value="space",
            interactive=True,
        )
    run_btn = gr.Button("Show ranking for this Space or org/user!", label="Rank Space")
    result = gr.Markdown()
    # Input order must match rank_space_and_org(space_or_org_id, kind, filter_zero).
    run_btn.click(
        rank_space_and_org,
        inputs=[space_id, repo_type, filter_zero_likes],
        outputs=result,
    )

    # --- Leaderboards ------------------------------------------------------
    gr.Markdown("## Leaderboard of Top 100 Spaces and Orgs/Users by Likes")
    gr.Markdown(
        "The leaderboard is updated every 30 minutes.\n"
        "Choose the type of repo to rank by likes and click the button to show the leaderboard."
    )
    show_refresh_btn = gr.Button("Show/refresh Leaderboard", label="Refresh")
    with gr.Row():
        # The same button fills both accordions for the selected repo type.
        with gr.Accordion("Show rankings for Orgs and Users", open=False):
            org_user_ranking = gr.Markdown()
            show_refresh_btn.click(
                plot_top_n_orgs_and_users,
                inputs=[repo_type],
                outputs=org_user_ranking,
            )
        with gr.Accordion("Show rankings for individual repositories", open=False):
            repo_level_ranking = gr.Markdown()
            show_refresh_btn.click(
                plot_top_n_hub_repos,
                inputs=[repo_type],
                outputs=repo_level_ranking,
            )

# Enable queuing with concurrency_count=4, then start the app.
demo.queue(concurrency_count=4).launch()