"""Gradio app showing like/repo statistics for Hugging Face Hub authors."""

from datetime import datetime

import gradio as gr
import pandas as pd
from huggingface_hub import list_datasets, list_models, list_spaces

# Number of leaderboard rows kept by default; also quoted in the page text.
DEFAULT_LIMIT = 1000

# Repo kinds scanned on the Hub; each yields "<kind>_likes" / "num_<kind>s".
_REPO_KINDS = ('model', 'dataset', 'space')

# Per-author counters, in the order they are created for a new author.
_STAT_KEYS = (
    'model_likes',
    'num_models',
    'dataset_likes',
    'num_datasets',
    'space_likes',
    'num_spaces',
    'total_likes',
    'total_repos',
)

# Column order of the leaderboard table, excluding the leading "rank" column.
_TABLE_COLUMNS = (
    'username',
    'total_likes',
    'model_likes',
    'num_models',
    'dataset_likes',
    'num_datasets',
    'space_likes',
    'num_spaces',
    'total_repos',
)


def get_user_stats():
    """Aggregate likes and repo counts per author across models, datasets and spaces.

    Iterates every repo returned by ``list_models``, ``list_datasets`` and
    ``list_spaces`` (each called with ``full=True``) and skips repos whose
    ``author`` is ``None``.

    Returns:
        dict mapping author name to a dict with the keys ``model_likes``,
        ``num_models``, ``dataset_likes``, ``num_datasets``, ``space_likes``,
        ``num_spaces``, ``total_likes`` and ``total_repos``.
    """
    users = {}
    listers = zip(_REPO_KINDS, (list_models, list_datasets, list_spaces))
    for kind, list_repos in listers:
        likes_key = f"{kind}_likes"
        count_key = f"num_{kind}s"
        for repo in list_repos(full=True):
            author = repo.author
            if author is None:
                continue
            stats = users.get(author)
            if stats is None:
                stats = users[author] = dict.fromkeys(_STAT_KEYS, 0)
            # NOTE(review): `likes` is assumed to be possibly None for some
            # repos; count those as 0 rather than aborting the whole scan.
            stats[likes_key] += repo.likes or 0
            stats[count_key] += 1

    # Totals are built from the explicit per-kind keys rather than substring
    # matching on key names, so the total columns can never feed into themselves.
    for stats in users.values():
        stats['total_likes'] = sum(stats[f"{kind}_likes"] for kind in _REPO_KINDS)
        stats['total_repos'] = sum(stats[f"num_{kind}s"] for kind in _REPO_KINDS)
    return users


def make_clickable_user(user_id):
    """Return an HTML anchor that links ``user_id`` to its Hugging Face profile page.

    The result is placed in the table column declared with the "markdown"
    datatype below.
    """
    link = "https://huggingface.co/" + user_id
    return f'<a target="_blank" href="{link}">{user_id}</a>'


def get_user_stats_df(limit=DEFAULT_LIMIT):
    """Build the leaderboard table, ranked by total likes (highest first).

    Args:
        limit: maximum number of rows to keep; a falsy value (``None`` or
            ``0``) keeps every row.

    Returns:
        pandas.DataFrame with a 1-based ``rank`` column followed by
        ``username``, ``total_likes``, the per-kind like/repo counts and
        ``total_repos``.
    """
    users = get_user_stats()
    rows = [
        {'username': make_clickable_user(name), **stats}
        for name, stats in users.items()
    ]
    # Passing the columns explicitly keeps the frame well-formed (and
    # sortable) even when no users were collected.
    df = pd.DataFrame(rows, columns=list(_TABLE_COLUMNS))
    df = df.sort_values(by=["total_likes"], ascending=False)
    df.insert(0, "rank", list(range(1, len(df) + 1)))
    if limit:
        df = df.head(limit)
    return df


# Computed once at start-up; the page shows this snapshot until restart.
df = get_user_stats_df()

desc = f"""
# 🤗 Hugging Face User Stats

Here are some stats on the top {DEFAULT_LIMIT} users/organizations on the Hugging Face Hub.

**Updated {datetime.now().strftime("%d/%m/%y")}**
"""

with gr.Blocks() as demo:
    gr.Markdown(desc)
    data = gr.components.Dataframe(
        df,
        type="pandas",
        # One entry per table column: rank, username (rendered as markdown),
        # then the eight numeric statistics.
        datatype=["number", "markdown"] + ["number"] * 8,
    )

demo.launch()