import requests
import gradio as gr
from urllib.parse import urlencode
import os
from datetime import datetime
import json
# Pre-rendered imglab URL shown as a placeholder before the first lookup
# (it is the rendered recap for the "bartowski" example user).
DEFAULT_IMAGE = "https://hub-recap.imglab-cdn.net/default.jpg?width=1200&text=%3Cspan+size%3D%2212pt%22+weight%3D%22bold%22%3EHugging+Face++%E2%9D%A4%EF%B8%8F+bartowski+in+2024%3C%2Fspan%3E%0A%0A%3Cspan+weight%3D%22bold%22%3E2%2C020%2C552%3C%2Fspan%3E+model+downloads%0A%3Cspan+weight%3D%22bold%22%3E5%2C407%3C%2Fspan%3E+model+likes%0A%3Cspan+weight%3D%22bold%22%3E0%3C%2Fspan%3E+dataset+downloads%0A%3Cspan+weight%3D%22bold%22%3E0%3C%2Fspan%3E+dataset+likes%0A%0A%3Cspan+size%3D%2210pt%22%3EMost+Popular+Contributions%3A%3C%2Fspan%3E%0AModel%3A+%3Cspan+weight%3D%22bold%22%3Ebartowski%2Fgemma-2-9b-it-GGUF%3C%2Fspan%3E%0A++%2843%2C949+downloads%2C+196+likes%29%0ADataset%3A+%3Cspan+weight%3D%22bold%22%3ENone%3C%2Fspan%3E%0A++%280+downloads%2C+0+likes%29%0ASpace%3A+%3Cspan+weight%3D%22bold%22%3Ebartowski%2Fgguf-metadata-updater%3C%2Fspan%3E%0A++%287+likes%29&text-width=800&text-height=600&text-padding=60&text-color=39%2C71%2C111&text-x=460&text-y=40&format=png&dpr=2"

# Per-category like-count thresholds (p_999 / p_9999 / p_99999 tiers)
# consumed by get_percentile_rank().  Explicit encoding so the load does
# not depend on the platform's locale default.
with open("percentiles.json", encoding="utf-8") as f:
    PERCENTILES = json.load(f)
def get_percentile_rank(likes, category):
    """Map a like count onto a "top X%" tier for the given category.

    Looks up ``PERCENTILES["<category>_percentiles"]`` and returns
    99.999 / 99.99 / 99.9 for counts at or above the corresponding
    threshold, or 0 for zero likes / anything below the p_999 cutoff.
    """
    if likes == 0:
        return 0
    thresholds = PERCENTILES[f"{category}_percentiles"]
    # Most exclusive tier first; fall through to 0 when none match.
    for threshold_key, rank in (
        ("p_99999", 99.999),
        ("p_9999", 99.99),
        ("p_999", 99.9),
    ):
        if likes >= thresholds[threshold_key]:
            return rank
    return 0
def create_image(stats, username):
    """Build the imglab recap-image URL for *username*.

    Picks a themed base image for the user's strongest category, composes a
    multi-line text overlay (totals, top contributions, percentile badges)
    and returns the base URL with the imglab rendering parameters encoded
    into the query string.

    Args:
        stats: dict with "Total Statistics" and "Most Popular Items" keys,
            as produced by get_user_stats().
        username: Hub username rendered into the overlay header.

    Returns:
        The fully parameterized image URL (str).
    """
    total_stats = stats["Total Statistics"]

    # "Activity" (downloads + likes) only decides which base image / title
    # the user gets; the percentile badges below are based on likes alone.
    model_activity = total_stats["Model Downloads"] + total_stats["Model Likes"]
    dataset_activity = total_stats["Dataset Downloads"] + total_stats["Dataset Likes"]
    space_activity = total_stats["Space Likes"]

    model_percentile = get_percentile_rank(total_stats["Model Likes"], "model")
    dataset_percentile = get_percentile_rank(total_stats["Dataset Likes"], "dataset")
    space_percentile = get_percentile_rank(space_activity, "space")

    # (activity, title, percentile, base image) per category.  Tie-break
    # order is models > datasets > spaces, because max() keeps the first
    # maximal element — same preference as the original >= elif chain,
    # whose trailing `else` was unreachable.
    candidates = [
        (model_activity, "Model Pro", model_percentile,
         "https://hub-recap.imglab-cdn.net/images/model-v1.png"),
        (dataset_activity, "Dataset Guru", dataset_percentile,
         "https://hub-recap.imglab-cdn.net/images/dataset-v1.png"),
        (space_activity, "Space Artiste", space_percentile,
         "https://hub-recap.imglab-cdn.net/images/space-v1.png"),
    ]
    if all(activity == 0 for activity, _, _, _ in candidates):
        url = "https://hub-recap.imglab-cdn.net/images/empty-v1.png"
        avatar = "newbie! We couldn't find your stats on the Hub, maybe in 2025?"
    else:
        _, title, percentile, url = max(candidates, key=lambda c: c[0])
        # Plain string concat — these titles have no placeholders.
        avatar = title + (f" (top {percentile}%)" if percentile > 0 else "")

    # Header, then a blank line for spacing.
    text_parts = [f"Hugging Face ❤️ {username} in 2024", ""]

    # Totals section — only non-zero counters are listed.
    stats_lines = []
    if total_stats["Model Downloads"] > 0:
        stats_lines.append(f'{total_stats["Model Downloads"]:,} model downloads')
    if total_stats["Model Likes"] > 0:
        stats_lines.append(f'{total_stats["Model Likes"]:,} model likes')
    if total_stats["Dataset Downloads"] > 0:
        stats_lines.append(f'{total_stats["Dataset Downloads"]:,} dataset downloads')
    if total_stats["Dataset Likes"] > 0:
        stats_lines.append(f'{total_stats["Dataset Likes"]:,} dataset likes')
    if total_stats["Space Likes"] > 0:
        stats_lines.append(f'{total_stats["Space Likes"]:,} space likes')
    if stats_lines:
        text_parts.extend(stats_lines)
        text_parts.append("")

    # "Most Popular Contributions" section, skipped entirely for users
    # with no liked/downloaded items at all.
    top_items = stats["Most Popular Items"]
    if any(
        item["likes"] > 0 or item.get("downloads", 0) > 0
        for item in top_items.values()
    ):
        text_parts.append("Most Popular Contributions:")
        top_model = top_items["Top Model"]
        if top_model["downloads"] > 0:
            text_parts.append(f'Model: {top_model["name"]}')
            text_parts.append(
                f'  ({top_model["downloads"]:,} downloads, {top_model["likes"]} likes)'
            )
        top_dataset = top_items["Top Dataset"]
        if top_dataset["downloads"] > 0:
            text_parts.append(f'Dataset: {top_dataset["name"]}')
            text_parts.append(
                f'  ({top_dataset["downloads"]:,} downloads, {top_dataset["likes"]} likes)'
            )
        top_space = top_items["Top Space"]
        if top_space["likes"] > 0:
            text_parts.append(f'Space: {top_space["name"]}')
            text_parts.append(f'  ({top_space["likes"]} likes)')

    # Avatar line plus "Top X%" badges for the runner-up categories
    # (the winning category's badge is already part of the avatar title).
    text_parts.append("")
    text_parts.append(f"You are a {avatar}!")

    other_percentiles = []
    if model_percentile > 0 and "model" not in avatar.lower():
        other_percentiles.append(f"Top {model_percentile}% in models")
    if dataset_percentile > 0 and "dataset" not in avatar.lower():
        other_percentiles.append(f"Top {dataset_percentile}% in datasets")
    if space_percentile > 0 and "space" not in avatar.lower():
        other_percentiles.append(f"Top {space_percentile}% in spaces")
    if other_percentiles:
        text_parts.append(f'{". ".join(other_percentiles)}!')

    text = "\n".join(text_parts)

    # imglab rendering parameters for the text overlay.
    params = {
        "width": "1200",
        "text": text,
        "text-width": "700",
        "text-height": "600",
        "text-padding": "30",
        "text-color": "39,71,111",
        "text-x": "460",
        "text-y": "40",
        "format": "png",
        "dpr": "2",
    }
    return f"{url}?{urlencode(params)}"
def is_from_2024(created_at_str):
    """Return True when an ISO-8601 ``createdAt`` timestamp falls in 2024.

    Accepts the Hub API's format with or without fractional seconds
    (e.g. ``2024-03-05T12:00:00.000Z`` or ``2024-03-05T12:00:00Z``).
    Empty/missing or unparseable values return False instead of raising,
    so a single malformed record cannot break the whole recap.
    """
    if not created_at_str:
        return False
    for fmt in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ"):
        try:
            return datetime.strptime(created_at_str, fmt).year == 2024
        except ValueError:
            continue  # try the next accepted format
    return False
def _fetch_hub_items(endpoint, username, extra_params, headers):
    """GET one Hub listing endpoint for *username* and return the JSON list."""
    response = requests.get(
        f"https://huggingface.co/api/{endpoint}",
        params={"author": username, **extra_params},
        headers=headers,
        timeout=30,  # don't hang the UI if the Hub API is slow/unreachable
    )
    return response.json()


def _summarize_item(item, name_key, with_downloads=True):
    """Collapse a Hub item dict (or None) into the name/likes[/downloads]
    shape consumed by create_image()."""
    summary = {
        "name": item.get(name_key, "None") if item else "None",
        "likes": item.get("likes", 0) if item else 0,
    }
    if with_downloads:
        summary["downloads"] = item.get("downloads", 0) if item else 0
    return summary


def get_user_stats(username):
    """Aggregate a user's model/dataset/space stats from the Hub API and
    return the rendered recap image URL (see create_image).

    Network failures and non-JSON responses propagate to the caller.
    """
    token = os.getenv("HF_TOKEN")
    # Only attach an Authorization header when a token is configured;
    # otherwise the literal string "Bearer None" would be sent.
    headers = {"Authorization": f"Bearer {token}"} if token else {}

    # NOTE(review): per-year filtering via is_from_2024(item["createdAt"])
    # is currently disabled, so totals cover the user's whole catalogue
    # (models are additionally capped at the top 100 by downloads).
    models = _fetch_hub_items(
        "models",
        username,
        {"full": "False", "limit": 100, "sort": "downloads"},
        headers,
    )
    datasets = _fetch_hub_items("datasets", username, {"full": "True"}, headers)
    spaces = _fetch_hub_items("spaces", username, {"full": "True"}, headers)

    stats = {
        "Total Statistics": {
            "Model Downloads": sum(m.get("downloads", 0) for m in models),
            "Model Likes": sum(m.get("likes", 0) for m in models),
            "Dataset Downloads": sum(d.get("downloads", 0) for d in datasets),
            "Dataset Likes": sum(d.get("likes", 0) for d in datasets),
            "Space Likes": sum(s.get("likes", 0) for s in spaces),
        },
        "Most Popular Items": {
            # max(..., default=None) handles users with no items at all;
            # models are keyed by "modelId", datasets/spaces by "id".
            "Top Model": _summarize_item(
                max(models, key=lambda x: x.get("likes", 0), default=None),
                "modelId",
            ),
            "Top Dataset": _summarize_item(
                max(datasets, key=lambda x: x.get("likes", 0), default=None),
                "id",
            ),
            "Top Space": _summarize_item(
                max(spaces, key=lambda x: x.get("likes", 0), default=None),
                "id",
                with_downloads=False,
            ),
        },
    }
    return create_image(stats, username)
# --- Gradio UI ---------------------------------------------------------
with gr.Blocks(title="Hugging Face Community Stats") as demo:
    gr.Markdown("# Hugging Face Community Recap")
    gr.Markdown(
        "Enter a username to see their impact and top contributions across the Hugging Face Hub"
    )

    # Input row: username textbox + submit button.
    with gr.Row():
        username_input = gr.Textbox(
            label="Hub username",
            placeholder="Enter Hugging Face username...",
            scale=6,
            value="bartowski",
        )
        submit_btn = gr.Button("Get Stats", scale=6)

    # One-click example usernames.
    with gr.Row():
        gr.Examples(
            examples=[
                ["merve"],
                ["mlabonne"],
                ["bartowski"],
                ["huggingface"],
                ["cfahlgren1"],
                ["argilla"],
            ],
            inputs=username_input,
            label="Try these examples",
        )

    # Output: the recap image, embedded as markdown (placeholder at start).
    with gr.Row():
        with gr.Column():
            stats_image = gr.Markdown(f"![Hugging Face Stats]({DEFAULT_IMAGE})")

    def format_markdown(image_url):
        # Wrap the generated image URL in markdown plus a footnote.
        return f"![Hugging Face Stats]({image_url}) \n\n * *Downloads are for the last 30 days, likes are for 2024*"

    # The button click exposes a named API endpoint; pressing Enter in the
    # textbox triggers the same refresh without one.
    submit_btn.click(
        fn=lambda x: format_markdown(get_user_stats(x)),
        inputs=username_input,
        outputs=stats_image,
        api_name="get_stats",
    )
    username_input.submit(
        fn=lambda x: format_markdown(get_user_stats(x)),
        inputs=username_input,
        outputs=stats_image,
    )

if __name__ == "__main__":
    demo.launch()