import requests
import gradio as gr
from urllib.parse import urlencode
import os
from datetime import datetime
import json

# Default recap image shown before a user lookup runs
DEFAULT_IMAGE = "https://hub-recap.imglab-cdn.net/default.jpg?width=1200&text=%3Cspan+size%3D%2212pt%22+weight%3D%22bold%22%3EHugging+Face++%E2%9D%A4%EF%B8%8F+bartowski+in+2024%3C%2Fspan%3E%0A%0A%3Cspan+weight%3D%22bold%22%3E2%2C020%2C552%3C%2Fspan%3E+model+downloads%0A%3Cspan+weight%3D%22bold%22%3E5%2C407%3C%2Fspan%3E+model+likes%0A%3Cspan+weight%3D%22bold%22%3E0%3C%2Fspan%3E+dataset+downloads%0A%3Cspan+weight%3D%22bold%22%3E0%3C%2Fspan%3E+dataset+likes%0A%0A%3Cspan+size%3D%2210pt%22%3EMost+Popular+Contributions%3A%3C%2Fspan%3E%0AModel%3A+%3Cspan+weight%3D%22bold%22%3Ebartowski%2Fgemma-2-9b-it-GGUF%3C%2Fspan%3E%0A++%2843%2C949+downloads%2C+196+likes%29%0ADataset%3A+%3Cspan+weight%3D%22bold%22%3ENone%3C%2Fspan%3E%0A++%280+downloads%2C+0+likes%29%0ASpace%3A+%3Cspan+weight%3D%22bold%22%3Ebartowski%2Fgguf-metadata-updater%3C%2Fspan%3E%0A++%287+likes%29&text-width=800&text-height=600&text-padding=60&text-color=39%2C71%2C111&text-x=460&text-y=40&format=png&dpr=2"

# Load percentiles data
with open("percentiles.json") as f:
    PERCENTILES = json.load(f)


def get_percentile_rank(likes, category):
    if likes == 0:
        return 0
    percentiles = PERCENTILES[f"{category}_percentiles"]
    if likes >= percentiles["p_99999"]:
        return 99.999
    elif likes >= percentiles["p_9999"]:
        return 99.99
    elif likes >= percentiles["p_999"]:
        return 99.9
    return 0


def create_image(stats, username):
    # Determine which image to use based on highest value
    total_stats = stats["Total Statistics"]
    model_activity = total_stats["Model Downloads"] + total_stats["Model Likes"]
    dataset_activity = total_stats["Dataset Downloads"] + total_stats["Dataset Likes"]
    space_activity = total_stats["Space Likes"]

    # Calculate percentiles based on likes
    model_percentile = get_percentile_rank(total_stats["Model Likes"], "model")
    dataset_percentile = get_percentile_rank(total_stats["Dataset Likes"], "dataset")
    space_percentile = get_percentile_rank(space_activity, "space")

    # Choose base image URL based on highest activity (keep using activity for image selection)
    if model_activity == 0 and dataset_activity == 0 and space_activity == 0:
        url = "https://hub-recap.imglab-cdn.net/images/empty.png"
        avatar = "newbie! We couldn't find your stats on the Hub, maybe in 2025?"
    elif model_activity >= max(dataset_activity, space_activity):
        url = "https://hub-recap.imglab-cdn.net/images/model.png"
        avatar = "Model Pro" + (
            f" (top {model_percentile}%)" if model_percentile > 0 else ""
        )
    elif dataset_activity >= max(model_activity, space_activity):
        url = "https://hub-recap.imglab-cdn.net/images/dataset.png"
        avatar = "Dataset Guru" + (
            f" (top {dataset_percentile}%)" if dataset_percentile > 0 else ""
        )
    elif space_activity >= max(model_activity, dataset_activity):
        url = "https://hub-recap.imglab-cdn.net/images/space.png"
        avatar = "Space Artiste" + (
            f" (top {space_percentile}%)" if space_percentile > 0 else ""
        )
    else:
        url = "https://hub-recap.imglab-cdn.net/images/empty.png"
        avatar = "newbie! We couldn't find your stats on the Hub, maybe in 2025?"
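
    # Note: whenever execution reaches the elif chain above, one of the three
    # branches matches (each compares with `>=` against the max of the other
    # two categories), so the trailing `else` is a defensive fallback.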

    # Build text content with proper formatting
    text_parts = []
    text_parts.append(f"Hugging Face ❤️ {username} in 2024")
    text_parts.append("")  # Empty line for spacing

    # Stats section
    stats_lines = []
    if total_stats["Model Downloads"] > 0:
        stats_lines.append(f'{total_stats["Model Downloads"]:,} model downloads')
    if total_stats["Model Likes"] > 0:
        stats_lines.append(f'{total_stats["Model Likes"]:,} model likes')
    if total_stats["Dataset Downloads"] > 0:
        stats_lines.append(f'{total_stats["Dataset Downloads"]:,} dataset downloads')
    if total_stats["Dataset Likes"] > 0:
        stats_lines.append(f'{total_stats["Dataset Likes"]:,} dataset likes')
    if total_stats["Space Likes"] > 0:
        stats_lines.append(f'{total_stats["Space Likes"]:,} space likes')

    if stats_lines:
        text_parts.extend(stats_lines)
        text_parts.append("")  # Empty line for spacing

    # Popular items section
    top_items = stats["Most Popular Items"]
    if any(
        item["likes"] > 0 or item.get("downloads", 0) > 0
        for item in top_items.values()
    ):
        text_parts.append("Most Popular Contributions:")
        if top_items["Top Model"]["downloads"] > 0:
            text_parts.append(f'Model: {top_items["Top Model"]["name"]}')
            text_parts.append(
                f'  ({top_items["Top Model"]["downloads"]:,} downloads, {top_items["Top Model"]["likes"]} likes)'
            )
        if top_items["Top Dataset"]["downloads"] > 0:
            text_parts.append(f'Dataset: {top_items["Top Dataset"]["name"]}')
            text_parts.append(
                f'  ({top_items["Top Dataset"]["downloads"]:,} downloads, {top_items["Top Dataset"]["likes"]} likes)'
            )
        if top_items["Top Space"]["likes"] > 0:
            text_parts.append(f'Space: {top_items["Top Space"]["name"]}')
            text_parts.append(f'  ({top_items["Top Space"]["likes"]} likes)')

    # Update the avatar message with percentile
    text_parts.append("")  # Empty line for spacing
    text_parts.append(f"You are a {avatar}!")

    # Add additional percentile info if other categories are significant
    other_percentiles = []
    if model_percentile > 0 and "model" not in avatar.lower():
        other_percentiles.append(f"Top {model_percentile}% in models")
    if dataset_percentile > 0 and "dataset" not in avatar.lower():
        other_percentiles.append(f"Top {dataset_percentile}% in datasets")
    if space_percentile > 0 and "space" not in avatar.lower():
        other_percentiles.append(f"Top {space_percentile}% in spaces")

    if other_percentiles:
        text_parts.append(f'{". ".join(other_percentiles)}!')
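
    # urlencode (below) percent-encodes the UTF-8 overlay text, so the heart
    # emoji and commas end up in the same escaped form seen in DEFAULT_IMAGE
    # (%E2%9D%A4%EF%B8%8F and %2C respectively).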

    # Join all parts with newlines
    text = "\n".join(text_parts)

    params = {
        "width": "1200",
        "text": text,
        "text-width": "700",
        "text-height": "600",
        "text-padding": "30",
        "text-color": "39,71,111",
        "text-x": "460",
        "text-y": "40",
        "format": "png",
        "dpr": "2",
    }

    return f"{url}?{urlencode(params)}"


def is_from_2024(created_at_str):
    if not created_at_str:
        return False
    created_at = datetime.strptime(created_at_str, "%Y-%m-%dT%H:%M:%S.%fZ")
    return created_at.year == 2024


def get_user_stats(username):
    headers = {"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"}

    # Get models stats
    models_response = requests.get(
        "https://huggingface.co/api/models",
        params={"author": username, "full": "True"},
        headers=headers,
    )
    # Filter for 2024 models only
    models = [
        model
        for model in models_response.json()
        if is_from_2024(model.get("createdAt"))
    ]

    # Get datasets stats
    datasets_response = requests.get(
        "https://huggingface.co/api/datasets",
        params={"author": username, "full": "True"},
        headers=headers,
    )
    # Filter for 2024 datasets only
    datasets = [
        dataset
        for dataset in datasets_response.json()
        if is_from_2024(dataset.get("createdAt"))
    ]

    # Get spaces stats
    spaces_response = requests.get(
        "https://huggingface.co/api/spaces",
        params={"author": username, "full": "True"},
        headers=headers,
    )
    # Filter for 2024 spaces only
    spaces = [
        space
        for space in spaces_response.json()
        if is_from_2024(space.get("createdAt"))
    ]

    # Calculate totals for 2024 items only
    total_model_downloads = sum(model.get("downloads", 0) for model in models)
    total_model_likes = sum(model.get("likes", 0) for model in models)
    total_dataset_downloads = sum(dataset.get("downloads", 0) for dataset in datasets)
    total_dataset_likes = sum(dataset.get("likes", 0) for dataset in datasets)
    total_space_likes = sum(space.get("likes", 0) for space in spaces)

    # Find most liked items from 2024
    most_liked_model = max(models, key=lambda x: x.get("likes", 0), default=None)
    most_liked_dataset = max(datasets, key=lambda x: x.get("likes", 0), default=None)
    most_liked_space = max(spaces, key=lambda x: x.get("likes", 0), default=None)

    stats = {
        "Total Statistics": {
            "Model Downloads": total_model_downloads,
            "Model Likes": total_model_likes,
            "Dataset Downloads": total_dataset_downloads,
            "Dataset Likes": total_dataset_likes,
            "Space Likes": total_space_likes,
        },
        "Most Popular Items": {
            "Top Model": {
                "name": (
                    most_liked_model.get("modelId", "None")
                    if most_liked_model
                    else "None"
                ),
                "likes": most_liked_model.get("likes", 0) if most_liked_model else 0,
                "downloads": (
                    most_liked_model.get("downloads", 0) if most_liked_model else 0
                ),
            },
            "Top Dataset": {
                "name": (
                    most_liked_dataset.get("id", "None")
                    if most_liked_dataset
                    else "None"
                ),
                "likes": (
                    most_liked_dataset.get("likes", 0) if most_liked_dataset else 0
                ),
                "downloads": (
                    most_liked_dataset.get("downloads", 0) if most_liked_dataset else 0
                ),
            },
            "Top Space": {
                "name": (
                    most_liked_space.get("id", "None") if most_liked_space else "None"
                ),
                "likes": most_liked_space.get("likes", 0) if most_liked_space else 0,
            },
        },
    }

    # Generate image URL
    image_url = create_image(stats, username)
    return image_url
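

# Quick manual check (illustrative only; assumes HF_TOKEN is set in the
# environment and outbound network access is available):
#
#   >>> get_user_stats("bartowski")
#   'https://hub-recap.imglab-cdn.net/images/model.png?width=1200&text=...'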


with gr.Blocks(title="Hugging Face Community Stats") as demo:
    gr.Markdown("# Hugging Face Community Recap")
    gr.Markdown(
        "Enter a username to see their impact and top contributions across the Hugging Face Hub"
    )

    with gr.Row():
        username_input = gr.Textbox(
            label="Hub username",
            placeholder="Enter Hugging Face username...",
            scale=6,
            value="bartowski",
        )
        submit_btn = gr.Button("Get Stats", scale=6)

    with gr.Row():
        # Add example usernames
        gr.Examples(
            examples=[
                ["merve"],
                ["mlabonne"],
                ["bartowski"],
                ["huggingface"],
                ["cfahlgren1"],
            ],
            inputs=username_input,
            label="Try these examples",
        )

    with gr.Row():
        with gr.Column():
            stats_image = gr.Markdown(f"![Hugging Face Stats]({DEFAULT_IMAGE})")

    def format_markdown(image_url):
        return f"![Hugging Face Stats]({image_url})"

    # Handle submission
    submit_btn.click(
        fn=lambda x: format_markdown(get_user_stats(x)),
        inputs=username_input,
        outputs=stats_image,
        api_name="get_stats",
    )

    # Also trigger on enter key
    username_input.submit(
        fn=lambda x: format_markdown(get_user_stats(x)),
        inputs=username_input,
        outputs=stats_image,
    )

if __name__ == "__main__":
    demo.launch()
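
# The `api_name="get_stats"` above also exposes the handler over Gradio's API.
# A minimal client sketch (assumes the app is running locally on the default
# port and that the `gradio_client` package is installed):
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860")
#   markdown = client.predict("bartowski", api_name="/get_stats")
#   print(markdown)  # "![Hugging Face Stats](https://hub-recap.imglab-cdn.net/...)"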