import gradio as gr import pandas as pd from huggingface_hub import HfApi from collections import defaultdict # ------------------------------------------------------ # Get spaces with more details api = HfApi() spaces = api.list_spaces(limit=60000) # Limiting to 60000 for now # Create a DataFrame data = [] for space in spaces: data.append({ 'id': space.id, 'title': space.id.split('/')[-1], 'author': space.author if space.author else space.id.split('/')[0], 'likes': space.likes, 'tags': space.tags if hasattr(space, 'tags') else [], }) df = pd.DataFrame(data) print("Total spaces collected:", len(df)) print("\nSample of the data:") print(df.head()) # ------------------------------------------------------ # Define categories and their keywords categories = { 'Text-to-Speech': ['tts', 'speech', 'voice', 'audio', 'kokoro'], 'Transcription': ['transcribe', 'transcription'], 'Agents': ['agent', 'agents', 'smol', 'multi-step', 'autobot', 'autoGPT' 'agentic'], 'Image Gen/Editing': ['stable-diffusion', 'diffusion', 'flux', 'dalle', 'CLIP', 'comic', 'gan', 'sdxl', 'pic', 'img', 'stable', 'midjourney', 'diffusion', 'image', 'ControlNet', 'Control Net', 'dreambooth', 'blip', 'LoRA', 'img2img', 'style', 'art'], 'Video': ['video', 'animation', 'motion', 'sora'], 'Face/Portrait': ['face', 'portrait', 'gaze', 'facial'], 'Chat/LLM': ['chat', 'llm', 'gpt', 'llama', 'text', 'language'], '3D': ['3d', 'mesh', 'point-cloud', 'depth'], 'Audio': ['audio', 'tts', 'music', 'whisper', 'sound', 'voice'], 'Vision': ['vision', 'detection', 'recognition', 'classifier'], 'CLIP': ['image-to-text', 'describe-image'], 'Games': ['game', 'games', 'play', 'playground'], 'Finance': ['finance', 'stock', 'money', 'currency', 'bank', 'market'], 'SAM': ['sam', 'segmentation', 'mask'], 'Science': ['science', 'physics', 'chemistry', 'biology', 'math', 'astronomy', 'geology', 'meteorology', 'engineering', 'medicine', 'health', 'nutrition', 'environment', 'ecology', 'geography', 'geology', 'geophysics'], 'Education': ['education', 'school', 'university', 'college', 'teaching', 'learning', 'study', 'research'], 'Graph': ['graph', 'network', 'node', 'edge', 'path', 'tree', 'cycle', 'flow', 'matching', 'coloring', 'swarm'], 'Research': ['research', 'study', 'experiment', 'paper', 'discovery', 'innovation', 'exploration', 'analysis'], 'Document Analyis': ['pdf', 'RAG', 'idefecs'], 'WebGPU': ['localModel', 'webGPU'], 'Point Tracking': ['CoTracker', 'tapir', 'tapnet', 'point', 'track'], 'Games': ['game', 'Unity', 'UE5', 'Unreal'], 'Leaderboard': ['arena', 'leaderboard', 'timeline'], 'Other': [] # Default category } def categorize_space(title, tags): title_lower = title.lower() # Convert tags to lowercase if tags exist tags_lower = [t.lower() for t in tags] if tags else [] for category, keywords in categories.items(): # Check both title and tags for keywords if any(keyword in title_lower for keyword in keywords) or \ any(keyword in tag for keyword in keywords for tag in tags_lower): return category return 'Other' # Add category to DataFrame df['category'] = df.apply(lambda x: categorize_space(x['title'], x['tags']), axis=1) # Show category distribution category_counts = df['category'].value_counts() print("\nCategory Distribution:") print(category_counts) # Show sample spaces from each category print("\nSample spaces from each category:") for category in categories.keys(): print(f"\n{category}:") sample = df[df['category'] == category].head(3) print(sample[['title', 'likes']].to_string()) # ------------------------------------------------------ # Add total likes per category category_likes = df.groupby('category')['likes'].sum().sort_values(ascending=False) print("Total likes per category:") print(category_likes) print("\nTop 10 spaces in each category (sorted by likes):") for category in categories.keys(): print(f"\n=== {category} ===") top_10 = df[df['category'] == category].nlargest(10, 'likes')[['title', 'likes']] # Format output with padding for better readability print(top_10.to_string(index=False)) # ------------------------------------------------------ # Add space URLs df['url'] = 'https://huggingface.co/spaces/' + df['id'] # Show the top 10 spaces from each category with their links # print("Top 10 spaces in each category with links:") # for category in categories.keys(): # print(f"\n=== {category} ===") # top_10 = df[df['category'] == category].nlargest(10, 'likes')[['title', 'likes', 'url']] # Format output with padding for better readability # print(top_5.to_string(index=False)) # ------------------------------------------------------ def search_spaces(search_text="", category="All Categories", offset=0, limit=100): # Filter spaces if category == "All Categories": spaces_df = df else: spaces_df = df[df['category'] == category] if search_text: spaces_df = spaces_df[spaces_df['title'].str.lower().str.contains(search_text.lower())] # Sort by likes and get total count spaces_df = spaces_df.sort_values('likes', ascending=False) total_spaces = len(spaces_df) total_pages = (total_spaces + limit - 1) // limit current_page = (offset // limit) + 1 # Get the current page of spaces spaces = spaces_df.iloc[offset:offset + limit][['title', 'likes', 'url', 'category']] total_likes = spaces_df['likes'].sum() # Generate HTML content html_content = f"""
Page {current_page} of {total_pages}
Showing {offset + 1}-{min(offset + limit, total_spaces)} of {total_spaces} Spaces
Total Likes: {total_likes:,}
❤️ {row['likes']:,} likes