Spaces:

jkorstad
/

spaces-explorer

Sleeping

App Files Files Community

jkorstad committed on Jan 20

Commit

fb53d5e

verified ·

1 Parent(s): 250f026

Update app.py

Browse files

Files changed (1) hide show

app.py +108 -0

app.py CHANGED Viewed

@@ -1,5 +1,113 @@
 import gradio as gr
 import pandas as pd
 def search_spaces(search_text, category):
     if category == "All Categories":

 import gradio as gr
 import pandas as pd
+from huggingface_hub import HfApi
+from collections import defaultdict
+# ------------------------------------------------------
+# Fetch Space metadata from the Hub and load it into a DataFrame
+api = HfApi()
+# The listing is capped at 40000 entries for now
+spaces = api.list_spaces(limit=40000)
+# Collect one plain-dict record per Space
+data = []
+for space in spaces:
+    record = dict(
+        id=space.id,
+        # Title is whatever follows the last '/' in the id
+        title=space.id.rpartition('/')[2],
+        # Fall back to the part of the id before the first '/' when no
+        # author is set
+        author=space.author or space.id.partition('/')[0],
+        likes=space.likes,
+        # Objects without a `tags` attribute get an empty tag list
+        tags=getattr(space, 'tags', []),
+    )
+    data.append(record)
+df = pd.DataFrame(data)
+# Quick sanity report on what was collected
+print("Total spaces collected:", len(df))
+print("\nSample of the data:")
+print(df.head())
+# ------------------------------------------------------
+# Define categories and their keywords.
+#
+# Matching (see categorize_space) is a substring test against the lowercased
+# title and tags, and the first category whose keyword matches wins, so:
+#   * the order of the entries below is the match priority;
+#   * keywords are written in lowercase, otherwise they could never match.
+categories = {
+    'Text-to-Speech': ['tts', 'speech', 'voice', 'audio', 'kokoro'],
+    'Transcription': ['transcribe', 'transcription'],
+    # A comma was missing between the last two keywords, which silently
+    # merged them into one string through implicit literal concatenation.
+    'Agents': ['agent', 'agents', 'smol', 'multi-step', 'autobot', 'autogpt', 'agentic'],
+    'Image Generation': ['stable-diffusion', 'diffusion', 'gan', 'image', 'img2img', 'style', 'art'],
+    'Video': ['video', 'animation', 'motion', 'sora'],
+    'Face/Portrait': ['face', 'portrait', 'gaze', 'facial'],
+    'Chat/LLM': ['chat', 'llm', 'gpt', 'llama', 'text', 'language'],
+    '3D': ['3d', 'mesh', 'point-cloud', 'depth'],
+    'Audio': ['audio', 'music', 'sound', 'voice'],
+    'Vision': ['vision', 'detection', 'recognition', 'classifier'],
+    'CLIP': ['image-to-text', 'describe-image'],
+    # 'Games' used to be defined twice; the later entry replaced the earlier
+    # one and dropped its keywords. Both keyword lists are merged here, at
+    # the position the key already occupied in the iteration order.
+    'Games': ['game', 'games', 'play', 'playground', 'unity', 'ue5', 'unreal'],
+    'Finance': ['finance', 'stock', 'money', 'currency', 'bank', 'market'],
+    'SAM': ['sam', 'segmentation', 'mask'],
+    'Science': ['science', 'physics', 'chemistry', 'biology', 'math', 'astronomy', 'geology', 'meteorology', 'engineering', 'medicine', 'health', 'nutrition', 'environment', 'ecology', 'geography', 'geology', 'geophysics'],
+    'Education': ['education', 'school', 'university', 'college', 'teaching', 'learning', 'study', 'research'],
+    'Graph': ['graph', 'network', 'node', 'edge', 'path', 'tree', 'cycle', 'flow', 'matching', 'coloring', 'swarm'],
+    'Research': ['research', 'study', 'experiment', 'paper', 'discovery', 'innovation', 'exploration', 'analysis'],
+    # NOTE(review): the key is misspelled ("Analyis"); it is left unchanged
+    # here because code outside this section may look it up by this exact name.
+    'Document Analyis': ['pdf', 'rag', 'idefecs'],
+    'WebGPU': ['localmodel', 'webgpu'],
+    'Point Tracking': ['cotracker', 'tapir', 'tapnet', 'point', 'track'],
+    'Leaderboard': ['arena', 'leaderboard', 'timeline'],
+    'Other': []  # Default category
+}
+def categorize_space(title, tags):
+    """Return the name of the first category whose keyword matches a Space.
+
+    A keyword matches when it occurs as a substring of the title or of any
+    tag. The comparison is case-insensitive: the title, the tags and the
+    keywords are all lowercased first. Categories are tried in the iteration
+    order of the module-level ``categories`` dict.
+
+    Args:
+        title: Space title (string).
+        tags: Iterable of tag strings; may be ``None`` or empty.
+
+    Returns:
+        The matching category name, or ``'Other'`` when nothing matches.
+    """
+    title_lower = title.lower()
+    # Convert tags to lowercase if tags exist
+    tags_lower = [t.lower() for t in tags] if tags else []
+    # Title and tags are searched the same way, so treat them as one pool
+    searchable = [title_lower, *tags_lower]
+    for category, keywords in categories.items():
+        for keyword in keywords:
+            # Lowercase the keyword too: the text is already lowercased, so a
+            # mixed-case keyword would otherwise never match
+            needle = keyword.lower()
+            if any(needle in text for text in searchable):
+                return category
+    return 'Other'
+# Label every row with the category derived from its title and tags
+df['category'] = df.apply(
+    lambda row: categorize_space(row['title'], row['tags']),
+    axis=1,
+)
+# Report how many spaces ended up in each category
+category_counts = df['category'].value_counts()
+print("\nCategory Distribution:")
+print(category_counts)
+# Preview the first three spaces of every defined category
+print("\nSample spaces from each category:")
+for category in categories:
+    print(f"\n{category}:")
+    in_category = df['category'] == category
+    sample = df[in_category].head(3)
+    print(sample[['title', 'likes']].to_string())
+# ------------------------------------------------------
+# Sum the likes of each category, largest total first
+category_likes = (
+    df.groupby('category')['likes']
+    .sum()
+    .sort_values(ascending=False)
+)
+print("Total likes per category:")
+print(category_likes)
+print("\nTop 10 spaces in each category (sorted by likes):")
+for category in categories:
+    print(f"\n=== {category} ===")
+    members = df[df['category'] == category]
+    top_10 = members.nlargest(10, 'likes')[['title', 'likes']]
+    # Drop the index column so only title and likes are printed
+    print(top_10.to_string(index=False))
+# ------------------------------------------------------
+# Build each space's URL by prefixing its id with the Spaces base address
+df['url'] = 'https://huggingface.co/spaces/' + df['id']
+# List the five most-liked spaces of every category together with their links
+print("Top 5 spaces in each category with links:")
+for category in categories:
+    print(f"\n=== {category} ===")
+    members = df[df['category'] == category]
+    top_5 = members.nlargest(5, 'likes')[['title', 'likes', 'url']]
+    # Drop the index column so only the selected fields are printed
+    print(top_5.to_string(index=False))
+# ------------------------------------------------------
 def search_spaces(search_text, category):
     if category == "All Categories":