jkorstad committed on
Commit
fb53d5e
·
verified ·
1 Parent(s): 250f026

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -0
app.py CHANGED
@@ -1,5 +1,113 @@
1
  import gradio as gr
2
  import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  def search_spaces(search_text, category):
5
  if category == "All Categories":
 
1
  import gradio as gr
2
  import pandas as pd
3
+ from huggingface_hub import HfApi
4
+ from collections import defaultdict
5
+
6
+ # ------------------------------------------------------
7
+ # Get spaces with more details
8
# Ask the Hub API for Spaces metadata.
api = HfApi()
spaces = api.list_spaces(limit=40000)  # Limiting to 40000 for now

# Flatten every Space into a plain dict record.
# 'title' is the last segment of the id; 'author' falls back to the first
# segment of the id when the Space reports no author.
data = [
    {
        'id': space.id,
        'title': space.id.split('/')[-1],
        'author': space.author or space.id.split('/')[0],
        'likes': space.likes,
        'tags': getattr(space, 'tags', []),
    }
    for space in spaces
]

# Load the records into a DataFrame and print a quick summary.
df = pd.DataFrame(data)
print("Total spaces collected:", len(df))
print("\nSample of the data:")
print(df.head())
26
+
27
+ # ------------------------------------------------------
28
+
29
+ # Define categories and their keywords
30
# Category label -> keywords that assign a space to that category.
#
# Order matters: categorize_space() returns the FIRST category with a matching
# keyword, so a keyword repeated in a later category (e.g. 'audio' / 'voice'
# under 'Audio', 'research' / 'study' under 'Research') is shadowed by the
# earlier one.  Keywords are matched as case-insensitive substrings.
categories = {
    'Text-to-Speech': ['tts', 'speech', 'voice', 'audio', 'kokoro'],
    'Transcription': ['transcribe', 'transcription'],
    # A missing comma used to fuse 'autoGPT' and 'agentic' into one keyword.
    'Agents': ['agent', 'agents', 'smol', 'multi-step', 'autobot', 'autoGPT', 'agentic'],
    'Image Generation': ['stable-diffusion', 'diffusion', 'gan', 'image', 'img2img', 'style', 'art'],
    'Video': ['video', 'animation', 'motion', 'sora'],
    'Face/Portrait': ['face', 'portrait', 'gaze', 'facial'],
    'Chat/LLM': ['chat', 'llm', 'gpt', 'llama', 'text', 'language'],
    '3D': ['3d', 'mesh', 'point-cloud', 'depth'],
    'Audio': ['audio', 'music', 'sound', 'voice'],
    'Vision': ['vision', 'detection', 'recognition', 'classifier'],
    'CLIP': ['image-to-text', 'describe-image'],
    # 'Games' was defined twice; the second literal silently replaced the
    # first.  Both keyword lists are merged here, at the first position.
    'Games': ['game', 'games', 'play', 'playground', 'Unity', 'UE5', 'Unreal'],
    'Finance': ['finance', 'stock', 'money', 'currency', 'bank', 'market'],
    'SAM': ['sam', 'segmentation', 'mask'],
    'Science': [
        'science', 'physics', 'chemistry', 'biology', 'math', 'astronomy',
        'geology', 'meteorology', 'engineering', 'medicine', 'health',
        'nutrition', 'environment', 'ecology', 'geography', 'geophysics',
    ],
    'Education': ['education', 'school', 'university', 'college', 'teaching', 'learning', 'study', 'research'],
    'Graph': ['graph', 'network', 'node', 'edge', 'path', 'tree', 'cycle', 'flow', 'matching', 'coloring', 'swarm'],
    'Research': ['research', 'study', 'experiment', 'paper', 'discovery', 'innovation', 'exploration', 'analysis'],
    # NOTE(review): the label is spelled 'Analyis' and the keyword 'idefecs'
    # looks like a typo for 'idefics'; both are kept verbatim because other
    # code may depend on the exact strings -- confirm before renaming.
    'Document Analyis': ['pdf', 'RAG', 'idefecs'],
    'WebGPU': ['localModel', 'webGPU'],
    'Point Tracking': ['CoTracker', 'tapir', 'tapnet', 'point', 'track'],
    'Leaderboard': ['arena', 'leaderboard', 'timeline'],
    'Other': [],  # Default category
}


def categorize_space(title, tags):
    """Return the first category whose keywords occur in the title or tags.

    Matching is a case-insensitive substring test: the title, every tag and
    every keyword are lower-cased before comparison, so mixed-case keywords
    such as 'RAG' or 'Unity' can match.

    Args:
        title: Space title (string).
        tags: Iterable of tag strings; may be empty or None.

    Returns:
        The matching key of ``categories`` in dict order, or 'Other' when no
        keyword matches.
    """
    # Search the title and the tags uniformly.
    haystacks = [title.lower()]
    if tags:
        haystacks.extend(tag.lower() for tag in tags)

    for category, keywords in categories.items():
        for keyword in keywords:
            needle = keyword.lower()
            if any(needle in text for text in haystacks):
                return category
    return 'Other'
68
+
69
+ # Add category to DataFrame
70
# Label every row by running categorize_space on its title and tags.
df['category'] = df.apply(
    lambda row: categorize_space(row['title'], row['tags']),
    axis=1,
)

# How many spaces landed in each category.
category_counts = df['category'].value_counts()
print("\nCategory Distribution:")
print(category_counts)

# First three rows of every category, in DataFrame order.
print("\nSample spaces from each category:")
for category in categories:
    print(f"\n{category}:")
    sample = df.loc[df['category'] == category].head(3)
    print(sample[['title', 'likes']].to_string())

# ------------------------------------------------------
# Likes summed per category, largest total first.
category_likes = (
    df.groupby('category')['likes']
    .sum()
    .sort_values(ascending=False)
)
print("Total likes per category:")
print(category_likes)

print("\nTop 10 spaces in each category (sorted by likes):")
for category in categories:
    print(f"\n=== {category} ===")
    top_10 = (
        df.loc[df['category'] == category]
        .nlargest(10, 'likes')[['title', 'likes']]
    )
    # index=False leaves the row index out of the printed table.
    print(top_10.to_string(index=False))

# ------------------------------------------------------

# Build each space's page URL from its id.
df['url'] = 'https://huggingface.co/spaces/' + df['id']

# Same ranking as above, limited to five rows and including the link.
print("Top 5 spaces in each category with links:")
for category in categories:
    print(f"\n=== {category} ===")
    top_5 = (
        df.loc[df['category'] == category]
        .nlargest(5, 'likes')[['title', 'likes', 'url']]
    )
    # index=False leaves the row index out of the printed table.
    print(top_5.to_string(index=False))
109
+
110
+ # ------------------------------------------------------
111
 
112
  def search_spaces(search_text, category):
113
  if category == "All Categories":