fantos committed on
Commit
b07ae4e
·
verified ·
1 Parent(s): 8b6deaa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -20
app.py CHANGED
@@ -26,6 +26,39 @@ def create_trend_chart(space_id, daily_ranks_df):
26
  height=500 # ์ˆ˜์ •๋œ ๋ถ€๋ถ„
27
  )
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  fig.update_layout(
30
  xaxis_title="Date",
31
  yaxis_title="Rank",
@@ -57,18 +90,22 @@ def create_trend_chart(space_id, daily_ranks_df):
57
  return None
58
 
59
  def get_duplicate_spaces(top_100_spaces):
60
- # ID์—์„œ username/spacename ํ˜•์‹์—์„œ username๋งŒ ์ถ”์ถœ
 
 
 
 
61
  top_100_spaces['clean_id'] = top_100_spaces['id'].apply(lambda x: x.split('/')[0])
62
 
63
- # username๋ณ„ trending score ํ•ฉ์‚ฐ
64
  score_sums = top_100_spaces.groupby('clean_id')['trendingScore'].sum()
65
 
66
  # ๋””๋ฒ„๊น…์šฉ ์ถœ๋ ฅ
67
- print("\n=== ID๋ณ„ ์Šค์ฝ”์–ด ํ•ฉ์‚ฐ ๊ฒฐ๊ณผ ===")
68
- for id, score in score_sums.sort_values(ascending=False).head(20).items():
69
- print(f"ID: {id}, Total Score: {score}")
70
 
71
- # ํ•ฉ์‚ฐ๋œ ์Šค์ฝ”์–ด๋กœ ์ •๋ ฌํ•˜์—ฌ ์ƒ์œ„ 20๊ฐœ ์„ ํƒ
72
  top_20_scores = score_sums.sort_values(ascending=False).head(20)
73
  return top_20_scores
74
 
@@ -84,7 +121,7 @@ def create_duplicates_chart(score_sums):
84
  })
85
 
86
  # ๋””๋ฒ„๊น…์šฉ ์ถœ๋ ฅ
87
- print("\n=== ์ฐจํŠธ ๋ฐ์ดํ„ฐ ===")
88
  print(df)
89
 
90
  fig = px.bar(
@@ -92,7 +129,7 @@ def create_duplicates_chart(score_sums):
92
  x='id',
93
  y='rank',
94
  title="Top 20 Spaces by Combined Trending Score",
95
- height=500, # ์ˆ˜์ •๋œ ๋ถ€๋ถ„
96
  text='total_score'
97
  )
98
 
@@ -103,7 +140,7 @@ def create_duplicates_chart(score_sums):
103
  paper_bgcolor='white',
104
  xaxis_tickangle=-45,
105
  yaxis=dict(
106
- range=[20.5, 0.5],
107
  tickmode='linear',
108
  tick0=1,
109
  dtick=1
@@ -172,66 +209,97 @@ def update_display(selection):
172
  return None, gr.HTML(value=f"<div style='color: red;'>Error processing data: {str(e)}</div>")
173
 
174
  def load_and_process_data():
 
 
 
 
 
 
 
175
  try:
176
  url = "https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/main/spaces.parquet"
177
  response = requests.get(url)
178
  df = pd.read_parquet(BytesIO(response.content))
179
 
 
180
  thirty_days_ago = datetime.now() - timedelta(days=30)
181
  df['createdAt'] = pd.to_datetime(df['createdAt'])
 
 
182
  df = df[df['createdAt'] >= thirty_days_ago].copy()
183
 
 
 
 
 
 
 
 
 
 
 
184
  dates = pd.date_range(start=thirty_days_ago, end=datetime.now(), freq='D')
185
  daily_ranks = []
186
 
 
187
  for date in dates:
 
188
  date_data = df[df['createdAt'].dt.date <= date.date()].copy()
 
189
  date_data = date_data.sort_values(['trendingScore', 'id'], ascending=[False, True])
190
  date_data['rank'] = range(1, len(date_data) + 1)
191
  date_data['date'] = date.date()
 
192
  daily_ranks.append(
193
  date_data[['id', 'date', 'rank', 'trendingScore', 'createdAt']]
194
  )
195
 
 
196
  daily_ranks_df = pd.concat(daily_ranks, ignore_index=True)
197
 
 
198
  latest_date = daily_ranks_df['date'].max()
199
  top_100_spaces = daily_ranks_df[
200
  (daily_ranks_df['date'] == latest_date) &
201
  (daily_ranks_df['rank'] <= 100)
202
  ].sort_values('rank').copy()
203
 
 
 
 
204
  return daily_ranks_df, top_100_spaces
205
  except Exception as e:
206
  print(f"Error loading data: {e}")
207
  return pd.DataFrame(), pd.DataFrame()
208
 
209
- # ๋ฐ์ดํ„ฐ ๋กœ๋“œ
210
  print("Loading initial data...")
211
  daily_ranks_df, top_100_spaces = load_and_process_data()
212
  print("Data loaded successfully!")
213
 
214
- # ์ค‘๋ณต ์ŠคํŽ˜์ด์Šค ๋ฐ์ดํ„ฐ ๊ณ„์‚ฐ
215
  duplicates = get_duplicate_spaces(top_100_spaces)
216
  duplicates_chart = create_duplicates_chart(duplicates)
217
 
218
- # Gradio ์ธํ„ฐํŽ˜์ด์Šค ์ƒ์„ฑ
219
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
220
  gr.Markdown("""
221
- # HF Space Ranking Tracker(~30 Dailys)
222
 
223
- Track, analyze, and discover trending AI applications in the Hugging Face ecosystem. Our service continuously monitors and ranks all Spaces over a 30-day period, providing detailed analytics and daily ranking changes for the top 100 performers.
 
 
224
  """)
225
 
226
  with gr.Tabs():
227
  with gr.Tab("Dashboard"):
228
  with gr.Row(variant="panel"):
229
- with gr.Column(scale=5): # ์ˆ˜์ •๋œ ๋ถ€๋ถ„
230
  trend_plot = gr.Plot(
231
  label="Daily Rank Trend",
232
  container=True
233
  )
234
- with gr.Column(scale=5): # ์ˆ˜์ •๋œ ๋ถ€๋ถ„
235
  duplicates_plot = gr.Plot(
236
  label="Multiple Entries Analysis",
237
  value=duplicates_chart,
@@ -243,12 +311,14 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
243
  value="<div style='text-align: center; padding: 20px; color: #666;'>Select a space to view details</div>"
244
  )
245
 
 
246
  space_selection = gr.Radio(
247
  choices=[row['id'] for _, row in top_100_spaces.iterrows()],
248
  value=None,
249
  visible=False
250
  )
251
 
 
252
  html_content = """
253
  <div style='display: flex; flex-wrap: wrap; gap: 16px; justify-content: center;'>
254
  """ + "".join([
@@ -299,6 +369,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
299
  </div>
300
  <script>
301
  function gradioEvent(spaceId) {
 
302
  const radio = document.querySelector(`input[type="radio"][value="${spaceId}"]`);
303
  if (radio) {
304
  radio.checked = true;
@@ -336,11 +407,16 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
336
  - Make data-driven decisions about your AI projects
337
  - Stay ahead of the curve in AI application development
338
 
339
- Our dashboard provides a comprehensive view of the Hugging Face Spaces ecosystem, helping developers, researchers, and enthusiasts track and understand the dynamics of popular AI applications. Whether you're monitoring your own Space's performance or discovering new trending applications, HF Space Ranking Tracker offers the insights you need.
 
 
 
340
 
341
- Experience the pulse of the AI community through our daily updated rankings and discover what's making waves in the world of practical AI applications.
 
342
  """)
343
-
 
344
  space_selection.change(
345
  fn=update_display,
346
  inputs=[space_selection],
@@ -349,4 +425,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
349
  )
350
 
351
  if __name__ == "__main__":
352
- demo.launch(share=True)
 
26
  height=500 # ์ˆ˜์ •๋œ ๋ถ€๋ถ„
27
  )
28
 
29
+ fig.update_layout(
30
+ xaxis_title="Date",
31
+ yaxis_title="Rank",
32
+ yaxis=dict(
33
+ range=[100, 1],
34
+ tickmode='linear',import gradio as gr
35
+ import pandas as pd
36
+ import plotly.express as px
37
+ from datetime import datetime, timedelta
38
+ import requests
39
+ from io import BytesIO
40
+
41
+ def create_trend_chart(space_id, daily_ranks_df):
42
+ if space_id is None or daily_ranks_df.empty:
43
+ return None
44
+
45
+ try:
46
+ space_data = daily_ranks_df[daily_ranks_df['id'] == space_id].copy()
47
+ if space_data.empty:
48
+ return None
49
+
50
+ space_data = space_data.sort_values('date')
51
+
52
+ fig = px.line(
53
+ space_data,
54
+ x='date',
55
+ y='rank',
56
+ title=f'Daily Rank Trend for {space_id}',
57
+ labels={'date': 'Date', 'rank': 'Rank'},
58
+ markers=True,
59
+ height=500 # ํ•„์š”์‹œ ์กฐ์ •
60
+ )
61
+
62
  fig.update_layout(
63
  xaxis_title="Date",
64
  yaxis_title="Rank",
 
90
  return None
91
 
92
  def get_duplicate_spaces(top_100_spaces):
93
+ """
94
+ Take the username part of each id of the form username/spacename in top_100_spaces
95
+ (clean_id), then sum the scores of the spaces belonging to that username and extract the top 20
96
+ """
97
+ # Extract clean_id
98
  top_100_spaces['clean_id'] = top_100_spaces['id'].apply(lambda x: x.split('/')[0])
99
 
100
+ # Sum the trending scores per username
101
  score_sums = top_100_spaces.groupby('clean_id')['trendingScore'].sum()
102
 
103
  # Debug output
104
+ print("\n=== ID๋ณ„ ์Šค์ฝ”์–ด ํ•ฉ์‚ฐ ๊ฒฐ๊ณผ (์ƒ์œ„ 20) ===")
105
+ for cid, score in score_sums.sort_values(ascending=False).head(20).items():
106
+ print(f"Clean ID: {cid}, Total Score: {score}")
107
 
108
+ # Keep only the top 20
109
  top_20_scores = score_sums.sort_values(ascending=False).head(20)
110
  return top_20_scores
111
 
 
121
  })
122
 
123
  # Debug output
124
+ print("\n=== ์ฐจํŠธ ๋ฐ์ดํ„ฐ (clean_id ๋‹จ์œ„) ===")
125
  print(df)
126
 
127
  fig = px.bar(
 
129
  x='id',
130
  y='rank',
131
  title="Top 20 Spaces by Combined Trending Score",
132
+ height=500, # ํ•„์š”์‹œ ์กฐ์ •
133
  text='total_score'
134
  )
135
 
 
140
  paper_bgcolor='white',
141
  xaxis_tickangle=-45,
142
  yaxis=dict(
143
+ range=[len(df) + 0.5, 0.5], # ์ƒ์œ„ 20๊ฐœ ๊ธฐ์ค€
144
  tickmode='linear',
145
  tick0=1,
146
  dtick=1
 
209
  return None, gr.HTML(value=f"<div style='color: red;'>Error processing data: {str(e)}</div>")
210
 
211
  def load_and_process_data():
212
+ """
213
+ - Load the spaces.parquet file, then keep only the data from the last 30 days.
214
+ - Duplicate prevention:
215
+ 1) (Optional) Remove duplicates by createdAt/ID (if a Space was recorded several times at the same point in time)
216
+ 2) Compute the ranking for each date -> daily_ranks_df
217
+ 3) Extract the Top 100 as of the latest date, then remove rows with the same ID
218
+ """
219
  try:
220
  url = "https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/main/spaces.parquet"
221
  response = requests.get(url)
222
  df = pd.read_parquet(BytesIO(response.content))
223
 
224
+ # Compute the point in time 30 days ago
225
  thirty_days_ago = datetime.now() - timedelta(days=30)
226
  df['createdAt'] = pd.to_datetime(df['createdAt'])
227
+
228
+ # Keep only the records created within the last 30 days
229
  df = df[df['createdAt'] >= thirty_days_ago].copy()
230
 
231
+ # (Optional) Remove duplicates by createdAt & id
232
+ # If the same id appears in several rows with the same createdAt, keep only the most recent (or highest-scoring) one
233
+ df = (
234
+ df
235
+ .sort_values(['createdAt', 'trendingScore'], ascending=[True, False])
236
+ .drop_duplicates(subset=['createdAt', 'id'], keep='first')
237
+ .reset_index(drop=True)
238
+ )
239
+
240
+ # Build the date range
241
  dates = pd.date_range(start=thirty_days_ago, end=datetime.now(), freq='D')
242
  daily_ranks = []
243
 
244
+ # Compute the rank for each date
245
  for date in dates:
246
+ # Select only the spaces whose createdAt is on or before date
247
  date_data = df[df['createdAt'].dt.date <= date.date()].copy()
248
+ # Sort by trendingScore descending, then id ascending
249
  date_data = date_data.sort_values(['trendingScore', 'id'], ascending=[False, True])
250
  date_data['rank'] = range(1, len(date_data) + 1)
251
  date_data['date'] = date.date()
252
+
253
  daily_ranks.append(
254
  date_data[['id', 'date', 'rank', 'trendingScore', 'createdAt']]
255
  )
256
 
257
+ # Concatenate the per-day ranking data
258
  daily_ranks_df = pd.concat(daily_ranks, ignore_index=True)
259
 
260
+ # Extract the Top 100 as of the latest date
261
  latest_date = daily_ranks_df['date'].max()
262
  top_100_spaces = daily_ranks_df[
263
  (daily_ranks_df['date'] == latest_date) &
264
  (daily_ranks_df['rank'] <= 100)
265
  ].sort_values('rank').copy()
266
 
267
+ # Duplicate rows (same id) may still exist, so remove them once more
268
+ top_100_spaces = top_100_spaces.drop_duplicates(subset=['id'], keep='first').reset_index(drop=True)
269
+
270
  return daily_ranks_df, top_100_spaces
271
  except Exception as e:
272
  print(f"Error loading data: {e}")
273
  return pd.DataFrame(), pd.DataFrame()
274
 
275
+ # Actual execution: load the data
276
  print("Loading initial data...")
277
  daily_ranks_df, top_100_spaces = load_and_process_data()
278
  print("Data loaded successfully!")
279
 
280
+ # Compute the duplicate-space data (= the same username operating several spaces)
281
  duplicates = get_duplicate_spaces(top_100_spaces)
282
  duplicates_chart = create_duplicates_chart(duplicates)
283
 
284
+ # Build the Gradio interface
285
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
286
  gr.Markdown("""
287
+ # HF Space Ranking Tracker (~30 Days)
288
 
289
+ Track, analyze, and discover trending AI applications in the Hugging Face ecosystem.
290
+ Our service continuously monitors and ranks all Spaces over a 30-day period,
291
+ providing detailed analytics and daily ranking changes for the top 100 performers.
292
  """)
293
 
294
  with gr.Tabs():
295
  with gr.Tab("Dashboard"):
296
  with gr.Row(variant="panel"):
297
+ with gr.Column(scale=5):
298
  trend_plot = gr.Plot(
299
  label="Daily Rank Trend",
300
  container=True
301
  )
302
+ with gr.Column(scale=5):
303
  duplicates_plot = gr.Plot(
304
  label="Multiple Entries Analysis",
305
  value=duplicates_chart,
 
311
  value="<div style='text-align: center; padding: 20px; color: #666;'>Select a space to view details</div>"
312
  )
313
 
314
+ # Keep the Radio button hidden and let the user select by clicking a card
315
  space_selection = gr.Radio(
316
  choices=[row['id'] for _, row in top_100_spaces.iterrows()],
317
  value=None,
318
  visible=False
319
  )
320
 
321
+ # Display the Top 100 as cards
322
  html_content = """
323
  <div style='display: flex; flex-wrap: wrap; gap: 16px; justify-content: center;'>
324
  """ + "".join([
 
369
  </div>
370
  <script>
371
  function gradioEvent(spaceId) {
372
+ // Radio ๋ฒ„ํŠผ ์ค‘์—์„œ ํ•ด๋‹น value๋ฅผ ๊ฐ€์ง„ ํ•ญ๋ชฉ์„ ์ฐพ์•„ ์„ ํƒ ์ด๋ฒคํŠธ ๋ฐœ์ƒ
373
  const radio = document.querySelector(`input[type="radio"][value="${spaceId}"]`);
374
  if (radio) {
375
  radio.checked = true;
 
407
  - Make data-driven decisions about your AI projects
408
  - Stay ahead of the curve in AI application development
409
 
410
+ Our dashboard provides a comprehensive view of the Hugging Face Spaces ecosystem,
411
+ helping developers, researchers, and enthusiasts track and understand the dynamics of popular AI applications.
412
+ Whether you're monitoring your own Space's performance or discovering new trending applications,
413
+ HF Space Ranking Tracker offers the insights you need.
414
 
415
+ Experience the pulse of the AI community through our daily updated rankings and discover
416
+ what's making waves in the world of practical AI applications.
417
  """)
418
+
419
+ # Update the chart/info when a space is selected
420
  space_selection.change(
421
  fn=update_display,
422
  inputs=[space_selection],
 
425
  )
426
 
427
  if __name__ == "__main__":
428
+ demo.launch(share=True)