sanchit-gandhi HF staff committed on
Commit
da67f65
1 Parent(s): a6ebfe4
Files changed (2) hide show
  1. app.py +131 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datetime
2
+ from huggingface_hub import Repository
3
+ import os
4
+ import pandas as pd
5
+ import streamlit as st
6
+ import altair as alt
7
+ import numpy as np
8
+ import plotly.graph_objects as go
9
+
10
DATASET_REPO_URL = (
    "https://huggingface.co/datasets/huggingface/transformers-stats-space-data"
)

# Local directory the dataset repository is cloned into.
DATA_DIR = "data"

MODELS_TO_TRACK = ["wav2vec2", "whisper"]

# Pseudo week number that stands for the final snapshot of the previous year,
# so it can sit to the left of week 1 on the history chart.
PREVIOUS_YEAR_WEEK = 0

# NOTE(review): ISO years occasionally have 53 weeks; this keeps the original
# assumption that the previous year's last snapshot is week 52 - confirm
# against the file names actually present in the dataset repo.
LAST_WEEK_OF_PREVIOUS_YEAR = 52


def resolve_week(week, year):
    """Translate a selectable week into the real ``(week, year)`` pair.

    The pseudo week ``PREVIOUS_YEAR_WEEK`` maps to week 52 of ``year - 1``;
    every other week is returned unchanged.
    """
    if week == PREVIOUS_YEAR_WEEK:
        return LAST_WEEK_OF_PREVIOUS_YEAR, year - 1
    return week, year


def data_file_for(week, year, data_dir=DATA_DIR):
    """Return the path of the CSV snapshot backing *week* of *year*.

    Accepts the pseudo week ``PREVIOUS_YEAR_WEEK``, which is resolved to the
    previous year's final snapshot instead of a non-existent ``data_0_*`` file.
    """
    real_week, real_year = resolve_week(week, year)
    return os.path.join(data_dir, f"data_{real_week}_{real_year}.csv")


def parse_download_count(value):
    """Convert a download count such as ``"1,234"`` (or a plain number) to int.

    Going through ``str`` keeps this working when pandas has already parsed a
    comma-free column as integers.
    """
    return int(str(value).replace(",", ""))


def load_audio_frame(path):
    """Read the CSV at *path* and keep only the rows whose modality is audio."""
    dataframe = pd.read_csv(path)
    return dataframe[dataframe["modality"] == "audio"]


def audio_downloads_by_model(df_audio):
    """Map each model name in *df_audio* to its integer download count."""
    return {
        model: parse_download_count(count)
        for model, count in zip(df_audio["model_names"], df_audio["num_downloads"])
    }


def collect_history(year, current_week, data_dir=DATA_DIR):
    """Gather download totals for every week that has a saved snapshot.

    Weeks are scanned from *current_week* down to 1, followed by the previous
    year's final snapshot (reported as ``PREVIOUS_YEAR_WEEK``). Weeks without
    a file are skipped.

    Returns:
        A ``(valid_weeks, totals, per_model)`` tuple: the weeks that have
        data, the summed audio downloads for each of those weeks, and for
        each name in ``MODELS_TO_TRACK`` a parallel list of that model's
        downloads (``None`` where the model is missing from a snapshot).
    """
    valid_weeks = []
    totals = []
    per_model = {model_name: [] for model_name in MODELS_TO_TRACK}

    candidates = list(range(current_week, 0, -1)) + [PREVIOUS_YEAR_WEEK]
    for week in candidates:
        path = data_file_for(week, year, data_dir)
        if not os.path.exists(path):
            continue

        downloads = audio_downloads_by_model(load_audio_frame(path))
        valid_weeks.append(week)
        totals.append(sum(downloads.values()))
        for model_name in MODELS_TO_TRACK:
            per_model[model_name].append(downloads.get(model_name))

    return valid_weeks, totals, per_model


def build_history_figure(valid_weeks, totals, per_model):
    """Build the line chart of total and per-model downloads per week."""
    fig = go.Figure()
    fig.update_layout(
        title="Monthly downloads",
        xaxis_title="Week",
        yaxis_title="Downloads",
    )
    fig.add_trace(
        go.Scatter(x=valid_weeks, y=totals, mode="lines+markers", name="Total")
    )
    for model_name in MODELS_TO_TRACK:
        fig.add_trace(
            go.Scatter(
                x=valid_weeks,
                y=per_model[model_name],
                mode="lines+markers",
                name=model_name,
            )
        )
    return fig


def main():
    """Render the audio download statistics page."""
    today = datetime.date.today()
    year, current_week, _ = today.isocalendar()

    # Constructed for its side effect: with `clone_from` set, the dataset
    # repository is expected to be cloned into DATA_DIR on instantiation.
    Repository(local_dir=DATA_DIR, clone_from=DATASET_REPO_URL, use_auth_token=True)

    valid_weeks, download_results, model_download_results = collect_history(
        year, current_week
    )

    st.title("Audio Stats")
    st.plotly_chart(
        build_history_figure(valid_weeks, download_results, model_download_results)
    )

    # Without any snapshot there is nothing to select or tabulate.
    if not valid_weeks:
        st.warning("No download data is available yet.")
        return

    selected_week = st.selectbox(
        "Week",
        valid_weeks,
        index=0,
        help="Filter the download results by week",
        # Show the pseudo week 0 as the real week/year it stands for.
        format_func=lambda w: (
            f"{LAST_WEEK_OF_PREVIOUS_YEAR} ({year - 1})"
            if w == PREVIOUS_YEAR_WEEK
            else str(w)
        ),
    )
    shown_week, shown_year = resolve_week(selected_week, year)

    df_audio = load_audio_frame(data_file_for(selected_week, year))

    st.header(f"Stats for year {shown_year} and week {shown_week}")

    # Bar chart of downloads per audio architecture, in file order.
    audio_int_downloads = [
        parse_download_count(count) for count in df_audio["num_downloads"]
    ]
    source = pd.DataFrame(
        {
            "Number of total downloads": audio_int_downloads,
            "Model architecture name": df_audio["model_names"].values,
        }
    )
    bar_chart = (
        alt.Chart(source)
        .mark_bar()
        .encode(
            y="Number of total downloads",
            x=alt.X("Model architecture name", sort=None),
        )
    )
    st.subheader("Top audio downloads last 30 days")
    st.altair_chart(bar_chart, use_container_width=True)

    st.subheader("Audio stats last 30 days")

    # Table of the audio rows plus a trailing "Total" row.
    table = df_audio.drop("modality", axis=1)
    table.loc["Total"] = table.sum(numeric_only=True)

    # nice formatting
    table.at["Total", "num_downloads"] = "{:,}".format(sum(audio_int_downloads))
    table.at["Total", "model_names"] = ""
    table.at["Total", "download_per_model"] = ""

    st.table(table)


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ huggingface_hub
2
+ plotly