Spaces:
Sleeping
Sleeping
most filtering is done
Browse files
app.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
|
|
3 |
|
4 |
# Load the spaces.parquet file as a dataframe
|
5 |
df = pd.read_parquet("spaces.parquet")
|
6 |
-
|
7 |
"""
|
8 |
Todos:
|
9 |
Create tabbed interface for filtering and graphs
|
@@ -14,10 +14,40 @@ Todos:
|
|
14 |
Plotly graph of hardware
|
15 |
Investigate README lengths
|
16 |
bar chart of the number of spaces per author
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
"""
|
18 |
|
19 |
|
20 |
-
def filtered_df(emoji, likes):
|
21 |
_df = df
|
22 |
# if emoji is not none, filter the dataframe with it
|
23 |
if emoji:
|
@@ -25,11 +55,50 @@ def filtered_df(emoji, likes):
|
|
25 |
# if likes is not none, filter the dataframe with it
|
26 |
if likes:
|
27 |
_df = _df[_df["likes"] >= likes]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
return _df
|
29 |
|
30 |
|
31 |
with gr.Blocks() as demo:
|
32 |
df = df[df["stage"] == "RUNNING"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
emoji = gr.Dropdown(
|
34 |
df["emoji"].unique().tolist(), label="Search by Emoji 🤗", multiselect=True
|
35 |
) # Dropdown to select the emoji
|
@@ -42,13 +111,87 @@ with gr.Blocks() as demo:
|
|
42 |
hardware = gr.Dropdown(
|
43 |
df["hardware"].unique().tolist(), label="Search by Hardware", multiselect=True
|
44 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
devMode = gr.Checkbox(value=False, label="DevMode Enabled")
|
46 |
clear = gr.ClearButton(components=[emoji])
|
47 |
|
48 |
-
df = pd.DataFrame(
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
|
53 |
-
print(df.head())
|
54 |
demo.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
|
5 |
# Load the spaces.parquet file as a dataframe
|
6 |
df = pd.read_parquet("spaces.parquet")
|
|
|
7 |
"""
|
8 |
Todos:
|
9 |
Create tabbed interface for filtering and graphs
|
|
|
14 |
Plotly graph of hardware
|
15 |
Investigate README lengths
|
16 |
bar chart of the number of spaces per author
|
17 |
+
Is there a correlation between pinning a space and the number of likes?
|
18 |
+
Is there a correlation between the emoji and the number of likes?
|
19 |
+
distribution of python versions
|
20 |
+
what models are most used
|
21 |
+
what organizations are most popular in terms of their models and datasets being used
|
22 |
+
most duplicated spaces
|
23 |
+
|
24 |
+
"id",
|
25 |
+
"author",
|
26 |
+
"created_at",
|
27 |
+
"last_modified",
|
28 |
+
"subdomain",
|
29 |
+
"host",
|
30 |
+
"likes",
|
31 |
+
"sdk",
|
32 |
+
"tags",
|
33 |
+
"readme_size",
|
34 |
+
"python_version",
|
35 |
+
"license",
|
36 |
+
"duplicated_from",
|
37 |
+
"models",
|
38 |
+
"datasets",
|
39 |
+
"emoji",
|
40 |
+
"colorFrom",
|
41 |
+
"colorTo",
|
42 |
+
"pinned",
|
43 |
+
"stage",
|
44 |
+
"hardware",
|
45 |
+
"devMode",
|
46 |
+
"custom_domains",
|
47 |
"""
|
48 |
|
49 |
|
50 |
+
def filtered_df(emoji, likes, author, hardware, tags, models, datasets):
|
51 |
_df = df
|
52 |
# if emoji is not none, filter the dataframe with it
|
53 |
if emoji:
|
|
|
55 |
# if likes is not none, filter the dataframe with it
|
56 |
if likes:
|
57 |
_df = _df[_df["likes"] >= likes]
|
58 |
+
if author:
|
59 |
+
_df = _df[_df["author"].isin(author)]
|
60 |
+
if hardware:
|
61 |
+
_df = _df[_df["hardware"].isin(hardware)]
|
62 |
+
# check to see if the array of sdk_tags contains any of the selected tags
|
63 |
+
if tags:
|
64 |
+
_df = _df[_df["sdk_tags"].apply(lambda x: any(tag in x for tag in tags))]
|
65 |
+
if models:
|
66 |
+
_df = _df[
|
67 |
+
_df["models"].apply(
|
68 |
+
lambda x: (
|
69 |
+
any(model in x for model in models) if x is not None else False
|
70 |
+
)
|
71 |
+
)
|
72 |
+
]
|
73 |
+
if datasets:
|
74 |
+
_df = _df[
|
75 |
+
_df["datasets"].apply(
|
76 |
+
lambda x: (
|
77 |
+
any(dataset in x for dataset in datasets)
|
78 |
+
if x is not None
|
79 |
+
else False
|
80 |
+
)
|
81 |
+
)
|
82 |
+
]
|
83 |
return _df
|
84 |
|
85 |
|
86 |
with gr.Blocks() as demo:
|
87 |
df = df[df["stage"] == "RUNNING"]
|
88 |
+
# combine the sdk and tags columns, one of which is a string and the other is an array of strings
|
89 |
+
# first convert the sdk column to an array of strings
|
90 |
+
df["sdk"] = df["sdk"].apply(lambda x: np.array([x]))
|
91 |
+
# then combine the sdk and tags columns so that their elements are together
|
92 |
+
df["sdk_tags"] = df[["sdk", "tags"]].apply(
|
93 |
+
lambda x: np.concatenate((x[0], x[1])), axis=1
|
94 |
+
)
|
95 |
+
|
96 |
+
# where the custom_domains column is not null, use that as the url; otherwise, fall back to the id column
|
97 |
+
df["url"] = np.where(
|
98 |
+
df["custom_domains"].isnull(),
|
99 |
+
df["id"],
|
100 |
+
df["custom_domains"],
|
101 |
+
)
|
102 |
emoji = gr.Dropdown(
|
103 |
df["emoji"].unique().tolist(), label="Search by Emoji 🤗", multiselect=True
|
104 |
) # Dropdown to select the emoji
|
|
|
111 |
hardware = gr.Dropdown(
|
112 |
df["hardware"].unique().tolist(), label="Search by Hardware", multiselect=True
|
113 |
)
|
114 |
+
author = gr.Dropdown(
|
115 |
+
df["author"].unique().tolist(), label="Search by Author", multiselect=True
|
116 |
+
)
|
117 |
+
# get the list of unique strings in the sdk_tags column
|
118 |
+
sdk_tags = np.unique(np.concatenate(df["sdk_tags"].values))
|
119 |
+
# create a dropdown for the sdk_tags
|
120 |
+
sdk_tags = gr.Dropdown(
|
121 |
+
sdk_tags.tolist(), label="Filter by SDK/Tags", multiselect=True
|
122 |
+
)
|
123 |
+
# create a gradio checkbox group for hardware
|
124 |
+
hardware = gr.CheckboxGroup(
|
125 |
+
df["hardware"].unique().tolist(), label="Filter by Hardware"
|
126 |
+
)
|
127 |
+
|
128 |
+
space_license = gr.CheckboxGroup(
|
129 |
+
df["license"].unique().tolist(), label="Filter by license"
|
130 |
+
)
|
131 |
+
|
132 |
+
# The "models" column holds an array of strings per row; scalar (0-dim) entries are swapped for a one-element ["None"] array so the column can be concatenated
|
133 |
+
array_column_as_lists = df["models"].apply(
|
134 |
+
lambda x: np.array(["None"]) if np.ndim(x) == 0 else x
|
135 |
+
)
|
136 |
+
# Now, flatten all per-row arrays into a single array
|
137 |
+
flattened_strings = np.concatenate(array_column_as_lists.values)
|
138 |
+
# Get unique strings
|
139 |
+
unique_strings = np.unique(flattened_strings)
|
140 |
+
# Convert to a list if needed
|
141 |
+
unique_strings_list = unique_strings.tolist()
|
142 |
+
models = gr.Dropdown(
|
143 |
+
unique_strings_list,
|
144 |
+
label="Search by Model",
|
145 |
+
multiselect=True,
|
146 |
+
)
|
147 |
+
|
148 |
+
# The "datasets" column holds an array of strings per row; scalar (0-dim) entries are swapped for a one-element ["None"] array so the column can be concatenated
|
149 |
+
array_column_as_lists = df["datasets"].apply(
|
150 |
+
lambda x: np.array(["None"]) if np.ndim(x) == 0 else x
|
151 |
+
)
|
152 |
+
|
153 |
+
# Now, flatten all per-row arrays into a single array
|
154 |
+
flattened_strings = np.concatenate(array_column_as_lists.values)
|
155 |
+
# Get unique strings
|
156 |
+
unique_strings = np.unique(flattened_strings)
|
157 |
+
# Convert to a list if needed
|
158 |
+
unique_strings_list = unique_strings.tolist()
|
159 |
+
datasets = gr.Dropdown(
|
160 |
+
unique_strings_list,
|
161 |
+
label="Search by Model",
|
162 |
+
multiselect=True,
|
163 |
+
)
|
164 |
+
|
165 |
devMode = gr.Checkbox(value=False, label="DevMode Enabled")
|
166 |
clear = gr.ClearButton(components=[emoji])
|
167 |
|
168 |
+
df = pd.DataFrame(
|
169 |
+
df[
|
170 |
+
[
|
171 |
+
"id",
|
172 |
+
"emoji",
|
173 |
+
"author",
|
174 |
+
"url",
|
175 |
+
"likes",
|
176 |
+
"hardware",
|
177 |
+
"sdk_tags",
|
178 |
+
"models",
|
179 |
+
"datasets",
|
180 |
+
]
|
181 |
+
]
|
182 |
+
)
|
183 |
+
df["url"] = df["url"].apply(
|
184 |
+
lambda x: (
|
185 |
+
f"<a target='_blank' href=https://huggingface.co/spaces/{x}>{x}</a>"
|
186 |
+
if x is not None and "/" in x
|
187 |
+
else f"<a target='_blank' href=https://{x[0]}>{x[0]}</a>"
|
188 |
+
)
|
189 |
+
)
|
190 |
+
gr.DataFrame(
|
191 |
+
filtered_df,
|
192 |
+
inputs=[emoji, likes, author, hardware, sdk_tags, models, datasets],
|
193 |
+
datatype="html",
|
194 |
+
)
|
195 |
|
196 |
|
|
|
197 |
demo.launch()
|