jsulz HF staff committed on
Commit
c1b5e3a
1 Parent(s): dc572b4

most filtering is done

Browse files
Files changed (1) hide show
  1. app.py +149 -6
app.py CHANGED
@@ -1,9 +1,9 @@
1
  import gradio as gr
2
  import pandas as pd
 
3
 
4
  # Load the spaces.parquet file as a dataframe
5
  df = pd.read_parquet("spaces.parquet")
6
-
7
  """
8
  Todos:
9
  Create tabbed interface for filtering and graphs
@@ -14,10 +14,40 @@ Todos:
14
  Plotly graph of hardware
15
  Investigate README lengths
16
  bar chart of the number of spaces per author
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  """
18
 
19
 
20
- def filtered_df(emoji, likes):
21
  _df = df
22
  # if emoji is not none, filter the dataframe with it
23
  if emoji:
@@ -25,11 +55,50 @@ def filtered_df(emoji, likes):
25
  # if likes is not none, filter the dataframe with it
26
  if likes:
27
  _df = _df[_df["likes"] >= likes]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  return _df
29
 
30
 
31
  with gr.Blocks() as demo:
32
  df = df[df["stage"] == "RUNNING"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  emoji = gr.Dropdown(
34
  df["emoji"].unique().tolist(), label="Search by Emoji 🤗", multiselect=True
35
  ) # Dropdown to select the emoji
@@ -42,13 +111,87 @@ with gr.Blocks() as demo:
42
  hardware = gr.Dropdown(
43
  df["hardware"].unique().tolist(), label="Search by Hardware", multiselect=True
44
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  devMode = gr.Checkbox(value=False, label="DevMode Enabled")
46
  clear = gr.ClearButton(components=[emoji])
47
 
48
- df = pd.DataFrame(df[["emoji", "host", "likes", "hardware"]])
49
- df["host"] = df["host"].apply(lambda x: f"<a href={x}>{x}</a>")
50
- gr.DataFrame(filtered_df, inputs=[emoji, likes], datatype=["str", "html"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
 
53
- print(df.head())
54
  demo.launch()
 
1
  import gradio as gr
2
  import pandas as pd
3
+ import numpy as np
4
 
5
  # Load the spaces.parquet file as a dataframe
6
  df = pd.read_parquet("spaces.parquet")
 
7
  """
8
  Todos:
9
  Create tabbed interface for filtering and graphs
 
14
  Plotly graph of hardware
15
  Investigate README lengths
16
  bar chart of the number of spaces per author
17
+ Is there a correlation between pinning a space and the number of likes?
18
+ Is there a correlation between the emoji and the number of likes?
19
+ distribution of python versions
20
+ what models are most used
21
+ what organizations are most popular in terms of their models and datasets being used
22
+ most duplicated spaces
23
+
24
+ "id",
25
+ "author",
26
+ "created_at",
27
+ "last_modified",
28
+ "subdomain",
29
+ "host",
30
+ "likes",
31
+ "sdk",
32
+ "tags",
33
+ "readme_size",
34
+ "python_version",
35
+ "license",
36
+ "duplicated_from",
37
+ "models",
38
+ "datasets",
39
+ "emoji",
40
+ "colorFrom",
41
+ "colorTo",
42
+ "pinned",
43
+ "stage",
44
+ "hardware",
45
+ "devMode",
46
+ "custom_domains",
47
  """
48
 
49
 
50
+ def filtered_df(emoji, likes, author, hardware, tags, models, datasets):
51
  _df = df
52
  # if emoji is not none, filter the dataframe with it
53
  if emoji:
 
55
  # if likes is not none, filter the dataframe with it
56
  if likes:
57
  _df = _df[_df["likes"] >= likes]
58
+ if author:
59
+ _df = _df[_df["author"].isin(author)]
60
+ if hardware:
61
+ _df = _df[_df["hardware"].isin(hardware)]
62
+ # check to see if the array of sdk_tags contains any of the selected tags
63
+ if tags:
64
+ _df = _df[_df["sdk_tags"].apply(lambda x: any(tag in x for tag in tags))]
65
+ if models:
66
+ _df = _df[
67
+ _df["models"].apply(
68
+ lambda x: (
69
+ any(model in x for model in models) if x is not None else False
70
+ )
71
+ )
72
+ ]
73
+ if datasets:
74
+ _df = _df[
75
+ _df["datasets"].apply(
76
+ lambda x: (
77
+ any(dataset in x for dataset in datasets)
78
+ if x is not None
79
+ else False
80
+ )
81
+ )
82
+ ]
83
  return _df
84
 
85
 
86
  with gr.Blocks() as demo:
87
  df = df[df["stage"] == "RUNNING"]
88
+ # combine the sdk and tags columns, one of which is a string and the other is an array of strings
89
+ # first convert the sdk column to an array of strings
90
+ df["sdk"] = df["sdk"].apply(lambda x: np.array([x]))
91
+ # then combine the sdk and tags columns so that their elements are together
92
+ df["sdk_tags"] = df[["sdk", "tags"]].apply(
93
+ lambda x: np.concatenate((x[0], x[1])), axis=1
94
+ )
95
+
96
+ # where the custom_domains column is not null, use that as the url; otherwise, use the id column
97
+ df["url"] = np.where(
98
+ df["custom_domains"].isnull(),
99
+ df["id"],
100
+ df["custom_domains"],
101
+ )
102
  emoji = gr.Dropdown(
103
  df["emoji"].unique().tolist(), label="Search by Emoji 🤗", multiselect=True
104
  ) # Dropdown to select the emoji
 
111
  hardware = gr.Dropdown(
112
  df["hardware"].unique().tolist(), label="Search by Hardware", multiselect=True
113
  )
114
+ author = gr.Dropdown(
115
+ df["author"].unique().tolist(), label="Search by Author", multiselect=True
116
+ )
117
+ # get the list of unique strings in the sdk_tags column
118
+ sdk_tags = np.unique(np.concatenate(df["sdk_tags"].values))
119
+ # create a dropdown for the sdk_tags
120
+ sdk_tags = gr.Dropdown(
121
+ sdk_tags.tolist(), label="Filter by SDK/Tags", multiselect=True
122
+ )
123
+ # create a gradio checkbox group for hardware
124
+ hardware = gr.CheckboxGroup(
125
+ df["hardware"].unique().tolist(), label="Filter by Hardware"
126
+ )
127
+
128
+ space_license = gr.CheckboxGroup(
129
+ df["license"].unique().tolist(), label="Filter by license"
130
+ )
131
+
132
+ # Entries in the models column that are not arrays (np.ndim == 0, e.g. a missing value) are replaced with a one-element ["None"] array so every row can be concatenated below
133
+ array_column_as_lists = df["models"].apply(
134
+ lambda x: np.array(["None"]) if np.ndim(x) == 0 else x
135
+ )
136
+ # Now, flatten all arrays into one list
137
+ flattened_strings = np.concatenate(array_column_as_lists.values)
138
+ # Get unique strings
139
+ unique_strings = np.unique(flattened_strings)
140
+ # Convert to a list if needed
141
+ unique_strings_list = unique_strings.tolist()
142
+ models = gr.Dropdown(
143
+ unique_strings_list,
144
+ label="Search by Model",
145
+ multiselect=True,
146
+ )
147
+
148
+ # Entries in the datasets column that are not arrays (np.ndim == 0, e.g. a missing value) are replaced with a one-element ["None"] array so every row can be concatenated below
149
+ array_column_as_lists = df["datasets"].apply(
150
+ lambda x: np.array(["None"]) if np.ndim(x) == 0 else x
151
+ )
152
+
153
+ # Now, flatten all arrays into one list
154
+ flattened_strings = np.concatenate(array_column_as_lists.values)
155
+ # Get unique strings
156
+ unique_strings = np.unique(flattened_strings)
157
+ # Convert to a list if needed
158
+ unique_strings_list = unique_strings.tolist()
159
+ datasets = gr.Dropdown(
160
+ unique_strings_list,
161
+ label="Search by Model",
162
+ multiselect=True,
163
+ )
164
+
165
  devMode = gr.Checkbox(value=False, label="DevMode Enabled")
166
  clear = gr.ClearButton(components=[emoji])
167
 
168
+ df = pd.DataFrame(
169
+ df[
170
+ [
171
+ "id",
172
+ "emoji",
173
+ "author",
174
+ "url",
175
+ "likes",
176
+ "hardware",
177
+ "sdk_tags",
178
+ "models",
179
+ "datasets",
180
+ ]
181
+ ]
182
+ )
183
+ df["url"] = df["url"].apply(
184
+ lambda x: (
185
+ f"<a target='_blank' href=https://huggingface.co/spaces/{x}>{x}</a>"
186
+ if x is not None and "/" in x
187
+ else f"<a target='_blank' href=https://{x[0]}>{x[0]}</a>"
188
+ )
189
+ )
190
+ gr.DataFrame(
191
+ filtered_df,
192
+ inputs=[emoji, likes, author, hardware, sdk_tags, models, datasets],
193
+ datatype="html",
194
+ )
195
 
196
 
 
197
  demo.launch()