nazneen committed on
Commit
79de90d
·
1 Parent(s): 8dbc06e
Files changed (4) hide show
  1. Pipfile +11 -0
  2. Pipfile.lock +20 -0
  3. app.py +19 -23
  4. requirements.txt +12 -285
Pipfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [[source]]
2
+ url = "https://pypi.org/simple"
3
+ verify_ssl = true
4
+ name = "pypi"
5
+
6
+ [packages]
7
+
8
+ [dev-packages]
9
+
10
+ [requires]
11
+ python_version = "3.9"
Pipfile.lock ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_meta": {
3
+ "hash": {
4
+ "sha256": "a36a5392bb1e8bbc06bfaa0761e52593cf2d83b486696bf54667ba8da616c839"
5
+ },
6
+ "pipfile-spec": 6,
7
+ "requires": {
8
+ "python_version": "3.9"
9
+ },
10
+ "sources": [
11
+ {
12
+ "name": "pypi",
13
+ "url": "https://pypi.org/simple",
14
+ "verify_ssl": true
15
+ }
16
+ ]
17
+ },
18
+ "default": {},
19
+ "develop": {}
20
+ }
app.py CHANGED
@@ -1,40 +1,26 @@
1
  ## LIBRARIES ###
2
  ## Data
3
- import numpy as np
4
  import pandas as pd
5
- import torch
6
- import json
7
- from tqdm import tqdm
8
- from math import floor
9
- from datasets import load_dataset
10
- from collections import defaultdict
11
- from transformers import AutoTokenizer
12
  pd.options.display.float_format = '${:,.2f}'.format
13
 
14
  # Analysis
15
 
16
  # App & Visualization
17
  import streamlit as st
18
- from bokeh.models import CustomJS, ColumnDataSource, HoverTool, BoxSelectTool, Callback, Select, TextInput, DataTable, TableColumn
19
- from bokeh.events import SelectionGeometry
20
- from bokeh.plotting import figure, output_file, show
21
  from bokeh.transform import factor_cmap
22
  from bokeh.palettes import Category20c_20
23
  from bokeh.layouts import column, row
24
 
25
  # utils
26
- from random import sample
27
 
28
  def datasets_explorer_viz(df):
29
  s = ColumnDataSource(df)
30
- text_input = TextInput(value="", title="Search")
31
- text_input.js_on_change("value", CustomJS(code="""
32
- console.log('text_input: value=' + this.value, this.toString())
33
- """))
34
  TOOLTIPS= [("dataset_id", "@dataset_id"), ("task", "@task")]
35
  color = factor_cmap('task', palette=Category20c_20, factors=df['task'].unique())
36
  p = figure(plot_width=1000, plot_height=1000, tools="hover,wheel_zoom,pan,box_select", title="Dataset explorer", tooltips=TOOLTIPS, toolbar_location="above")
37
- p.scatter('x', 'y', size=3, source=s, alpha=0.8,marker='circle',fill_color = color, line_color=color, legend_field = 'task')
38
  p.legend.location = "bottom_right"
39
  #p.legend.click_policy="mute"
40
  p.legend.label_text_font_size="8pt"
@@ -51,15 +37,11 @@ def datasets_explorer_viz(df):
51
  const inds = cb_obj.indices;
52
  const tableData = table_source.data;
53
  const umapData = umap_source.data;
54
-
55
- //tableData['x'] = []
56
- //tableData['y'] = []
57
  tableData['task'] = []
58
  tableData['dataset_id'] = []
59
 
60
  for (let i = 0; i < inds.length; i++) {
61
- // tableData['x'].push(umapData['x'][inds[i]])
62
- // tableData['y'].push(umapData['y'][inds[i]])
63
  tableData['task'].push(umapData['task'][inds[i]])
64
  tableData['dataset_id'].push(umapData['dataset_id'][inds[i]])
65
  }
@@ -67,7 +49,21 @@ def datasets_explorer_viz(df):
67
  table_source.change.emit();
68
  """
69
  ))
70
- show(row(column(text_input,p), data_table))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
 
73
  if __name__ == "__main__":
 
1
  ## LIBRARIES ###
2
  ## Data
 
3
  import pandas as pd
 
 
 
 
 
 
 
4
  pd.options.display.float_format = '${:,.2f}'.format
5
 
6
  # Analysis
7
 
8
  # App & Visualization
9
  import streamlit as st
10
+ from bokeh.models import CustomJS, ColumnDataSource, TextInput, DataTable, TableColumn
11
+ from bokeh.plotting import figure
 
12
  from bokeh.transform import factor_cmap
13
  from bokeh.palettes import Category20c_20
14
  from bokeh.layouts import column, row
15
 
16
  # utils
 
17
 
18
  def datasets_explorer_viz(df):
19
  s = ColumnDataSource(df)
 
 
 
 
20
  TOOLTIPS= [("dataset_id", "@dataset_id"), ("task", "@task")]
21
  color = factor_cmap('task', palette=Category20c_20, factors=df['task'].unique())
22
  p = figure(plot_width=1000, plot_height=1000, tools="hover,wheel_zoom,pan,box_select", title="Dataset explorer", tooltips=TOOLTIPS, toolbar_location="above")
23
+ p.scatter('x', 'y', size=500, source=s, alpha=0.8,marker='circle',fill_color = color, line_color=color, legend_field = 'task')
24
  p.legend.location = "bottom_right"
25
  #p.legend.click_policy="mute"
26
  p.legend.label_text_font_size="8pt"
 
37
  const inds = cb_obj.indices;
38
  const tableData = table_source.data;
39
  const umapData = umap_source.data;
40
+
 
 
41
  tableData['task'] = []
42
  tableData['dataset_id'] = []
43
 
44
  for (let i = 0; i < inds.length; i++) {
 
 
45
  tableData['task'].push(umapData['task'][inds[i]])
46
  tableData['dataset_id'].push(umapData['dataset_id'][inds[i]])
47
  }
 
49
  table_source.change.emit();
50
  """
51
  ))
52
+ p.selection = ColumnDataSource(data=dict())
53
+ #text_input.on_change("value_input",
54
+ text_input.value.js_on_change('value', CustomJS(args=dict(plot_source=s, text_input=text_input), code="""
55
+ const indices = [];
56
+ const plot_data = plot_source.data;
57
+ for (var i = 0; i < plot_data['dataset_id'].length(); i++) {
58
+ console.log(plot_data['dataset_id'][i]);
59
+ if (plot_data['dataset_id'][i] == text_input.value || plot_data['task'][i] == text_input.value) {
60
+ indices.push(i)
61
+ }
62
+ }
63
+ plot_source.change.emit()
64
+ """))
65
+ text_input = TextInput(value="", title="Search")
66
+ st.bokeh_chart(row(column(text_input,p), data_table))
67
 
68
 
69
  if __name__ == "__main__":
requirements.txt CHANGED
@@ -1,285 +1,12 @@
1
- # This file may be used to create an environment using:
2
- # $ conda create --name <env> --file <this file>
3
- # platform: osx-arm64
4
- bokeh=2.4.2=py39hca03da5_0
5
- boto3=1.21.32=pyhd3eb1b0_0
6
- botocore=1.24.32=pyhd3eb1b0_0
7
- bottleneck=1.3.4=py39heec5a64_0
8
- brotli=1.0.7=hc377ac9_0
9
- brotlipy=0.7.0=py39h1a28f6b_1002
10
- brunsli=0.1=hc377ac9_1
11
- bzip2=1.0.8=h620ffc9_4
12
- c-ares=1.18.1=h1a28f6b_0
13
- c-blosc2=2.0.4=h0095615_1
14
- ca-certificates=2021.10.8=h4653dfc_0
15
- cachetools=4.2.2=pyhd3eb1b0_0
16
- certifi=2021.10.8=py39h2804cbe_2
17
- cffi=1.15.0=py39h22df2f2_1
18
- cfitsio=4.0.0=h99351b2_0
19
- charls=2.2.0=hc377ac9_0
20
- charset-normalizer=2.0.4=pyhd3eb1b0_0
21
- click=8.0.4=py39hca03da5_0
22
- cloudpickle=2.0.0=pyhd3eb1b0_0
23
- colorcet=3.0.0=py39hca03da5_0
24
- cryptography=37.0.1=py39h834c97f_0
25
- cycler=0.11.0=pyhd3eb1b0_0
26
- cytoolz=0.11.0=py39h1a28f6b_0
27
- dask=2022.2.1=pyhd3eb1b0_0
28
- dask-core=2022.2.1=pyhd3eb1b0_0
29
- dataclasses=0.8=pyh6d0b6a4_7
30
- datasets=2.0.0=py_0
31
- datasets-sql=0.1.1=pypi_0
32
- datashader=0.13.0=pyhd3eb1b0_1
33
- datashape=0.5.4=py39hca03da5_1
34
- debugpy=1.5.1=py39hc377ac9_0
35
- decorator=5.1.1=pyhd3eb1b0_0
36
- defusedxml=0.7.1=pyhd3eb1b0_0
37
- dill=0.3.4=pyhd3eb1b0_0
38
- distributed=2022.2.1=pyhd3eb1b0_0
39
- docopt=0.6.2=pypi_0
40
- duckdb=0.3.4=pypi_0
41
- entrypoints=0.4=py39hca03da5_0
42
- executing=0.8.3=pyhd3eb1b0_0
43
- filelock=3.6.0=pyhd3eb1b0_0
44
- fonttools=4.31.2=pypi_0
45
- freetype=2.11.0=h1192e45_0
46
- frozenlist=1.2.0=py39h1a28f6b_0
47
- fsspec=2022.2.0=pyhd3eb1b0_0
48
- future=0.18.2=py39hca03da5_1
49
- fuzzywuzzy=0.18.0=pypi_0
50
- gflags=2.2.2=hc377ac9_0
51
- gh=2.10.1=h75b854d_0
52
- giflib=5.2.1=h1a28f6b_0
53
- gitdb=4.0.7=pyhd3eb1b0_0
54
- gitpython=3.1.18=pyhd3eb1b0_1
55
- glog=0.5.0=hc377ac9_0
56
- grpc-cpp=1.42.0=hedfbb7c_1
57
- heapdict=1.0.1=pyhd3eb1b0_0
58
- holoviews=1.14.8=pyhd3eb1b0_0
59
- htmlmin=0.1.12=pypi_0
60
- huggingface-hub=0.6.0=pypi_0
61
- idna=3.3=pyhd3eb1b0_0
62
- imagecodecs=2021.11.20=py39hcb02aed_1
63
- imagehash=4.2.1=pypi_0
64
- imageio=2.9.0=pyhd3eb1b0_0
65
- importlib-metadata=4.11.3=py39hca03da5_0
66
- importlib_metadata=4.11.3=hd3eb1b0_0
67
- ipykernel=6.9.1=py39hca03da5_0
68
- ipython=8.3.0=py39hca03da5_0
69
- ipython_genutils=0.2.0=pyhd3eb1b0_1
70
- ipywidgets=7.6.5=pyhd3eb1b0_1
71
- jbig=2.1=h1a28f6b_0
72
- jedi=0.18.1=py39hca03da5_1
73
- jellyfish=0.9.0=pypi_0
74
- jinja2=3.0.3=pyhd3eb1b0_0
75
- jmespath=0.10.0=pyhd3eb1b0_0
76
- joblib=1.0.1=pypi_0
77
- jpeg=9e=h1a28f6b_0
78
- jsonlines=3.0.0=pypi_0
79
- jsonschema=4.4.0=py39hca03da5_0
80
- jupyter=1.0.0=pypi_0
81
- jupyter-console=6.4.3=pypi_0
82
- jupyter_client=7.2.2=py39hca03da5_0
83
- jupyter_core=4.10.0=py39hca03da5_0
84
- jupyterlab_pygments=0.1.2=py_0
85
- jupyterlab_widgets=1.0.0=pyhd3eb1b0_1
86
- jxrlib=1.1=h1a28f6b_2
87
- kaleido=0.2.1=pypi_0
88
- kiwisolver=1.4.2=pypi_0
89
- krb5=1.19.2=h3b8d789_0
90
- lcms2=2.12=hba8e193_0
91
- lerc=3.0=hc377ac9_0
92
- libaec=1.0.6=hbdafb3b_0
93
- libblas=3.9.0=14_osxarm64_openblas
94
- libbrotlicommon=1.0.9=h1c322ee_7
95
- libbrotlidec=1.0.9=h1c322ee_7
96
- libbrotlienc=1.0.9=h1c322ee_7
97
- libcblas=3.9.0=14_osxarm64_openblas
98
- libcurl=7.82.0=hc6d1d07_0
99
- libcxx=12.0.0=hf6beb65_1
100
- libdeflate=1.8=h1a28f6b_5
101
- libedit=3.1.20210910=h1a28f6b_0
102
- libev=4.33=h1a28f6b_1
103
- libevent=2.1.10=hbae9a57_4
104
- libffi=3.4.2=hc377ac9_2
105
- libgfortran=5.0.0=11_1_0_h6a59814_26
106
- libgfortran5=11.1.0=h6a59814_26
107
- liblapack=3.9.0=14_osxarm64_openblas
108
- liblapacke=3.9.0=14_osxarm64_openblas
109
- libllvm11=11.1.0=h12f7ac0_4
110
- libnghttp2=1.46.0=h95c9599_0
111
- libopenblas=0.3.20=openmp_h2209c59_0
112
- libpng=1.6.37=hb8d0fd4_0
113
- libprotobuf=3.19.1=h98b2900_0
114
- libsodium=1.0.18=h1a28f6b_0
115
- libssh2=1.10.0=hf27765b_0
116
- libthrift=0.15.0=h28a9c34_1
117
- libtiff=4.3.0=h74060c4_2
118
- libutf8proc=2.6.1=h1a28f6b_0
119
- libwebp=1.2.2=h68602c7_0
120
- libwebp-base=1.2.2=h1a28f6b_0
121
- libzlib=1.2.11=h90dfc92_1014
122
- libzopfli=1.0.3=hc377ac9_0
123
- llvm-openmp=14.0.3=hd125106_0
124
- llvmlite=0.38.0=py39h98b2900_0
125
- locket=0.2.1=py39hca03da5_2
126
- lz4-c=1.9.3=hc377ac9_0
127
- markdown=3.3.4=py39hca03da5_0
128
- markupsafe=2.0.1=py39h1a28f6b_0
129
- matplotlib=3.5.1=py39hca03da5_1
130
- matplotlib-base=3.5.1=py39hc377ac9_1
131
- matplotlib-inline=0.1.2=pyhd3eb1b0_2
132
- missingno=0.5.1=pypi_0
133
- mistune=0.8.4=py39h1a28f6b_1000
134
- msgpack-python=1.0.3=py39h525c30c_0
135
- multidict=5.2.0=py39h1a28f6b_2
136
- multimethod=1.7=pypi_0
137
- multipledispatch=0.6.0=py39hca03da5_0
138
- multiprocess=0.70.12.2=py39hb18efdd_2
139
- munkres=1.1.4=py_0
140
- nbclient=0.5.13=py39hca03da5_0
141
- nbconvert=6.4.4=py39hca03da5_0
142
- nbformat=5.3.0=py39hca03da5_0
143
- ncurses=6.3=h1a28f6b_2
144
- nest-asyncio=1.5.5=py39hca03da5_0
145
- networkx=2.7.1=pyhd3eb1b0_0
146
- ninja=1.10.2=hca03da5_5
147
- ninja-base=1.10.2=h525c30c_5
148
- nltk=3.7=pyhd3eb1b0_0
149
- notebook=6.4.11=py39hca03da5_0
150
- numba=0.55.1=py39h9197a36_0
151
- numexpr=2.8.1=py39h144ceef_0
152
- numpy=1.21.5=py39h25ab29e_2
153
- numpy-base=1.21.5=py39h974a1f5_2
154
- openblas=0.3.20=openmp_h745f6c2_0
155
- openjpeg=2.4.0=h062765e_1
156
- openssl=1.1.1o=ha287fd2_0
157
- orc=1.7.1=hcb6706d_1
158
- packaging=21.3=pyhd3eb1b0_0
159
- pandas=1.4.2=py39hc377ac9_0
160
- pandas-profiling=3.1.0=pypi_0
161
- pandocfilters=1.5.0=pyhd3eb1b0_0
162
- panel=0.13.0=py39hca03da5_0
163
- param=1.12.0=pyhd3eb1b0_0
164
- parquet-cpp=1.5.1=h34088ae_4
165
- parso=0.8.3=pyhd3eb1b0_0
166
- partd=1.2.0=pyhd3eb1b0_1
167
- pexpect=4.8.0=pyhd3eb1b0_3
168
- phik=0.12.2=pypi_0
169
- pickleshare=0.7.5=pyhd3eb1b0_1003
170
- pillow=9.1.0=pypi_0
171
- pip=21.2.4=py39hca03da5_0
172
- pipreqs=0.4.11=pypi_0
173
- plotly=5.6.0=pyhd3eb1b0_0
174
- progressbar=2.5=pypi_0
175
- prometheus_client=0.13.1=pyhd3eb1b0_0
176
- prompt-toolkit=3.0.20=pyhd3eb1b0_0
177
- protobuf=3.20.0=pypi_0
178
- psutil=5.8.0=py39h1a28f6b_1
179
- ptyprocess=0.7.0=pyhd3eb1b0_2
180
- pure_eval=0.2.2=pyhd3eb1b0_0
181
- pyahocorasick=1.4.4=pypi_0
182
- pyarrow=6.0.1=py39hd3b58d7_5_cpu
183
- pyasn1=0.4.8=pypi_0
184
- pycodestyle=2.8.0=pyhd3eb1b0_0
185
- pycparser=2.21=pyhd3eb1b0_0
186
- pyct=0.4.6=py39hca03da5_0
187
- pydantic=1.9.0=pypi_0
188
- pydeck=0.7.1=pyh6c4a22f_0
189
- pygments=2.11.2=pyhd3eb1b0_0
190
- pympler=1.0.1=pypi_0
191
- pynndescent=0.5.4=pyhd3eb1b0_0
192
- pyopenssl=22.0.0=pyhd3eb1b0_0
193
- pyparsing=3.0.4=pyhd3eb1b0_0
194
- pyrsistent=0.18.0=py39h1a28f6b_0
195
- pysocks=1.7.1=py39hca03da5_0
196
- python=3.9.12=hfc7342c_1_cpython
197
- python-dateutil=2.8.2=pyhd3eb1b0_0
198
- python-dotenv=0.19.2=pypi_0
199
- python-fastjsonschema=2.15.3=pyhd8ed1ab_0
200
- python-xxhash=3.0.0=py39hb18efdd_1
201
- python_abi=3.9=1_cp39
202
- pytorch=1.10.2=cpu_py39h23cb94c_0
203
- pytz=2021.3=pyhd3eb1b0_0
204
- pyviz_comms=2.0.2=pyhd3eb1b0_0
205
- pywavelets=1.3.0=py39h1a28f6b_0
206
- pyyaml=6.0=py39h1a28f6b_0
207
- pyzmq=22.3.0=py39hc377ac9_2
208
- qtconsole=5.3.0=pypi_0
209
- qtpy=2.0.1=pypi_0
210
- re2=2021.11.01=hbdafb3b_0
211
- readline=8.1.2=h1a28f6b_1
212
- regex=2022.3.15=py39h1a28f6b_0
213
- requests=2.27.1=pyhd3eb1b0_0
214
- responses=0.18.0=pypi_0
215
- s3transfer=0.5.0=pyhd3eb1b0_0
216
- sacremoses=0.0.43=pyhd3eb1b0_0
217
- scikit-image=0.19.2=py39h9197a36_0
218
- scikit-learn=1.0.2=py39h9197a36_1
219
- scipy=1.7.3=py39h2f0f56f_0
220
- seaborn=0.11.2=pyhd3eb1b0_0
221
- segtok=1.5.11=pypi_0
222
- semver=2.13.0=pyhd3eb1b0_0
223
- send2trash=1.8.0=pyhd3eb1b0_1
224
- sentence-transformers=2.2.0=pyhd8ed1ab_0
225
- sentencepiece=0.1.95=py39h525c30c_0
226
- setuptools=61.2.0=py39hca03da5_0
227
- simplejson=3.17.6=pypi_0
228
- six=1.16.0=pyhd3eb1b0_1
229
- smmap=5.0.0=pypi_0
230
- snappy=1.1.9=hc377ac9_0
231
- sortedcontainers=2.4.0=pyhd3eb1b0_0
232
- soupsieve=2.3.2=pypi_0
233
- sql-metadata=2.5.0=pypi_0
234
- sqlite=3.38.2=h1058600_0
235
- sqlparse=0.4.2=pypi_0
236
- stack_data=0.2.0=pyhd3eb1b0_0
237
- streamlit=1.9.0=pyhd8ed1ab_0
238
- streamlit-aggrid=0.2.3.post2=pypi_0
239
- streamlit-vega-lite=0.1.0=pypi_0
240
- tabulate=0.8.9=pypi_0
241
- tangled-up-in-unicode=0.1.0=pypi_0
242
- tbb=2021.5.0=h525c30c_0
243
- tblib=1.7.0=pyhd3eb1b0_0
244
- tenacity=8.0.1=py39hca03da5_0
245
- tensorboard-plugin-wit=1.8.1=pypi_0
246
- terminado=0.13.1=py39hca03da5_0
247
- testpath=0.5.0=pyhd3eb1b0_0
248
- threadpoolctl=2.2.0=pyh0d69192_0
249
- tifffile=2021.7.2=pyhd3eb1b0_2
250
- tk=8.6.12=he1e0b03_0
251
- tokenizers=0.11.6=pypi_0
252
- toml=0.10.2=pyhd3eb1b0_0
253
- toolz=0.11.2=pyhd3eb1b0_0
254
- torchvision=0.2.2=py_3
255
- tornado=6.1=py39h1a28f6b_0
256
- tqdm=4.64.0=py39hca03da5_0
257
- traitlets=5.1.1=pyhd3eb1b0_0
258
- transformers=4.18.0=py39hca03da5_0
259
- typing-extensions=4.1.1=hd3eb1b0_0
260
- typing_extensions=4.1.1=pyh06a4308_0
261
- tzdata=2022a=hda174b7_0
262
- tzlocal=2.1=py39hca03da5_0
263
- umap-learn=0.5.3=py39h2804cbe_0
264
- urllib3=1.26.9=py39hca03da5_0
265
- validators=0.18.2=pyhd3eb1b0_0
266
- visions=0.7.4=pypi_0
267
- watchdog=2.1.6=py39h1a28f6b_0
268
- wcwidth=0.2.5=pyhd3eb1b0_0
269
- webencodings=0.5.1=pypi_0
270
- wheel=0.37.1=pyhd3eb1b0_0
271
- widgetsnbextension=3.5.2=py39hca03da5_0
272
- wordcloud=1.8.1=pypi_0
273
- xarray=0.20.1=pyhd3eb1b0_1
274
- xxhash=0.8.0=h1a28f6b_3
275
- xz=5.2.5=h1a28f6b_1
276
- yake=0.4.8=pypi_0
277
- yaml=0.2.5=h1a28f6b_0
278
- yarg=0.1.9=pypi_0
279
- yarl=1.6.3=py39h1a28f6b_1
280
- zeromq=4.3.4=hc377ac9_0
281
- zfp=0.5.5=hc377ac9_6
282
- zict=2.0.0=pyhd3eb1b0_0
283
- zipp=3.8.0=py39hca03da5_0
284
- zlib=1.2.11=h90dfc92_1014
285
- zstd=1.5.2=h861e0a7_0
 
1
+ bokeh==2.4.1
2
+ Jinja2==3.1.2
3
+ PyYAML==6.0
4
+ numpy==1.22.4
5
+ packaging==21.3
6
+ Pillow==9.1.1
7
+ tornado==6.1
8
+ typing_extensions==4.2.0
9
+ MarkupSafe==2.1.1
10
+ pyparsing==3.0.9
11
+ pandas==1.4.2
12
+ streamlit==1.2.0