# nazneen's picture
# update
# 79de90d
# raw
# history blame
# 3.1 kB
## LIBRARIES ###
## Data
import pandas as pd
# Display floats as dollar amounts with thousands separators and two decimals.
pd.set_option("display.float_format", "${:,.2f}".format)
# Analysis
# App & Visualization
import streamlit as st
from bokeh.models import CustomJS, ColumnDataSource, TextInput, DataTable, TableColumn
from bokeh.plotting import figure
from bokeh.transform import factor_cmap
from bokeh.palettes import Category20c_20
from bokeh.layouts import column, row
# utils
def datasets_explorer_viz(df):
    """Render an interactive dataset explorer in the Streamlit page.

    The layout is a search box stacked above a scatter plot, with a table to
    the right. Points are coloured by ``task``. Selecting points (box select
    or via the search box) lists their ``task`` and ``dataset_id`` in the table.

    Args:
        df: DataFrame with one row per dataset. The code reads the columns
            ``x`` and ``y`` (point coordinates), ``task`` (colour/legend
            category) and ``dataset_id`` (tooltip, table and search key).

    Returns:
        None. The layout is emitted with ``st.bokeh_chart``.
    """
    s = ColumnDataSource(df)
    TOOLTIPS = [("dataset_id", "@dataset_id"), ("task", "@task")]
    color = factor_cmap('task', palette=Category20c_20, factors=df['task'].unique())

    p = figure(
        plot_width=1000,
        plot_height=1000,
        tools="hover,wheel_zoom,pan,box_select",
        title="Dataset explorer",
        tooltips=TOOLTIPS,
        toolbar_location="above",
    )
    p.scatter(
        'x', 'y',
        size=500,
        source=s,
        alpha=0.8,
        marker='circle',
        fill_color=color,
        line_color=color,
        legend_field='task',
    )
    p.legend.location = "bottom_right"
    p.legend.label_text_font_size = "8pt"

    # Start the table with empty columns matching the TableColumn fields so
    # the table has a well-formed (if empty) source before any selection.
    table_source = ColumnDataSource(data=dict(task=[], dataset_id=[]))
    columns = [
        TableColumn(field="task", title="Task"),
        TableColumn(field="dataset_id", title="Dataset ID"),
    ]
    data_table = DataTable(source=table_source, columns=columns, width=300)

    # Mirror the current selection of the scatter source into the table.
    s.selected.js_on_change('indices', CustomJS(args=dict(umap_source=s, table_source=table_source), code="""
        const inds = cb_obj.indices;
        const umapData = umap_source.data;
        const tableData = {'task': [], 'dataset_id': []};
        for (let i = 0; i < inds.length; i++) {
            tableData['task'].push(umapData['task'][inds[i]]);
            tableData['dataset_id'].push(umapData['dataset_id'][inds[i]]);
        }
        table_source.data = tableData;
        table_source.change.emit();
    """))

    # The widget must exist before a callback can be attached to it, and the
    # callback is registered on the widget itself (not on its string value).
    text_input = TextInput(value="", title="Search")
    text_input.js_on_change('value', CustomJS(args=dict(plot_source=s, text_input=text_input), code="""
        const indices = [];
        const plot_data = plot_source.data;
        for (let i = 0; i < plot_data['dataset_id'].length; i++) {
            if (plot_data['dataset_id'][i] == text_input.value || plot_data['task'][i] == text_input.value) {
                indices.push(i);
            }
        }
        // Apply the matches as the selection; this also fires the
        // 'indices' callback above, which refreshes the table.
        plot_source.selected.indices = indices;
        plot_source.change.emit();
    """))

    st.bokeh_chart(row(column(text_input, p), data_table))
if __name__ == "__main__":
    # --- Streamlit app config ---
    st.set_page_config(page_title="Datasets Explorer", layout="wide")

    # --- Load the datasets table from disk and render the explorer ---
    datasets_explorer_viz(pd.read_parquet('./assets/data/datasets_df.parquet'))