Spaces:
Running
Running
import pandas as pd | |
import math | |
import re | |
from bokeh.models import HoverTool | |
from bokeh.plotting import figure | |
from bokeh.models import CustomJS | |
def split_created_year(created_year): | |
matches = re.findall("([1-3][0-9]{3})", created_year) | |
start_date = int(matches[0]) | |
if len(matches) > 1: | |
end_date = int(matches[-1]) | |
else: | |
end_date = start_date | |
return start_date, end_date | |
def display_event() -> CustomJS: | |
js_code = """ | |
console.log('Hello mum!) | |
""" | |
return CustomJS(code=js_code) | |
def clean_df(columns=None, dataset="", subset=[]): | |
df = pd.read_csv(dataset) | |
if columns: | |
df = df[columns] | |
if subset: | |
df = df.dropna(subset=subset) | |
else: | |
df = df.dropna() | |
df = df.reset_index(drop=True) | |
return df | |
def create_rgba(df, palette_col): | |
rgba = f'rgb({df[f"red_{palette_col}"]}, {df[f"green_{palette_col}"]}, {df[f"blue_{palette_col}"]})' | |
return rgba | |
def get_square_coords(df): | |
start = 0 | |
x = 0 | |
length = len(df) | |
sq = math.sqrt(length) | |
increment = round(sq) | |
end = increment | |
coords = [] | |
while end <= length: | |
arr = [(x, y) for y, item in enumerate(df.index[start:end])] | |
coords.extend(arr) | |
end += increment | |
start += increment | |
x += 1 | |
return coords | |
def get_tooltips_dict(df, columns): | |
df = df[columns] | |
tooltips = df.to_dict() | |
return tooltips | |
def create_grid(df, palette_columns=["pal_1", "pal_3", "pal_5"]): | |
markers = { | |
"pal_1": "circle", | |
"pal_2": "hex", | |
"pal_3": "square_pin", | |
"pal_4": "plus", | |
"pal_5": "triangle", | |
} | |
coords = get_square_coords(df) | |
# bodge the df to ensure the length matches the coords | |
df = df.head(len(coords)) | |
df["viewer_link"] = ( | |
"https://viewer.slv.vic.gov.au/?entity=" | |
+ df["IE PID"].astype(str) | |
+ "&mode=browse" | |
) | |
TOOLS = "crosshair,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,undo,redo,reset" | |
data = pd.DataFrame( | |
{ | |
"x": [coord[0] for coord in coords], | |
"y": [coord[1] for coord in coords], | |
"titles": df["Title (DC)"].values.tolist(), | |
"created": df["Created - W 3 CDTF (DCTERMS)"].values.tolist(), | |
"viewer_link": df["viewer_link"].values.tolist(), | |
} | |
) | |
tt = """ | |
<div> | |
<a href=@viewer_link{safe}}>@viewer_link{safe}</a> | |
</div> | |
""" | |
p = figure(sizing_mode="stretch_width", max_width=1000, tools=TOOLS) | |
p.grid.grid_line_color = None | |
p.axis.visible = False | |
# hover = HoverTool( | |
# # tooltips=[ | |
# # ("Title", "@titles"), | |
# # ("Date created", "@created"), | |
# # ("SLV Viewer", "@viewer_link"), | |
# # ] | |
# tt | |
# ) | |
hover = HoverTool(tooltips=tt) | |
p.add_tools(hover) | |
radius = 12 | |
alpha = 0.25 | |
hover_alpha = 0.5 | |
for col in palette_columns: | |
df[col] = df[col].str.strip("[") | |
df[col] = df[col].str.strip("]") | |
df[[f"red_{col}", f"green_{col}", f"blue_{col}"]] = df[col].str.split( | |
",", expand=True | |
) | |
df[f"rgba_{col}"] = ( | |
"rgb(" | |
+ df[f"red_{col}"] | |
+ "," | |
+ df[f"green_{col}"] | |
+ "," | |
+ df[f"blue_{col}"] | |
+ ")" | |
) | |
data["rgba"] = df[f"rgba_{col}"].values.tolist() | |
p.scatter( | |
x="x", | |
y="y", | |
size=radius, | |
color="rgba", | |
alpha=alpha, | |
hover_alpha=hover_alpha, | |
hover_line_color="white", | |
marker=markers[col], | |
source=data, | |
) | |
return p | |
# div = Div(width=1000) | |
# curdoc().on_event(events.DocumentReady, display_event(div)) | |
# from bokeh import events | |
# dataset = "https://raw.githubusercontent.com/StateLibraryVictoria/public-domain-hack-2024/refs/heads/ch4-data-viz/datasets/ch3_colour_data_viz_suggestions_set_2_augmented.csv" | |
# p = create_grid(dataset) | |
# show(p) | |