Spaces:
Running
Running
Synced repo using 'sync_with_huggingface' Github Action
Browse files
- app.py +3 -4
- dataset_wrangler.py +82 -2
app.py
CHANGED
@@ -1,7 +1,5 @@
|
|
1 |
import streamlit as st
|
2 |
|
3 |
-
import pandas as pd
|
4 |
-
|
5 |
import dataset_wrangler
|
6 |
|
7 |
|
@@ -9,7 +7,8 @@ st.write(
|
|
9 |
"Scrambled Images from [https://www.slv.vic.gov.au/images](https://www.slv.vic.gov.au/images)"
|
10 |
)
|
11 |
|
12 |
-
|
|
|
13 |
|
14 |
|
15 |
-
st.
|
|
|
1 |
import streamlit as st
|
2 |
|
|
|
|
|
3 |
import dataset_wrangler
|
4 |
|
5 |
|
|
|
7 |
"Scrambled Images from [https://www.slv.vic.gov.au/images](https://www.slv.vic.gov.au/images)"
|
8 |
)
|
9 |
|
10 |
+
|
11 |
+
p = dataset_wrangler.create_grid()
|
12 |
|
13 |
|
14 |
+
st.bokeh_chart(p, use_container_width=True)
|
dataset_wrangler.py
CHANGED
@@ -1,14 +1,94 @@
|
|
1 |
import pandas as pd
|
|
|
|
|
|
|
|
|
2 |
|
3 |
dataset = "https://raw.githubusercontent.com/StateLibraryVictoria/public-domain-hack-2024/refs/heads/ch4-data-viz/datasets/ch3_colour_data_viz_suggestions_set_2_augmented.csv"
|
4 |
|
5 |
|
6 |
-
def clean_df(columns=None, dataset=dataset):
|
7 |
|
8 |
df = pd.read_csv(dataset)
|
9 |
|
10 |
if columns:
|
11 |
df = df[columns]
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
return df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import pandas as pd
|
2 |
+
import math
|
3 |
+
|
4 |
+
from bokeh.plotting import figure, show
|
5 |
+
|
6 |
|
7 |
dataset = "https://raw.githubusercontent.com/StateLibraryVictoria/public-domain-hack-2024/refs/heads/ch4-data-viz/datasets/ch3_colour_data_viz_suggestions_set_2_augmented.csv"


def clean_df(columns=None, dataset=dataset, subset=None):
    """Load the CSV at *dataset* and drop rows that have missing values.

    Args:
        columns: Optional list of column labels to keep. When falsy, every
            column read from the CSV is kept.
        dataset: Anything ``pandas.read_csv`` accepts (URL, path or file-like
            object). Defaults to the module-level ``dataset`` URL.
        subset: Optional list of column labels. When given, a row is dropped
            only if one of these columns is missing; when falsy, a row is
            dropped if any column is missing.

    Returns:
        The filtered DataFrame, re-indexed from 0 so positional and label
        indexing agree after rows were removed.
    """
    df = pd.read_csv(dataset)

    if columns:
        df = df[columns]

    # ``subset=None`` tells dropna to look at every column, so an empty or
    # omitted subset falls back to the "drop on any missing value" behaviour.
    df = df.dropna(subset=subset or None)

    return df.reset_index(drop=True)
|
25 |
+
|
26 |
+
|
27 |
+
def parse_rgb(df):
    """Split one row's ``pal_5`` palette string into its three components.

    Args:
        df: A single row (anything indexable by ``"pal_5"``) whose value is a
            string. NOTE(review): the value is assumed to be a bracketed,
            comma-separated triple such as ``"[12, 34, 56]"`` -- confirm
            against the dataset.

    Returns:
        A ``(r, g, b)`` tuple of strings with the enclosing brackets and any
        surrounding whitespace removed from each component.

    Raises:
        ValueError: If the value does not contain exactly three
            comma-separated components.
    """
    raw = df["pal_5"]

    # Trim outer whitespace first so the brackets are really at the ends,
    # then drop the brackets themselves.
    palette = raw.strip().strip("[]")

    # Strip each piece so " 34" becomes "34".
    parts = [part.strip() for part in palette.split(",")]
    if len(parts) != 3:
        raise ValueError(
            f"expected 3 colour components in pal_5, got {len(parts)}: {raw!r}"
        )

    r, g, b = parts
    return r, g, b
|
41 |
+
|
42 |
+
|
43 |
+
def create_rgba(df):
    """Format one row's colour components as an ``rgb(r, g, b)`` string.

    Args:
        df: A single row (anything indexable by ``"red"``, ``"green"`` and
            ``"blue"``).

    Returns:
        The string ``"rgb(<red>, <green>, <blue>)"`` with the three values
        interpolated as-is. Despite the function name, no alpha component is
        included.
    """
    red, green, blue = df["red"], df["green"], df["blue"]
    return f"rgb({red}, {green}, {blue})"
|
48 |
+
|
49 |
+
|
50 |
+
def get_square_coords(df):
    """Lay out one grid coordinate per row of *df* in a roughly square grid.

    Rows are placed column by column: the first ``side`` rows get ``x == 0``
    and ``y == 0 .. side - 1``, the next ``side`` rows get ``x == 1``, and so
    on, where ``side`` is the square root of the row count rounded to the
    nearest integer. The last column may be only partially filled.

    Args:
        df: Any sized object; only ``len(df)`` is used.

    Returns:
        A list of ``(x, y)`` integer tuples with exactly ``len(df)`` entries
        (an empty list for empty input).
    """
    length = len(df)
    if length == 0:
        # A side of 0 would make the layout undefined; nothing to place.
        return []

    side = round(math.sqrt(length))

    # divmod(i, side) == (column, position within column). Iterating over
    # every row index keeps the trailing, partially filled column.
    return [divmod(i, side) for i in range(length)]
|
68 |
+
|
69 |
+
|
70 |
+
def create_grid():
    """Build a Bokeh figure showing one coloured circle per dataset row.

    Loads the dataset via ``clean_df`` (dropping rows without a ``pal_5``
    value), assigns each row a grid position with ``get_square_coords``,
    derives an ``rgb(...)`` colour string per row from ``pal_5`` and draws a
    circle of radius 0.3 at each position in that colour.

    Returns:
        The Bokeh figure with the circle glyph added.
    """
    df = clean_df(subset=["pal_5"])
    coords = get_square_coords(df)

    channels = ["red", "green", "blue"]
    df[channels] = df.apply(parse_rgb, result_type="expand", axis=1)

    # Cast to integers *before* formatting the colour strings so the cast
    # actually shapes the output (and rejects non-numeric components early).
    for channel in channels:
        df[channel] = df[channel].astype("int64")

    df["rgba"] = df.apply(create_rgba, axis=1)

    # Keep the colour list the same length as the coordinate lists.
    rgba_list = df["rgba"].values.tolist()[: len(coords)]

    x = [coord[0] for coord in coords]
    y = [coord[1] for coord in coords]

    TOOLS = "hover,crosshair,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,undo,redo,reset,tap,save,box_select,poly_select,lasso_select,examine,help"

    p = figure(tools=TOOLS)

    p.circle(x=x, y=y, radius=0.3, color=rgba_list)

    return p
|