Spaces:
Running
Running
Synced repo using 'sync_with_huggingface' Github Action
Browse files
- app.py +3 -11
- dataset_wrangler.py +23 -0
- image_analysis.py +77 -0
app.py
CHANGED
@@ -2,22 +2,14 @@ import streamlit as st
|
|
2 |
|
3 |
import pandas as pd
|
4 |
|
|
|
|
|
5 |
|
6 |
st.write(
|
7 |
"Scrambled Images from [https://www.slv.vic.gov.au/images](https://www.slv.vic.gov.au/images)"
|
8 |
)
|
9 |
|
10 |
-
|
11 |
-
|
12 |
-
df = pd.read_csv(
|
13 |
-
"https://raw.githubusercontent.com/StateLibraryVictoria/public-domain-hack-2024/refs/heads/main/datasets/challenge-3-Image-Pool-2024-11-27.csv"
|
14 |
-
)
|
15 |
-
|
16 |
-
except:
|
17 |
-
|
18 |
-
df = pd.read_csv(
|
19 |
-
"https://raw.githubusercontent.com/StateLibraryVictoria/public-domain-hack-2024/refs/heads/main/datasets/challenge-3-Image-Pool-2024-11-27.csv"
|
20 |
-
)
|
21 |
|
22 |
|
23 |
st.dataframe(df.head(10))
|
|
|
2 |
|
3 |
import pandas as pd
|
4 |
|
5 |
+
import dataset_wrangler
|
6 |
+
|
7 |
|
8 |
# Page caption: credits the image source and links to it (Markdown link syntax).
caption = "Scrambled Images from [https://www.slv.vic.gov.au/images](https://www.slv.vic.gov.au/images)"
st.write(caption)

# Load the cleaned metadata table and show its first ten rows.
df = dataset_wrangler.clean_df()
preview = df.head(10)
st.dataframe(preview)
|
dataset_wrangler.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
# Default CSV source: the challenge-3 image pool published in the
# StateLibraryVictoria/public-domain-hack-2024 GitHub repository.
dataset = (
    "https://raw.githubusercontent.com/StateLibraryVictoria/"
    "public-domain-hack-2024/refs/heads/main/datasets/"
    "challenge-3-Image-Pool-2024-11-27.csv"
)

# Default metadata fields kept by clean_df, in output column order.
columns = [
    "IE PID",
    "Title (DC)",
    "ALMA _ MMS (Object Identifier - IE)",
    "HANDLE (Object Identifier - IE)",
    "Creator (DC)",
    "Genre (DCTERMS)",
    "Created (DCTERMS)",
]


def clean_df(columns=columns, dataset=dataset):
    """Load a CSV and keep only the complete rows of the selected columns.

    Args:
        columns: Column labels to keep; the result has exactly these
            columns, in this order. Defaults to the module-level ``columns``.
        dataset: Anything ``pandas.read_csv`` accepts (URL, path or
            file-like object). Defaults to the module-level ``dataset`` URL.

    Returns:
        A DataFrame restricted to ``columns`` with every row that has a
        missing value in any of those columns removed. The original row
        index labels are kept (the index is not reset).
    """
    selected = pd.read_csv(dataset)[columns]
    return selected.dropna()
|
image_analysis.py
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cv2 as cv
|
2 |
+
import numpy as np
|
3 |
+
import requests
|
4 |
+
from pathlib import Path
|
5 |
+
|
6 |
+
import matplotlib.pyplot as plt
|
7 |
+
|
8 |
+
from sklearn.cluster import KMeans
|
9 |
+
|
10 |
+
|
11 |
+
def get_iiif_image_urls(ie_pid: str, timeout: float = 30.0):
    """Return one 600px-wide JPEG URL per canvas of an item's IIIF manifest.

    Downloads the IIIF Presentation 2.1 manifest for ``ie_pid`` from
    rosetta.slv.vic.gov.au and, for every canvas in the first sequence,
    builds an image URL from the ``@id`` of the first image's service.

    Args:
        ie_pid: Identifier inserted into the manifest URL, e.g. "IE1267294".
        timeout: Seconds to wait for the manifest request before giving up.

    Returns:
        A list of URL strings shaped ``<service @id>/full/600,/0/default.jpg``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures, timeouts, or a
            body that is not valid JSON.
        KeyError, IndexError: If the manifest lacks the expected structure.
    """
    manifest_url = f"https://rosetta.slv.vic.gov.au/delivery/iiif/presentation/2.1/{ie_pid}/manifest"
    print(manifest_url)

    # The context manager releases the session's pooled connections even
    # when the request or the JSON decoding fails.
    with requests.Session() as session:
        response = session.get(manifest_url, timeout=timeout)
        # Fail with a clear HTTP error instead of a confusing JSON error
        # when the server returns an error page.
        response.raise_for_status()
        manifest = response.json()

    image_ids = [
        canvas["images"][0]["resource"]["service"]["@id"]
        for canvas in manifest["sequences"][0]["canvases"]
    ]

    # "full/600,/0/default.jpg": whole image, 600px wide, no rotation.
    return [f"{image_id}/full/600,/0/default.jpg" for image_id in image_ids]
|
29 |
+
|
30 |
+
|
31 |
+
def show_img_compare(img_1, img_2):
    """Show two images side by side in a single matplotlib figure.

    Args:
        img_1: Image drawn in the left panel (anything ``imshow`` accepts).
        img_2: Image drawn in the right panel.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    figure, axes = plt.subplots(1, 2, figsize=(10, 10))
    for axis, image in zip(axes, (img_1, img_2)):
        axis.imshow(image)
        axis.axis("off")  # hide ticks and frame
    figure.tight_layout()
    plt.show()
|
39 |
+
|
40 |
+
|
41 |
+
def palette(clusters):
    """Render cluster centres as a 50x300 strip of equal-width colour bands.

    Args:
        clusters: Object exposing ``cluster_centers_``, an array with one
            row of three colour components per cluster (for example a
            fitted ``KMeans`` estimator).

    Returns:
        A ``(50, 300, 3)`` ``uint8`` array in which band ``i`` is filled
        with centre ``i``, rounded to the nearest integer and clipped to
        the 0-255 range.
    """
    width = 300
    centers = np.asarray(clusters.cluster_centers_)
    count = centers.shape[0]
    swatch = np.zeros((50, width, 3), np.uint8)

    for idx, center in enumerate(centers):
        # Integer arithmetic keeps the bands contiguous and guarantees the
        # last band ends exactly at `width` (no float rounding gaps).
        start = idx * width // count
        stop = (idx + 1) * width // count
        # Round instead of truncating so e.g. 254.7 becomes 255, not 254;
        # clip so out-of-range values cannot wrap around in uint8.
        swatch[:, start:stop, :] = np.clip(np.rint(center), 0, 255)

    return swatch
|
48 |
+
|
49 |
+
|
50 |
+
def get_palette_clusters(img, no_of_clusters=5):
    """Group an image's pixels into colour clusters with K-Means.

    Args:
        img: Array that can be reshaped to rows of three values via
            ``reshape(-1, 3)`` (presumably an H x W x 3 colour image).
        no_of_clusters: Number of clusters (palette colours) to fit.

    Returns:
        The fitted ``KMeans`` estimator; its ``cluster_centers_`` hold one
        colour per cluster.
    """
    pixels = img.reshape(-1, 3)
    # fit() returns the fitted estimator itself, so a single call both
    # trains the model and yields the return value; fitting a second time
    # would only repeat the work.
    return KMeans(n_clusters=no_of_clusters).fit(pixels)
|
58 |
+
|
59 |
+
|
60 |
+
# Demo: fetch the first image of one item, extract its colour palette and
# show the image next to the palette strip.
image_urls = get_iiif_image_urls("IE1267294")

response = requests.get(image_urls[0], timeout=30)
# Stop on HTTP errors instead of trying to decode an error page as an image.
response.raise_for_status()

# IMREAD_COLOR always decodes to a 3-channel BGR array. IMREAD_UNCHANGED (-1)
# would yield 1-channel (greyscale) or 4-channel (alpha) arrays for some
# files, which breaks both COLOR_BGR2RGB and the later reshape(-1, 3).
img = cv.imdecode(np.frombuffer(response.content, np.uint8), cv.IMREAD_COLOR)
if img is None:
    # imdecode signals failure by returning None rather than raising.
    raise ValueError(f"Could not decode image data from {image_urls[0]}")
img = cv.cvtColor(img, cv.COLOR_BGR2RGB)

# cv.resize takes (width, height); a fixed small size keeps clustering fast.
dim = (500, 300)
img = cv.resize(img, dim, interpolation=cv.INTER_AREA)

clt_1 = get_palette_clusters(img)

img_palette = palette(clt_1)

print(img_palette)

show_img_compare(img, img_palette)
|