sotirios-slv committed on
Commit
07c7fb5
·
verified ·
1 Parent(s): 97d9f54

Synced repo using 'sync_with_huggingface' Github Action

Browse files
Files changed (3) hide show
  1. app.py +3 -11
  2. dataset_wrangler.py +23 -0
  3. image_analysis.py +77 -0
app.py CHANGED
@@ -2,22 +2,14 @@ import streamlit as st
2
 
3
  import pandas as pd
4
 
 
 
5
 
6
  st.write(
7
  "Scrambled Images from [https://www.slv.vic.gov.au/images](https://www.slv.vic.gov.au/images)"
8
  )
9
 
10
- try:
11
-
12
- df = pd.read_csv(
13
- "https://raw.githubusercontent.com/StateLibraryVictoria/public-domain-hack-2024/refs/heads/main/datasets/challenge-3-Image-Pool-2024-11-27.csv"
14
- )
15
-
16
- except:
17
-
18
- df = pd.read_csv(
19
- "https://raw.githubusercontent.com/StateLibraryVictoria/public-domain-hack-2024/refs/heads/main/datasets/challenge-3-Image-Pool-2024-11-27.csv"
20
- )
21
 
22
 
23
  st.dataframe(df.head(10))
 
2
 
3
  import pandas as pd
4
 
5
+ import dataset_wrangler
6
+
7
 
8
  st.write(
9
  "Scrambled Images from [https://www.slv.vic.gov.au/images](https://www.slv.vic.gov.au/images)"
10
  )
11
 
12
+ df = dataset_wrangler.clean_df()
 
 
 
 
 
 
 
 
 
 
13
 
14
 
15
  st.dataframe(df.head(10))
dataset_wrangler.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
# Default CSV source: the "challenge-3 Image Pool" file named in the URL.
dataset = "https://raw.githubusercontent.com/StateLibraryVictoria/public-domain-hack-2024/refs/heads/main/datasets/challenge-3-Image-Pool-2024-11-27.csv"

# Columns kept by default; every other column in the CSV is discarded.
columns = [
    "IE PID",
    "Title (DC)",
    "ALMA _ MMS (Object Identifier - IE)",
    "HANDLE (Object Identifier - IE)",
    "Creator (DC)",
    "Genre (DCTERMS)",
    "Created (DCTERMS)",
]


def clean_df(columns=columns, dataset=dataset):
    """Load a CSV and return only the complete rows of the selected columns.

    Args:
        columns: Column labels to keep, in the order they should appear in
            the result. A list or a tuple of labels is accepted.
        dataset: Anything ``pandas.read_csv`` can read (URL, path or
            file-like object).

    Returns:
        A DataFrame restricted to ``columns`` from which every row holding
        a missing value in any of those columns has been dropped. The
        original row index is kept (it is not renumbered).

    Raises:
        KeyError: If a requested column is not present in the CSV.
    """
    # ``df[("a", "b")]`` is looked up as ONE label, so a tuple of column
    # names would raise KeyError; normalise it to a list first.
    if isinstance(columns, tuple):
        columns = list(columns)

    df = pd.read_csv(dataset)
    return df[columns].dropna()
image_analysis.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2 as cv
2
+ import numpy as np
3
+ import requests
4
+ from pathlib import Path
5
+
6
+ import matplotlib.pyplot as plt
7
+
8
+ from sklearn.cluster import KMeans
9
+
10
+
11
def get_iiif_image_urls(ie_pid: str, timeout: float = 30.0):
    """Return 600px-wide JPEG URLs for every canvas in an IIIF manifest.

    The manifest is fetched from the SLV Rosetta IIIF Presentation 2.1
    endpoint built from ``ie_pid``.

    Args:
        ie_pid: Identifier interpolated into the manifest URL
            (e.g. ``"IE1267294"``).
        timeout: Seconds to wait for the manifest request before giving up.

    Returns:
        A list of image URLs, one per canvas, each built from the first
        image's service ``@id`` plus ``/full/600,/0/default.jpg``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
        KeyError, IndexError: If the manifest lacks the expected structure.
    """
    manifest_url = f"https://rosetta.slv.vic.gov.au/delivery/iiif/presentation/2.1/{ie_pid}/manifest"
    print(manifest_url)

    # The context manager releases the session's pooled connections.
    with requests.Session() as session:
        # Without a timeout the request can block forever on a stalled server.
        response = session.get(manifest_url, timeout=timeout)
        # Surface HTTP errors here rather than as a JSON/KeyError further down.
        response.raise_for_status()
        manifest = response.json()

    image_ids = [
        canvas["images"][0]["resource"]["service"]["@id"]
        for canvas in manifest["sequences"][0]["canvases"]
    ]

    return [f"{image_id}/full/600,/0/default.jpg" for image_id in image_ids]
29
+
30
+
31
def show_img_compare(img_1, img_2):
    """Show two images side by side in a single matplotlib figure.

    Args:
        img_1: Image drawn in the left panel.
        img_2: Image drawn in the right panel.

    Both panels have their axes hidden. The call ends with ``plt.show()``.
    """
    fig, axes = plt.subplots(1, 2, figsize=(10, 10))
    for axis, image in zip(axes, (img_1, img_2)):
        axis.imshow(image)
        axis.axis("off")  # hide ticks and frame
    fig.tight_layout()
    plt.show()
39
+
40
+
41
def palette(clusters, width=300, height=50):
    """Render cluster centres as a horizontal strip of colour swatches.

    Args:
        clusters: Object exposing ``cluster_centers_``, a sequence of
            3-component colours (for example a fitted KMeans model).
        width: Strip width in pixels, shared between the swatches.
        height: Strip height in pixels.

    Returns:
        A ``(height, width, 3)`` ``uint8`` array in which each centre fills
        one vertical band, in the order given by ``cluster_centers_``.
        With no centres the strip is all zeros.
    """
    centers = clusters.cluster_centers_
    count = len(centers)
    swatch = np.zeros((height, width, 3), np.uint8)

    for idx, center in enumerate(centers):
        # Exact integer boundaries: the previous float step could round a
        # boundary down and leave a column unfilled.
        left = idx * width // count
        right = (idx + 1) * width // count
        # Float centres are cast to uint8 on assignment (fraction dropped).
        swatch[:, left:right, :] = center

    return swatch
48
+
49
+
50
def get_palette_clusters(img, no_of_clusters=5):
    """Cluster an image's pixels by colour with KMeans.

    Args:
        img: Image array whose elements can be reshaped to rows of three
            values (one row per pixel).
        no_of_clusters: Number of colour clusters to fit.

    Returns:
        The fitted ``KMeans`` estimator; its ``cluster_centers_`` hold the
        representative colours.
    """
    pixels = img.reshape(-1, 3)
    # Fit once: the model used to be fitted twice on the same data, with the
    # first result discarded. ``fit`` returns the estimator itself.
    return KMeans(n_clusters=no_of_clusters).fit(pixels)
58
+
59
+
60
# Demo run: fetch the first image of one item, extract its colour palette
# and display the image next to the palette strip.
image_urls = get_iiif_image_urls("IE1267294")

# Bounded wait plus explicit status check, so a failed download does not
# reach the decoder as an HTML error page.
response = requests.get(image_urls[0], timeout=30)
response.raise_for_status()

# IMREAD_COLOR always decodes to a 3-channel BGR array. The previous flag
# (-1, "unchanged") keeps greyscale JPEGs single-channel, which makes the
# BGR->RGB conversion below fail.
img = cv.imdecode(np.frombuffer(response.content, np.uint8), cv.IMREAD_COLOR)
if img is None:
    # imdecode signals undecodable data by returning None.
    raise ValueError(f"Could not decode image data from {image_urls[0]}")
img = cv.cvtColor(img, cv.COLOR_BGR2RGB)

# cv.resize takes (width, height); the aspect ratio is not preserved.
dim = (500, 300)
img = cv.resize(img, dim, interpolation=cv.INTER_AREA)

clt_1 = get_palette_clusters(img)

img_palette = palette(clt_1)

print(img_palette)

show_img_compare(img, img_palette)