File size: 511 Bytes
97d9f54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import pandas as pd

dataset = "https://raw.githubusercontent.com/StateLibraryVictoria/public-domain-hack-2024/refs/heads/main/datasets/challenge-3-Image-Pool-2024-11-27.csv"

columns = [
    "IE PID",
    "Title (DC)",
    "ALMA _ MMS (Object Identifier - IE)",
    "HANDLE (Object Identifier - IE)",
    "Creator (DC)",
    "Genre (DCTERMS)",
    "Created (DCTERMS)",
]


def clean_df(columns=columns, dataset=dataset):

    df = pd.read_csv(dataset)

    df = df[columns]
    df = df.dropna()

    return df