File size: 6,480 Bytes
36f4fe3
cb5b71d
 
 
 
bc133ae
cb5b71d
36f4fe3
cb5b71d
e92e659
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cb5b71d
 
 
 
 
 
36f4fe3
cb5b71d
 
 
bc133ae
 
 
 
36f4fe3
 
 
 
cb5b71d
 
 
 
bc133ae
cb5b71d
 
 
e92e659
cb5b71d
 
 
 
bc133ae
 
 
 
 
 
 
 
36f4fe3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import datetime
import enum

import streamlit as st

from core.names import find_unique_name
from core.state import Metadata
import mlcroissant as mlc

# The license list below was taken from the page at this URL.
LICENSES_URL = "https://huggingface.co/docs/hub/repositories-licenses"
# Maps a license's display name to its license code.
# Insertion order matters: find_license_index() returns positions in this order.
LICENSES = {
    "Unknown": "unknown",
    "Other": "other",
    "Apache license 2.0": "apache-2.0",
    "MIT": "mit",
    "OpenRAIL license family": "openrail",
    "BigScience OpenRAIL-M": "bigscience-openrail-m",
    "CreativeML OpenRAIL-M": "creativeml-openrail-m",
    "BigScience BLOOM RAIL 1.0": "bigscience-bloom-rail-1.0",
    "BigCode Open RAIL-M v1": "bigcode-openrail-m",
    "Academic Free License v3.0": "afl-3.0",
    "Artistic license 2.0": "artistic-2.0",
    "Boost Software License 1.0": "bsl-1.0",
    "BSD license family": "bsd",
    "BSD 2-clause “Simplified” license": "bsd-2-clause",
    "BSD 3-clause “New” or “Revised” license": "bsd-3-clause",
    "BSD 3-clause Clear license": "bsd-3-clause-clear",
    "Computational Use of Data Agreement": "c-uda",
    "Creative Commons license family": "cc",
    "Creative Commons Zero v1.0 Universal": "cc0-1.0",
    "Creative Commons Attribution 2.0": "cc-by-2.0",
    "Creative Commons Attribution 2.5": "cc-by-2.5",
    "Creative Commons Attribution 3.0": "cc-by-3.0",
    "Creative Commons Attribution 4.0": "cc-by-4.0",
    "Creative Commons Attribution Share Alike 3.0": "cc-by-sa-3.0",
    "Creative Commons Attribution Share Alike 4.0": "cc-by-sa-4.0",
    "Creative Commons Attribution Non Commercial 2.0": "cc-by-nc-2.0",
    "Creative Commons Attribution Non Commercial 3.0": "cc-by-nc-3.0",
    "Creative Commons Attribution Non Commercial 4.0": "cc-by-nc-4.0",
    "Creative Commons Attribution No Derivatives 4.0": "cc-by-nd-4.0",
    "Creative Commons Attribution Non Commercial No Derivatives 3.0": "cc-by-nc-nd-3.0",
    "Creative Commons Attribution Non Commercial No Derivatives 4.0": "cc-by-nc-nd-4.0",
    "Creative Commons Attribution Non Commercial Share Alike 2.0": "cc-by-nc-sa-2.0",
    "Creative Commons Attribution Non Commercial Share Alike 3.0": "cc-by-nc-sa-3.0",
    "Creative Commons Attribution Non Commercial Share Alike 4.0": "cc-by-nc-sa-4.0",
    "Community Data License Agreement – Sharing, Version 1.0": "cdla-sharing-1.0",
    "Community Data License Agreement – Permissive, Version 1.0": "cdla-permissive-1.0",
    "Community Data License Agreement – Permissive, Version 2.0": "cdla-permissive-2.0",
    "Do What The F*ck You Want To Public License": "wtfpl",
    "Educational Community License v2.0": "ecl-2.0",
    "Eclipse Public License 1.0": "epl-1.0",
    "Eclipse Public License 2.0": "epl-2.0",
    "European Union Public License 1.1": "eupl-1.1",
    "GNU Affero General Public License v3.0": "agpl-3.0",
    "GNU Free Documentation License family": "gfdl",
    "GNU General Public License family": "gpl",
    "GNU General Public License v2.0": "gpl-2.0",
    "GNU General Public License v3.0": "gpl-3.0",
    "GNU Lesser General Public License family": "lgpl",
    "GNU Lesser General Public License v2.1": "lgpl-2.1",
    "GNU Lesser General Public License v3.0": "lgpl-3.0",
    "ISC": "isc",
    "LaTeX Project Public License v1.3c": "lppl-1.3c",
    "Microsoft Public License": "ms-pl",
    "Mozilla Public License 2.0": "mpl-2.0",
    "Open Data Commons License Attribution family": "odc-by",
    "Open Database License family": "odbl",
    "Open Rail++-M License": "openrail++",
    "Open Software License 3.0": "osl-3.0",
    "PostgreSQL License": "postgresql",
    "SIL Open Font License 1.1": "ofl-1.1",
    "University of Illinois/NCSA Open Source License": "ncsa",
    "The Unlicense": "unlicense",
    "zLib License": "zlib",
    "Open Data Commons Public Domain Dedication and License": "pddl",
    "Lesser General Public License For Linguistic Resources": "lgpl-lr",
    "DeepFloyd IF Research License Agreement": "deepfloyd-if-license",
    "Llama 2 Community License Agreement": "llama2",
}


def find_license_index(code: str) -> int | None:
    """Returns the position of `code` among the LICENSES values, or None if absent.

    Positions follow the insertion order of LICENSES; the first match wins.
    """
    matching_positions = (
        position
        for position, candidate in enumerate(LICENSES.values())
        if candidate == code
    )
    # next() with a default yields None when no license code matched.
    return next(matching_positions, None)


class MetadataEvent(enum.Enum):
    """Event that triggers a metadata change.

    Each member names one kind of edit that handle_metadata_change() applies
    to a Metadata object. Every member's value is its own name as a string.
    """

    # Edits to a single metadata field.
    NAME = "NAME"
    DESCRIPTION = "DESCRIPTION"
    DATE_PUBLISHED = "DATE_PUBLISHED"
    URL = "URL"
    LICENSE = "LICENSE"
    CITATION = "CITATION"
    VERSION = "VERSION"
    DATA_BIASES = "DATA_BIASES"
    DATA_COLLECTION = "DATA_COLLECTION"
    PERSONAL_SENSITIVE_INFORMATION = "PERSONAL_SENSITIVE_INFORMATION"
    # Actions on the metadata's creator: create, edit its name/url, or remove.
    CREATOR_ADD = "CREATOR_ADD"
    CREATOR_NAME = "CREATOR_NAME"
    CREATOR_URL = "CREATOR_URL"
    CREATOR_REMOVE = "CREATOR_REMOVE"


def handle_metadata_change(event: MetadataEvent, metadata: Metadata, key: str):
    """Applies the metadata edit described by `event` to `metadata`, in place.

    Args:
        event: The kind of change to apply.
        metadata: The metadata object to mutate.
        key: Key under which the new value is stored in `st.session_state`.
            It is not read for CREATOR_ADD and CREATOR_REMOVE.

    Raises:
        KeyError: If `key` is needed for `event` but missing from
            `st.session_state`.
    """
    # Events whose session-state value is copied unchanged onto one attribute.
    plain_fields = {
        MetadataEvent.DESCRIPTION: "description",
        MetadataEvent.CITATION: "citation",
        MetadataEvent.URL: "url",
        MetadataEvent.VERSION: "version",
        MetadataEvent.DATA_BIASES: "data_biases",
        MetadataEvent.DATA_COLLECTION: "data_collection",
        MetadataEvent.PERSONAL_SENSITIVE_INFORMATION: "personal_sensitive_information",
    }
    if event in plain_fields:
        setattr(metadata, plain_fields[event], st.session_state[key])
    elif event == MetadataEvent.NAME:
        # NOTE(review): the empty set is presumably "names already taken" --
        # confirm against core.names.find_unique_name.
        metadata.name = find_unique_name(set(), st.session_state[key])
    elif event == MetadataEvent.LICENSE:
        # Session state holds the display name; store the matching license
        # code, or None when the name is not a key of LICENSES.
        metadata.license = LICENSES.get(st.session_state[key])
    elif event == MetadataEvent.DATE_PUBLISHED:
        date = st.session_state[key]
        if date is None:
            # An empty date widget yields None; clear the field rather than
            # failing on `None.year`.
            # NOTE(review): assumes Metadata.date_published accepts None -- confirm.
            metadata.date_published = None
        else:
            # Normalize to a datetime at midnight of the selected day.
            metadata.date_published = datetime.datetime(
                date.year, date.month, date.day
            )
    elif event == MetadataEvent.CREATOR_ADD:
        # Replaces any existing creator with a blank one.
        metadata.creator = mlc.PersonOrOrganization()
    elif event == MetadataEvent.CREATOR_REMOVE:
        metadata.creator = None
    elif event == MetadataEvent.CREATOR_NAME:
        # Create the creator on demand so the edit is never lost.
        if not metadata.creator:
            metadata.creator = mlc.PersonOrOrganization()
        metadata.creator.name = st.session_state[key]
    elif event == MetadataEvent.CREATOR_URL:
        if not metadata.creator:
            metadata.creator = mlc.PersonOrOrganization()
        metadata.creator.url = st.session_state[key]