Spaces:
Running
Running
File size: 6,480 Bytes
36f4fe3 cb5b71d bc133ae cb5b71d 36f4fe3 cb5b71d e92e659 cb5b71d 36f4fe3 cb5b71d bc133ae 36f4fe3 cb5b71d bc133ae cb5b71d e92e659 cb5b71d bc133ae 36f4fe3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
import datetime
import enum
import streamlit as st
from core.names import find_unique_name
from core.state import Metadata
import mlcroissant as mlc
# License display names and their identifiers, taken from the list published
# at LICENSES_URL. Keys are the human-readable labels and values are the short
# codes. Insertion order matters: positions in this mapping are used as indices
# by lookups over `LICENSES.values()`.
LICENSES_URL = "https://huggingface.co/docs/hub/repositories-licenses"

LICENSES = {
    # Catch-all entries.
    "Unknown": "unknown",
    "Other": "other",
    # Named licenses.
    "Apache license 2.0": "apache-2.0",
    "MIT": "mit",
    "OpenRAIL license family": "openrail",
    "BigScience OpenRAIL-M": "bigscience-openrail-m",
    "CreativeML OpenRAIL-M": "creativeml-openrail-m",
    "BigScience BLOOM RAIL 1.0": "bigscience-bloom-rail-1.0",
    "BigCode Open RAIL-M v1": "bigcode-openrail-m",
    "Academic Free License v3.0": "afl-3.0",
    "Artistic license 2.0": "artistic-2.0",
    "Boost Software License 1.0": "bsl-1.0",
    "BSD license family": "bsd",
    "BSD 2-clause “Simplified” license": "bsd-2-clause",
    "BSD 3-clause “New” or “Revised” license": "bsd-3-clause",
    "BSD 3-clause Clear license": "bsd-3-clause-clear",
    "Computational Use of Data Agreement": "c-uda",
    "Creative Commons license family": "cc",
    "Creative Commons Zero v1.0 Universal": "cc0-1.0",
    "Creative Commons Attribution 2.0": "cc-by-2.0",
    "Creative Commons Attribution 2.5": "cc-by-2.5",
    "Creative Commons Attribution 3.0": "cc-by-3.0",
    "Creative Commons Attribution 4.0": "cc-by-4.0",
    "Creative Commons Attribution Share Alike 3.0": "cc-by-sa-3.0",
    "Creative Commons Attribution Share Alike 4.0": "cc-by-sa-4.0",
    "Creative Commons Attribution Non Commercial 2.0": "cc-by-nc-2.0",
    "Creative Commons Attribution Non Commercial 3.0": "cc-by-nc-3.0",
    "Creative Commons Attribution Non Commercial 4.0": "cc-by-nc-4.0",
    "Creative Commons Attribution No Derivatives 4.0": "cc-by-nd-4.0",
    "Creative Commons Attribution Non Commercial No Derivatives 3.0": "cc-by-nc-nd-3.0",
    "Creative Commons Attribution Non Commercial No Derivatives 4.0": "cc-by-nc-nd-4.0",
    "Creative Commons Attribution Non Commercial Share Alike 2.0": "cc-by-nc-sa-2.0",
    "Creative Commons Attribution Non Commercial Share Alike 3.0": "cc-by-nc-sa-3.0",
    "Creative Commons Attribution Non Commercial Share Alike 4.0": "cc-by-nc-sa-4.0",
    "Community Data License Agreement – Sharing, Version 1.0": "cdla-sharing-1.0",
    "Community Data License Agreement – Permissive, Version 1.0": "cdla-permissive-1.0",
    "Community Data License Agreement – Permissive, Version 2.0": "cdla-permissive-2.0",
    "Do What The F*ck You Want To Public License": "wtfpl",
    "Educational Community License v2.0": "ecl-2.0",
    "Eclipse Public License 1.0": "epl-1.0",
    "Eclipse Public License 2.0": "epl-2.0",
    "European Union Public License 1.1": "eupl-1.1",
    "GNU Affero General Public License v3.0": "agpl-3.0",
    "GNU Free Documentation License family": "gfdl",
    "GNU General Public License family": "gpl",
    "GNU General Public License v2.0": "gpl-2.0",
    "GNU General Public License v3.0": "gpl-3.0",
    "GNU Lesser General Public License family": "lgpl",
    "GNU Lesser General Public License v2.1": "lgpl-2.1",
    "GNU Lesser General Public License v3.0": "lgpl-3.0",
    "ISC": "isc",
    "LaTeX Project Public License v1.3c": "lppl-1.3c",
    "Microsoft Public License": "ms-pl",
    "Mozilla Public License 2.0": "mpl-2.0",
    "Open Data Commons License Attribution family": "odc-by",
    "Open Database License family": "odbl",
    "Open Rail++-M License": "openrail++",
    "Open Software License 3.0": "osl-3.0",
    "PostgreSQL License": "postgresql",
    "SIL Open Font License 1.1": "ofl-1.1",
    "University of Illinois/NCSA Open Source License": "ncsa",
    "The Unlicense": "unlicense",
    "zLib License": "zlib",
    "Open Data Commons Public Domain Dedication and License": "pddl",
    "Lesser General Public License For Linguistic Resources": "lgpl-lr",
    "DeepFloyd IF Research License Agreement": "deepfloyd-if-license",
    "Llama 2 Community License Agreement": "llama2",
}
def find_license_index(code: str) -> int | None:
    """Returns the position of `code` among the values of LICENSES.

    Positions follow the insertion order of the LICENSES mapping. The first
    matching position is returned; None is returned when no value equals
    `code`.
    """
    matching_positions = (
        position
        for position, candidate in enumerate(LICENSES.values())
        if candidate == code
    )
    return next(matching_positions, None)
class MetadataEvent(enum.Enum):
    """Kinds of change that can be applied to the metadata.

    Each member's value is the same string as its name.
    """

    # Changes to individual metadata fields.
    NAME = "NAME"
    DESCRIPTION = "DESCRIPTION"
    DATE_PUBLISHED = "DATE_PUBLISHED"
    URL = "URL"
    LICENSE = "LICENSE"
    CITATION = "CITATION"
    VERSION = "VERSION"
    DATA_BIASES = "DATA_BIASES"
    DATA_COLLECTION = "DATA_COLLECTION"
    PERSONAL_SENSITIVE_INFORMATION = "PERSONAL_SENSITIVE_INFORMATION"

    # Changes to the creator entry.
    CREATOR_ADD = "CREATOR_ADD"
    CREATOR_NAME = "CREATOR_NAME"
    CREATOR_URL = "CREATOR_URL"
    CREATOR_REMOVE = "CREATOR_REMOVE"
def _ensure_creator(metadata):
    """Returns `metadata.creator`, creating an empty one first if it is unset."""
    if not metadata.creator:
        metadata.creator = mlc.PersonOrOrganization()
    return metadata.creator


def handle_metadata_change(event: MetadataEvent, metadata: Metadata, key: str):
    """Applies the change described by `event` to `metadata` in place.

    Args:
        event: The kind of change to apply.
        metadata: The metadata object to mutate.
        key: Key in `st.session_state` under which the new value is stored.
            It is not read for CREATOR_ADD and CREATOR_REMOVE.

    Events that match none of the branches below are ignored.
    """
    if event == MetadataEvent.NAME:
        # An empty set is passed as the collection of names to stay unique
        # against.
        metadata.name = find_unique_name(set(), st.session_state[key])
    elif event == MetadataEvent.DESCRIPTION:
        metadata.description = st.session_state[key]
    elif event == MetadataEvent.LICENSE:
        # The session value is the display label; store the matching code
        # (None when the label is not a key of LICENSES).
        metadata.license = LICENSES.get(st.session_state[key])
    elif event == MetadataEvent.CITATION:
        metadata.citation = st.session_state[key]
    elif event == MetadataEvent.URL:
        metadata.url = st.session_state[key]
    elif event == MetadataEvent.VERSION:
        metadata.version = st.session_state[key]
    elif event == MetadataEvent.DATA_BIASES:
        metadata.data_biases = st.session_state[key]
    elif event == MetadataEvent.DATA_COLLECTION:
        metadata.data_collection = st.session_state[key]
    elif event == MetadataEvent.PERSONAL_SENSITIVE_INFORMATION:
        metadata.personal_sensitive_information = st.session_state[key]
    elif event == MetadataEvent.DATE_PUBLISHED:
        date = st.session_state[key]
        if date is None:
            # The stored value can be None (e.g. a date input left empty);
            # clear the field instead of failing on `date.year`.
            metadata.date_published = None
        else:
            # Only year, month and day are carried over; the time is midnight.
            metadata.date_published = datetime.datetime(
                date.year, date.month, date.day
            )
    elif event == MetadataEvent.CREATOR_ADD:
        # Always starts from a fresh, empty creator.
        metadata.creator = mlc.PersonOrOrganization()
    elif event == MetadataEvent.CREATOR_REMOVE:
        metadata.creator = None
    elif event == MetadataEvent.CREATOR_NAME:
        _ensure_creator(metadata).name = st.session_state[key]
    elif event == MetadataEvent.CREATOR_URL:
        _ensure_creator(metadata).url = st.session_state[key]
|