File size: 1,602 Bytes
cb5b71d
 
 
 
 
0c5b67f
cb5b71d
 
 
 
db55b72
 
 
 
 
 
 
 
 
 
 
cb5b71d
0c5b67f
dc92053
0c5b67f
 
 
 
 
 
 
 
 
 
cc8c6fc
0c5b67f
 
 
 
 
cb5b71d
0c5b67f
db55b72
0c5b67f
bc133ae
0c5b67f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import os

from etils import epath
import streamlit as st

from core.constants import EDITOR_CACHE
from core.past_projects import save_current_project
from core.state import Metadata
import mlcroissant as mlc

_JSON_LD_INFO = """You can download JSON-LD Croissant files from major dataset
providers:

- [Kaggle](https://www.kaggle.com/datasets) embeds Croissant JSON-LD directly in their
HTML.
- [OpenML](https://www.openml.org/search?type=data) offers a 🥐 button on all of their
datasets.
- [Hugging Face](https://huggingface.co/) offers an
[API endpoint](https://datasets-server.huggingface.co/croissant?dataset=${dataset_id) to
build a Croissant JSON-LD."""


def _on_file_upload(key):
    """Loads the Croissant metadata from a newly uploaded JSON-LD file.

    Used as the `on_change` callback of the file uploader. The uploaded bytes are
    copied to `EDITOR_CACHE / "loaded_croissant"`, parsed with `mlc.Dataset`, and the
    resulting metadata is stored in the session state (under the `Metadata` key) and
    saved as the current project. When `mlc.ValidationError` is raised, a warning and
    a toast are shown instead.

    Args:
        key: Session-state key of the file uploader widget holding the upload.
    """
    file = st.session_state[key]
    if file is None:
        # The uploader holds no file (e.g. the user cleared it): nothing to load.
        return
    file_cont = file.read()
    # TODO(marcenacp): The Python library should support loading from an open file/dict.
    newfile_name = EDITOR_CACHE / "loaded_croissant"
    os.makedirs(os.path.dirname(newfile_name), exist_ok=True)
    # Write-only access is enough: the file is re-opened by `mlc.Dataset` below.
    with open(newfile_name, mode="wb") as outfile:
        outfile.write(file_cont)
    try:
        dataset = mlc.Dataset(newfile_name)
        st.session_state[Metadata] = Metadata.from_canonical(dataset.metadata)
        save_current_project()
    except mlc.ValidationError as e:
        st.warning(e)
        st.toast(body="Invalid Croissant File!", icon="🔥")


def render_load():
    """Renders the load view: provider help text followed by a JSON-LD uploader.

    The uploader is registered under a fixed session-state key, and that same key is
    forwarded to `_on_file_upload` so the callback can retrieve the uploaded file.
    """
    uploader_key = "json-ld-file-upload"
    st.info(_JSON_LD_INFO)
    st.file_uploader(
        label="Drop a JSON-LD",
        type="json",
        key=uploader_key,
        on_change=_on_file_upload,
        args=(uploader_key,),
    )