Spaces:
Running
Running
File size: 1,602 Bytes
cb5b71d 0c5b67f cb5b71d db55b72 cb5b71d 0c5b67f dc92053 0c5b67f cc8c6fc 0c5b67f cb5b71d 0c5b67f db55b72 0c5b67f bc133ae 0c5b67f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import os
from etils import epath
import streamlit as st
from core.constants import EDITOR_CACHE
from core.past_projects import save_current_project
from core.state import Metadata
import mlcroissant as mlc
# Markdown help text passed to `st.info` in `render_load`; lists providers from
# which a Croissant JSON-LD file can be obtained. `${dataset_id}` in the
# Hugging Face URL is a placeholder the reader substitutes by hand.
_JSON_LD_INFO = """You can download JSON-LD Croissant files from major dataset
providers:
- [Kaggle](https://www.kaggle.com/datasets) embeds Croissant JSON-LD directly in their
HTML.
- [OpenML](https://www.openml.org/search?type=data) offers a 🥐 button on all of their
datasets.
- [Hugging Face](https://huggingface.co/) offers an
[API endpoint](https://datasets-server.huggingface.co/croissant?dataset=${dataset_id}) to
build a Croissant JSON-LD."""
def _on_file_upload(key: str) -> None:
    """Load Croissant metadata from the file held by an upload widget.

    Registered as the `on_change` callback of the file uploader. The uploaded
    bytes are copied to `EDITOR_CACHE / "loaded_croissant"`, parsed with
    `mlc.Dataset`, and on success the resulting metadata is stored in
    `st.session_state[Metadata]` and the current project is saved. A
    `mlc.ValidationError` is reported to the user as a warning and a toast
    rather than propagated.

    Args:
        key: Session-state key under which the upload widget stores its value.
    """
    file = st.session_state[key]
    # The widget value is None when no file is selected (for example after the
    # user removes a previous upload); there is nothing to load in that case.
    if file is None:
        return
    file_cont = file.read()
    # TODO(marcenacp): The Python library should support loading from an open file/dict.
    newfile_name = EDITOR_CACHE / "loaded_croissant"
    os.makedirs(os.path.dirname(newfile_name), exist_ok=True)
    with open(newfile_name, mode="wb") as outfile:
        outfile.write(file_cont)
    try:
        dataset = mlc.Dataset(newfile_name)
        st.session_state[Metadata] = Metadata.from_canonical(dataset.metadata)
        save_current_project()
    except mlc.ValidationError as e:
        st.warning(e)
        st.toast(body="Invalid Croissant File!", icon="🔥")
def render_load():
    """Render the provider info box and the JSON-LD file uploader.

    The uploader stores its value under a fixed session-state key and calls
    `_on_file_upload` with that key whenever its value changes.
    """
    uploader_key = "json-ld-file-upload"
    st.info(_JSON_LD_INFO)
    st.file_uploader(
        label="Drop a JSON-LD",
        type="json",
        key=uploader_key,
        on_change=_on_file_upload,
        args=(uploader_key,),
    )
|