File size: 3,486 Bytes
cdb761d 839621c cdb761d 839621c cdb761d 839621c 8c543d4 cdb761d f474299 cdb761d 8c543d4 839621c cdb761d f92d1a9 839621c cdb761d 839621c cdb761d 839621c f92d1a9 cdb761d 839621c cdb761d f92d1a9 839621c f92d1a9 cdb761d f92d1a9 839621c cdb761d 839621c cdb761d 839621c cdb761d 839621c cdb761d 811432c cdb761d 811432c cdb761d 811432c cdb761d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
import time
from hub import (
setup_dataset_on_hub,
duplicate_space_on_hub,
add_project_config_to_space_repo,
)
import streamlit as st
# Constants
# Defined in this file rather than in defaults.py.
DEFAULT_DOMAIN = "farming"

# Page-level settings: title, icon, and a sidebar that starts collapsed.
st.set_page_config(
    page_title="Domain Data Grower",
    page_icon="π§βπΎ",
    initial_sidebar_state="collapsed",
)

st.header("π§βπΎ Domain Data Grower")
st.divider()

# Sidebar shortcut to the Hugging Face token settings page.
st.sidebar.link_button(
    label="π€ Get your Hub Token",
    url="https://huggingface.co/settings/tokens",
)
################################################################################
# APP MARKDOWN
################################################################################

st.header("π± Create a domain specific dataset")

# Introductory text shown above the configuration form. The blank lines around
# the bullet list are deliberate: without one after the last bullet, Markdown
# treats the closing sentence as a continuation of that bullet instead of a
# separate paragraph.
st.markdown(
    """This space will set up your domain specific dataset project. It will
create the resources that you need to build a dataset. Those resources include:

- A dataset repository on the Hub
- Another space to define expert domain and run generation pipelines

For a complete overview of the project, check out the README.
"""
)

# In-app link to the page script stored under pages/.
st.page_link(
    "pages/π§βπΎ Domain Data Grower.py",
    label="Domain Data Grower",
    icon="π§βπΎ",
)
################################################################################
# CONFIGURATION
################################################################################

st.subheader("πΎ Project Configuration")

# Surrounding whitespace is stripped from the text fields: a pasted value with
# a stray space or newline would otherwise end up inside the repo ids or the
# token sent to the Hub.
project_name = (st.text_input("Project Name", DEFAULT_DOMAIN) or "").strip()
hub_username = (st.text_input("Hub Username", "argilla") or "").strip()
hub_token = (st.text_input("Hub Token", type="password") or "").strip()
private_selector = st.checkbox("Private Space", value=False)

if st.button("π€ Setup Project Resources"):
    # Stop early with a readable message instead of letting a blank field
    # reach the Hub helpers and surface as a traceback.
    missing_fields = [
        field_label
        for field_label, field_value in (
            ("Project Name", project_name),
            ("Hub Username", hub_username),
            ("Hub Token", hub_token),
        )
        if not field_value
    ]
    if missing_fields:
        st.error(f"Please fill in: {', '.join(missing_fields)}.")
        st.stop()

    # Build every identifier once so that creation calls, the configuration
    # push, and the links shown to the user all refer to the same repos.
    repo_id = f"{hub_username}/{project_name}"
    space_name = f"{project_name}_config_space"
    argilla_name = f"{project_name}_argilla_space"
    space_repo_id = f"{hub_username}/{space_name}"
    argilla_repo_id = f"{hub_username}/{argilla_name}"
    space_url = f"https://huggingface.co/spaces/{space_repo_id}"
    argilla_url = f"https://huggingface.co/spaces/{argilla_repo_id}"

    # Step 1: dataset repository.
    setup_dataset_on_hub(
        repo_id=repo_id,
        hub_token=hub_token,
    )
    st.success(
        "Dataset seed created and pushed to the Hub. "
        f"Check it out [here](https://huggingface.co/datasets/{repo_id}). "
        f"Hold on to the repo_id: {repo_id}, we will need it in the next steps."
    )

    # Step 2: configuration Space, duplicated from the template.
    # The target is namespace-qualified so the Space is created where the
    # links and the configuration push below expect it.
    # NOTE(review): assumes duplicate_space_on_hub accepts a "namespace/name"
    # target (as huggingface_hub.duplicate_space does) - confirm in hub.py.
    duplicate_space_on_hub(
        source_repo="argilla/domain-specific-datasets-template",
        target_repo=space_repo_id,
        hub_token=hub_token,
        private=private_selector,
    )
    st.success(f"Configuration Space created. Check it out [here]({space_url}).")

    # Step 3: Argilla Space, duplicated from its template.
    duplicate_space_on_hub(
        source_repo="argilla/argilla-template-space",
        target_repo=argilla_repo_id,
        hub_token=hub_token,
        private=private_selector,
    )
    st.success(f"Argilla Space created. Check it out [here]({argilla_url}).")

    # Step 4: push the project configuration after a short pause.
    # NOTE(review): the delay presumably gives the freshly duplicated Spaces
    # time to become available before files are added - confirm.
    seconds = 5
    with st.spinner(f"Adding project configuration to spaces in {seconds} seconds"):
        time.sleep(seconds)
        add_project_config_to_space_repo(
            dataset_repo_id=repo_id,
            hub_token=hub_token,
            project_name=project_name,
            argilla_space_repo_id=argilla_repo_id,
            project_space_repo_id=space_repo_id,
        )

    st.subheader("π’ Next Steps")
    st.write("Go to your project specific space!")
    st.link_button(
        "π§βπΎ Open Configuration Space",
        space_url,
    )
|