Spaces:
Sleeping
Sleeping
Commit
·
9d04ba5
1
Parent(s):
1cb8519
Refactor code to show files and directories in the
Browse files
app.py
CHANGED
@@ -106,10 +106,11 @@ def list_git_repo_files_and_directories(repo_url: str, branch: str = "main"):
|
|
106 |
return get_files_and_directories(response)
|
107 |
|
108 |
|
109 |
-
def
|
110 |
with contextlib.suppress(Exception):
|
111 |
files_and_directories = list_git_repo_files_and_directories(url)
|
112 |
directories = files_and_directories.get("directories", [])
|
|
|
113 |
print(directories)
|
114 |
return gr.Dropdown(
|
115 |
label="Directories",
|
@@ -118,34 +119,76 @@ def show_directories(url: str):
|
|
118 |
visible=True,
|
119 |
interactive=True,
|
120 |
multiselect=True,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
)
|
122 |
|
123 |
|
124 |
-
|
125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
gr.Markdown("URL for the GitHub repository where the dataset is currently hosted")
|
127 |
source_github_repository = gr.Textbox(lines=1, label="Source GitHub Repository URL")
|
128 |
-
gr.Markdown("
|
|
|
|
|
|
|
129 |
folder_in_github_repo = gr.Dropdown(
|
130 |
None,
|
131 |
-
label="Folder in GitHub Repository to
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
allow_custom_value=True,
|
133 |
visible=True,
|
134 |
)
|
135 |
source_github_repository.change(
|
136 |
-
|
|
|
|
|
137 |
)
|
138 |
-
gr.Markdown("Destination
|
|
|
139 |
destination_hf_hub_repository = gr.Textbox(
|
140 |
label="Destination Hugging Face Repository",
|
141 |
-
placeholder="username
|
142 |
)
|
|
|
143 |
gr.Markdown(
|
144 |
"""You need to provide a token with write access to the namespace you want to upload to.
|
145 |
-
You can generate
|
146 |
)
|
147 |
hf_token = gr.Textbox(label="Hugging Face Token", type="password")
|
148 |
-
summit_btn = gr.Button()
|
149 |
result = gr.Markdown(label="Summary", visible=True)
|
150 |
summit_btn.click(
|
151 |
push_to_hf,
|
|
|
106 |
return get_files_and_directories(response)
|
107 |
|
108 |
|
109 |
+
def show_files_and_directories(url: str):
|
110 |
with contextlib.suppress(Exception):
|
111 |
files_and_directories = list_git_repo_files_and_directories(url)
|
112 |
directories = files_and_directories.get("directories", [])
|
113 |
+
files = files_and_directories.get("files", [])
|
114 |
print(directories)
|
115 |
return gr.Dropdown(
|
116 |
label="Directories",
|
|
|
119 |
visible=True,
|
120 |
interactive=True,
|
121 |
multiselect=True,
|
122 |
+
), gr.Dropdown(
|
123 |
+
label="Files",
|
124 |
+
choices=files,
|
125 |
+
max_choices=1,
|
126 |
+
visible=True,
|
127 |
+
interactive=True,
|
128 |
+
multiselect=True,
|
129 |
)
|
130 |
|
131 |
|
132 |
+
html_text_app_description = """
|
133 |
+
Whilst GitHub is great for hosting code the Hugging Face Datasets Hub is a better place to host datasets.
|
134 |
+
Some of the benefits of hosting datasets on the Hugging Face Datasets Hub are:
|
135 |
+
<br>
|
136 |
+
<ul>
|
137 |
+
<li>Hosting for large datasets</li>
|
138 |
+
<li>An interactive preview of your dataset</li>
|
139 |
+
<li>Access to the dataset via many tools and libraries including; datasets, pandas, polars, dask and DuckDB</li>
|
140 |
+
</ul>
|
141 |
+
|
142 |
+
<br>
|
143 |
+
This app will help you migrate a dataset currently hosted on GitHub to the Hugging Face Datasets Hub.
|
144 |
+
"""
|
145 |
+
|
146 |
+
with gr.Blocks(theme=gr.themes.Base()) as demo:
|
147 |
+
gr.HTML(
|
148 |
+
"""<h1 style='text-align: center;'> GitHub to Hugging Face Hub Dataset Migration Tool</h1>
|
149 |
+
<center><i> ✨ Migrate a dataset in a few steps ✨</i></center>"""
|
150 |
+
)
|
151 |
+
gr.HTML(
|
152 |
+
"""<center> GitHub is a great place for sharing code but the Hugging Face Hub has many advantages for sharing datasets.
|
153 |
+
<br> This Space will guide you through the process of migrating a dataset from GitHub to the Hugging Face Hub. </center>"""
|
154 |
+
)
|
155 |
+
gr.Markdown("### Location of existing dataset")
|
156 |
gr.Markdown("URL for the GitHub repository where the dataset is currently hosted")
|
157 |
source_github_repository = gr.Textbox(lines=1, label="Source GitHub Repository URL")
|
158 |
+
gr.Markdown("### Select files and folder to migrate")
|
159 |
+
gr.Markdown(
|
160 |
+
"(Optional): select a specific folder and/or files to migrate from the GitHub repository."
|
161 |
+
)
|
162 |
folder_in_github_repo = gr.Dropdown(
|
163 |
None,
|
164 |
+
label="Folder in the GitHub Repository to migrate",
|
165 |
+
allow_custom_value=True,
|
166 |
+
visible=True,
|
167 |
+
)
|
168 |
+
files_in_github_repo = gr.Dropdown(
|
169 |
+
None,
|
170 |
+
label="Files in GitHub Repository to migrate",
|
171 |
allow_custom_value=True,
|
172 |
visible=True,
|
173 |
)
|
174 |
source_github_repository.change(
|
175 |
+
show_files_and_directories,
|
176 |
+
[source_github_repository],
|
177 |
+
[folder_in_github_repo, files_in_github_repo],
|
178 |
)
|
179 |
+
gr.Markdown("### Destination for your migrated dataset")
|
180 |
+
gr.Markdown("Destination repository for your dataset on the Hugging Face Hub")
|
181 |
destination_hf_hub_repository = gr.Textbox(
|
182 |
label="Destination Hugging Face Repository",
|
183 |
+
placeholder="i.e. <hugging face username>/<repository_name>",
|
184 |
)
|
185 |
+
gr.Markdown("## Authentication")
|
186 |
gr.Markdown(
|
187 |
"""You need to provide a token with write access to the namespace you want to upload to.
|
188 |
+
You can generate/access your Hugging FAce token from [here](https://huggingface.co/settings/token)."""
|
189 |
)
|
190 |
hf_token = gr.Textbox(label="Hugging Face Token", type="password")
|
191 |
+
summit_btn = gr.Button("Migrate Dataset")
|
192 |
result = gr.Markdown(label="Summary", visible=True)
|
193 |
summit_btn.click(
|
194 |
push_to_hf,
|