Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,8 @@ from huggingface_hub.hf_api import create_repo
|
|
3 |
from huggingface_hub.repository import Repository
|
4 |
import gradio as gr
|
5 |
import pandas as pd
|
|
|
|
|
6 |
import tempfile
|
7 |
|
8 |
token = gr.Textbox(label = "Your Hugging Face Token")
|
@@ -10,26 +12,31 @@ username = gr.Textbox(label = "Your Hugging Face User name")
|
|
10 |
dataset_name = gr.Textbox(label = "Dataset Name")
|
11 |
dataset = gr.File(label = "Dataset")
|
12 |
output_text = gr.Textbox(label = "Status")
|
13 |
-
title = "Dataset Profiler 🪄✨"
|
14 |
-
description = "Drag and drop any dataset you want to get a detailed profile on, and this Space will profile and push it to your Hub profile as a new Space. πβ¨"
|
15 |
|
16 |
def profile_dataset(dataset, username, token, dataset_name):
|
17 |
|
18 |
df = pd.read_csv(dataset.name)
|
19 |
profile = pp.ProfileReport(df, title=f"{dataset_name} Report")
|
20 |
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
repo = Repository(
|
23 |
-
|
24 |
-
|
25 |
-
use_auth_token=token,
|
26 |
-
repo_type = "space"
|
27 |
)
|
28 |
-
|
29 |
profile.to_file(f"{username}/{dataset_name}/index.html")
|
30 |
repo.git_add()
|
31 |
repo.git_commit(commit_message = "Dataset report")
|
32 |
repo.git_push()
|
33 |
-
return f"Your dataset report will be ready at {
|
34 |
|
35 |
-
gr.Interface(profile_dataset, inputs = [dataset, username, token, dataset_name],
|
|
|
3 |
from huggingface_hub.repository import Repository
|
4 |
import gradio as gr
|
5 |
import pandas as pd
|
6 |
+
import subprocess
|
7 |
+
import os
|
8 |
import tempfile
|
9 |
|
10 |
# Gradio components shared by the interface built at the bottom of the file.
# NOTE: the `username` textbox is defined alongside these in the full file;
# its line is elided from this excerpt, so it is not repeated here.
token = gr.Textbox(label="Your Hugging Face Token")
dataset_name = gr.Textbox(label="Dataset Name")
dataset = gr.File(label="Dataset")
output_text = gr.Textbox(label="Status")
|
|
|
|
|
15 |
|
16 |
def profile_dataset(dataset, username, token, dataset_name):
    """Profile an uploaded CSV and publish the HTML report as a static Space.

    Args:
        dataset: Uploaded file object from the ``gr.File`` input; only its
            ``.name`` attribute (a path readable by pandas) is used.
        username: Hugging Face user name that will own the new Space.
        token: Hugging Face access token. Used to create the Space and to
            authenticate the clone/push of the report.
        dataset_name: Name used for the report title and for the Space.

    Returns:
        A status string containing the URL returned by ``create_repo``.

    Raises:
        Whatever ``pd.read_csv``, ``create_repo`` or the ``Repository`` git
        operations raise; nothing is swallowed here.
    """
    df = pd.read_csv(dataset.name)
    # `pp` is presumably the profiling package imported above this excerpt.
    profile = pp.ProfileReport(df, title=f"{dataset_name} Report")

    repo_url = create_repo(
        f"{username}/{dataset_name}",
        repo_type="space",
        token=token,
        space_sdk="static",
    )

    # Clone exactly once. The previous extra `git clone` subprocess created a
    # second, unused checkout in the working directory and made a rerun with
    # the same name fail because the target directory already existed.
    # A temporary directory keeps user-supplied names out of local paths and
    # guarantees the checkout is removed once the push has finished.
    with tempfile.TemporaryDirectory() as workdir:
        repo = Repository(
            local_dir=workdir,
            clone_from=repo_url,
            # Without the token the push to the user's Space cannot
            # authenticate.
            use_auth_token=token,
        )
        # A static Space serves `index.html` from the repository root.
        profile.to_file(os.path.join(workdir, "index.html"))
        repo.git_add()
        repo.git_commit(commit_message="Dataset report")
        repo.git_push()

    return f"Your dataset report will be ready at {repo_url}"
|
41 |
|
42 |
+
# Build the UI around `profile_dataset` and start serving it; queuing is
# enabled and the server is launched in debug mode.
gr.Interface(
    profile_dataset,
    inputs=[dataset, username, token, dataset_name],
    outputs=[output_text],
    enable_queue=True,
).launch(debug=True)
|