|
import pandas_profiling as pp |
|
from huggingface_hub.hf_api import create_repo |
|
from huggingface_hub.repository import Repository |
|
import gradio as gr |
|
import pandas as pd |
|
import subprocess |
|
import os |
|
import tempfile |
|
|
|
# Input widgets handed to gr.Interface below. Creation order is kept as-is.
token = gr.Textbox(label="Your Hugging Face Token")
username = gr.Textbox(label="Your Hugging Face User name")
dataset_name = gr.Textbox(label="Dataset Name")
dataset = gr.File(label="Dataset")

# Output widget that displays the string returned by profile_dataset.
output_text = gr.Textbox(label="Status")
|
|
|
def profile_dataset(dataset, username, token, dataset_name):
    """Profile an uploaded CSV and publish the HTML report to a static Space.

    The CSV is read with pandas, summarised with pandas-profiling, and the
    rendered report is committed as ``index.html`` to the Space
    ``{username}/{dataset_name}`` created through ``create_repo``.

    Args:
        dataset: Uploaded file object; only its ``.name`` attribute is read
            and used as the CSV path. ``None`` is treated as "nothing
            uploaded".
        username: Hugging Face namespace that owns the Space.
        token: Hugging Face access token passed to ``create_repo``.
        dataset_name: Name of the Space; also used in the report title.

    Returns:
        A status string: either a prompt to upload a file, or a message
        containing the repository URL returned by ``create_repo``.
    """
    # Without this guard a missing upload surfaces as an opaque
    # AttributeError on ``dataset.name``.
    if dataset is None:
        return "Please upload a CSV dataset first."

    df = pd.read_csv(dataset.name)
    profile = pp.ProfileReport(df, title=f"{dataset_name} Report")

    repo_id = f"{username}/{dataset_name}"
    repo_url = create_repo(
        repo_id,
        repo_type="space",
        token=token,
        space_sdk="static",
    )

    # Repository(clone_from=...) performs the clone itself. The previous extra
    # `git clone` into the current working directory was redundant: it left a
    # stray ./<dataset_name> checkout behind and raised CalledProcessError
    # whenever that directory already existed and was not empty.
    # NOTE(review): the token is not forwarded to Repository, so the push
    # relies on whatever git credentials are available — confirm and pass the
    # token using the keyword supported by the installed huggingface_hub.
    repo = Repository(
        local_dir=repo_id,
        clone_from=repo_url,
        git_user="merve",
        git_email="[email protected]",
    )

    # A static Space serves index.html from the repository root.
    profile.to_file(os.path.join(repo_id, "index.html"))

    repo.git_add()
    repo.git_commit(commit_message="Dataset report")
    repo.git_push()

    return f"Your dataset report will be ready at {repo_url}"
|
|
|
# Build the UI around profile_dataset and start the server. The input list
# order must match the function's positional parameters.
gr.Interface(
    fn=profile_dataset,
    inputs=[dataset, username, token, dataset_name],
    outputs=[output_text],
    enable_queue=True,
).launch(debug=True)