Spaces:

huggingface-projects
/

dataset-profiler

Running

File size: 1,363 Bytes

d151dad
 
 
 
 
6bfef54
 
d151dad
 
 
 
 
 
 
 
 
 
 
 
 
6bfef54
 
 
 
 
 
 
 
 
 
d151dad
6bfef54
 
d151dad
6bfef54
d151dad
 
 
 
6bfef54
d151dad
6bfef54

import pandas_profiling as pp
from huggingface_hub.hf_api import create_repo
from huggingface_hub.repository import Repository
import gradio as gr
import pandas as pd
import subprocess
import os
import tempfile

# UI components wired into the gr.Interface at the bottom of the file.
# The four inputs are passed to profile_dataset; output_text shows its
# returned status message.
token = gr.Textbox(label="Your Hugging Face Token")
username = gr.Textbox(label="Your Hugging Face User name")
dataset_name = gr.Textbox(label="Dataset Name")
dataset = gr.File(label="Dataset")
output_text = gr.Textbox(label="Status")

def profile_dataset(dataset, username, token, dataset_name):
    """Profile an uploaded CSV and publish the report as a static Space.

    The uploaded file is loaded with pandas, turned into a pandas-profiling
    report, and pushed as ``index.html`` to a newly created Space named
    ``{username}/{dataset_name}`` (static SDK).

    Args:
        dataset: Uploaded file object from the ``gr.File`` input; only its
            ``name`` attribute is used, as the location handed to
            ``pd.read_csv``.
        username: Hub namespace under which the Space is created.
        token: Hugging Face token used to create the Space and to push to it.
        dataset_name: Name of the Space; also used in the report title.

    Returns:
        A status message containing the value returned by ``create_repo``.
    """
    df = pd.read_csv(dataset.name)
    profile = pp.ProfileReport(df, title=f"{dataset_name} Report")

    repo_url = create_repo(
        f"{username}/{dataset_name}",
        repo_type="space",
        token=token,
        space_sdk="static",
    )

    # Work in a throwaway directory: nothing is left behind on disk and two
    # requests can never collide on the same working-directory path.
    with tempfile.TemporaryDirectory() as workdir:
        local_dir = os.path.join(workdir, "repo")

        # Repository performs the clone itself, so no separate `git clone`
        # subprocess is needed. The caller's token is forwarded so the push
        # is authenticated as that user; an empty token falls back to the
        # library default (locally stored credentials).
        repo = Repository(
            local_dir=local_dir,
            clone_from=repo_url,
            use_auth_token=token or True,
        )

        profile.to_file(os.path.join(local_dir, "index.html"))
        repo.git_add()
        repo.git_commit(commit_message="Dataset report")
        repo.git_push()

    return f"Your dataset report will be ready at {repo_url}"

# Build the web UI around profile_dataset (inputs are passed to it in the
# listed order) and start serving it with request queueing and debug mode on.
gr.Interface(
    fn=profile_dataset,
    inputs=[dataset, username, token, dataset_name],
    outputs=[output_text],
    enable_queue=True,
).launch(debug=True)