File size: 1,457 Bytes
d151dad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import pandas_profiling as pp
from huggingface_hub.hf_api import create_repo
from huggingface_hub.repository import Repository
import gradio as gr
import pandas as pd
import tempfile

# Gradio components used as the inputs and output of the app's interface.
token = gr.Textbox(label="Your Hugging Face Token")
username = gr.Textbox(label="Your Hugging Face User name")
dataset_name = gr.Textbox(label="Dataset Name")
dataset = gr.File(label="Dataset")
output_text = gr.Textbox(label="Status")

# Title and description text for the app. The emoji were previously stored as
# mis-decoded UTF-8 (mojibake); they are restored to the intended characters.
title = "Dataset Profiler 🪄✨"
description = "Drag and drop any dataset you want to get a detailed profile on, and this Space will profile and push it to your Hub profile as a new Space. 🌟✨"

def profile_dataset(dataset, username: str, token: str, dataset_name: str) -> str:
    """Profile an uploaded CSV and publish the report as a static Space.

    The CSV is loaded with pandas, profiled with pandas-profiling, and the
    resulting HTML report is committed as ``index.html`` to a newly created
    ``{username}/{dataset_name}`` Space repository.

    Args:
        dataset: Uploaded file object; only its ``.name`` attribute (the path
            handed to ``pd.read_csv``) is read.
        username: Hub user name that will own the new Space.
        token: Hub access token used to create the repo and to push to it.
        dataset_name: Name of the Space to create; also used in the report
            title.

    Returns:
        A status message: a prompt naming the first missing input, or a
        message containing the URL returned by ``create_repo``.

    Raises:
        Errors from ``pd.read_csv``, ``create_repo`` or the git operations are
        not caught here and propagate to the caller.
    """
    # Validate before doing any work so the user gets a readable message in
    # the status box instead of an opaque failure.
    required_text = (
        (username, "Please enter your Hugging Face user name."),
        (token, "Please enter your Hugging Face token."),
        (dataset_name, "Please enter a dataset name."),
    )
    for value, message in required_text:
        if not value or not value.strip():
            return message
    if dataset is None:
        return "Please upload a dataset file."

    # Pasted values frequently carry stray whitespace or a trailing newline.
    username = username.strip()
    token = token.strip()
    dataset_name = dataset_name.strip()

    df = pd.read_csv(dataset.name)
    profile = pp.ProfileReport(df, title=f"{dataset_name} Report")

    repo_id = f"{username}/{dataset_name}"
    url = create_repo(repo_id, repo_type="space", token=token, space_sdk="static")

    # Clone into a throwaway directory rather than a path built from user
    # input under the working directory: nothing is left behind on disk,
    # repeated or concurrent requests cannot collide, and user-supplied text
    # never decides where files are written locally.
    with tempfile.TemporaryDirectory() as workdir:
        repo = Repository(
            local_dir=workdir,
            clone_from=url,
            use_auth_token=token,
            repo_type="space",
        )
        repo.git_pull(rebase=True)
        profile.to_file(f"{workdir}/index.html")
        repo.git_add()
        repo.git_commit(commit_message="Dataset report")
        repo.git_push()

    return f"Your dataset report will be ready at {url}"

# Build the UI around profile_dataset and start the app. The module-level
# title and description are passed through so the page actually shows them.
gr.Interface(
    profile_dataset,
    inputs=[dataset, username, token, dataset_name],
    outputs=[output_text],
    title=title,
    description=description,
    enable_queue=True,
).launch(debug=True)