File size: 2,113 Bytes
d151dad
5d3f533
d151dad
 
 
6bfef54
 
d151dad
 
1ac7123
 
e4cf83d
d151dad
e4cf83d
d151dad
 
 
 
ab3118d
d151dad
 
 
 
 
3b67f98
6bfef54
25a4dd3
92cb149
177d46a
 
 
 
 
 
 
 
1ac7123
177d46a
ab3118d
 
 
 
6bfef54
d151dad
1ac7123
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import pandas_profiling as pp
from huggingface_hub.hf_api import create_repo, upload_file
from huggingface_hub.repository import Repository
import gradio as gr
import pandas as pd
import subprocess
import os
import tempfile

# User-facing copy shown at the top of the Gradio page.
# The text is assembled from adjacent literals so the single line break is an
# explicit "\n" escape rather than a raw newline inside a quoted string
# (which is a syntax error).
description = (
    "This Space will profile a dataset file that you drag and drop and push "
    "the profile report to your Hugging Face account. 🌟 \n"
    " The value in dataset name field you'll enter will be used in the "
    "namespace of the Space that will be pushed to your profile, so you can "
    "use it to version the reports too! 🙌🏻 Feel free to open a discussion in "
    "case you have any feature requests. Dataset name you'll enter will be "
    "used for repository name so make sure it doesn't exist and it doesn't "
    "contain spaces."
)
title = "Dataset Profiler 🪄✨"
# Input and output components wired into the interface.
# The token is a secret, so it is rendered as a masked password field instead
# of being echoed in plain text on screen.
token = gr.Textbox(label="Your Hugging Face Token", type="password")
username = gr.Textbox(label="Your Hugging Face User Name")
dataset_name = gr.Textbox(label="Dataset Name")
dataset = gr.File(label="Dataset")
output_text = gr.Textbox(label="Status")


def profile_dataset(dataset, username, token, dataset_name):
    """Profile an uploaded CSV file and publish the report as a static Space.

    Args:
        dataset: Uploaded file object; its ``name`` attribute is read as the
            path of the CSV file to load.
        username: Hugging Face account name used as the repository namespace.
        token: Hugging Face access token passed to every Hub call.
        dataset_name: Used for the report title, the Space repository name
            and the ``title`` field of the generated README.

    Returns:
        A status message that embeds the value returned by ``create_repo``.
    """
    repo_id = f"{username}/{dataset_name}"

    df = pd.read_csv(dataset.name)
    profile = pp.ProfileReport(df, title=f"{dataset_name} Report")

    repo_url = create_repo(
        repo_id,
        repo_type="space",
        token=token,
        space_sdk="static",
        private=False,
    )

    # Space metadata header; joined with explicit newlines so no raw line
    # break ends up inside a single-quoted literal.
    readme = "\n".join(
        [
            "---",
            f"title: {dataset_name}",
            "emoji: ✨",
            "colorFrom: green",
            "colorTo: red",
            "sdk: static",
            "pinned: false",
            "tags:",
            "- dataset-report",
            "---",
        ]
    )

    # Work in a private temporary directory: fixed paths in the current
    # directory would be overwritten by concurrent requests and left behind.
    with tempfile.TemporaryDirectory() as work_dir:
        report_path = os.path.join(work_dir, "index.html")
        profile.to_file(report_path)
        upload_file(
            path_or_fileobj=report_path,
            path_in_repo="index.html",
            repo_id=repo_id,
            repo_type="space",
            token=token,
        )

        readme_path = os.path.join(work_dir, "README.md")
        # The README contains an emoji, so do not rely on the platform's
        # default encoding.
        with open(readme_path, "w", encoding="utf-8") as f:
            f.write(readme)
        upload_file(
            path_or_fileobj=readme_path,
            path_in_repo="README.md",
            repo_id=repo_id,
            repo_type="space",
            token=token,
        )

    return f"Your dataset report will be ready at {repo_url}"

# Assemble the web UI around profile_dataset and start serving it.
demo = gr.Interface(
    profile_dataset,
    title=title,
    description=description,
    inputs=[dataset, username, token, dataset_name],
    outputs=[output_text],
    enable_queue=True,
)
demo.launch(debug=True)