# Spaces:
# Runtime error
# Runtime error
import gradio as gr | |
import pandas as pd | |
from huggingface_hub.hf_api import create_repo, upload_file, HfApi | |
from huggingface_hub.repository import Repository | |
import subprocess | |
import os | |
import tempfile | |
import sweetviz as sv | |
def analyze_datasets(dataset, dataset_name, token, column=None, pairwise="off"):
    """Profile a CSV with SweetViz and publish the report as a static Space.

    Args:
        dataset: Uploaded file. Either an object whose ``name`` attribute is
            the path of the CSV on disk, or the path itself.
        dataset_name: Name of the Space created under the token owner's
            namespace.
        token: Hugging Face Hub token used for ``whoami``, repo creation and
            the uploads.
        column: Optional target column for the analysis. ``None`` or a blank
            string means "no target feature".
        pairwise: Value forwarded as SweetViz's ``pairwise_analysis``. A
            missing value (``None``/empty) falls back to ``"off"``.

    Returns:
        A message containing the URL returned by ``create_repo``.

    Raises:
        ValueError: If no dataset was provided.
    """
    if dataset is None:
        raise ValueError("Please upload a CSV dataset first.")
    df = pd.read_csv(getattr(dataset, "name", dataset))

    username = HfApi().whoami(token=token)["name"]
    repo_id = f"{username}/{dataset_name}"

    # A text input left empty arrives as "" rather than None, so a plain
    # `is not None` check would hand SweetViz an empty target feature.
    target = column.strip() if isinstance(column, str) else column
    # An unselected radio button arrives as None.
    pairwise = pairwise or "off"
    if target is not None and target != "":
        analyze_report = sv.analyze(df, target_feat=target, pairwise_analysis=pairwise)
    else:
        analyze_report = sv.analyze(df, pairwise_analysis=pairwise)

    # Render into a private temporary directory: writing to the working
    # directory lets concurrent requests overwrite each other's report.
    with tempfile.TemporaryDirectory() as workdir:
        report_path = os.path.join(workdir, "index.html")
        analyze_report.show_html(report_path, open_browser=False)
        repo_url = create_repo(
            repo_id, repo_type="space", token=token, space_sdk="static", private=False
        )
        upload_file(
            path_or_fileobj=report_path,
            path_in_repo="index.html",
            repo_id=repo_id,
            repo_type="space",
            token=token,
        )

    readme = f"---\ntitle: {dataset_name}\nemoji: ✨\ncolorFrom: green\ncolorTo: red\nsdk: static\npinned: false\ntags:\n- dataset-report\n---"
    # Upload the README from memory as UTF-8 bytes: no local README.md is
    # clobbered and the emoji does not depend on the locale's default encoding.
    upload_file(
        path_or_fileobj=readme.encode("utf-8"),
        path_in_repo="README.md",
        repo_id=repo_id,
        repo_type="space",
        token=token,
    )
    return f"Your dataset report will be ready at {repo_url}"
def compare_column_values(dataset, dataset_name, token, column, category):
    """Compare rows where ``column == category`` against all other rows.

    The comparison report is rendered with SweetViz and published as a static
    Space on the Hugging Face Hub.

    Args:
        dataset: Uploaded file. Either an object whose ``name`` attribute is
            the path of the CSV on disk, or the path itself.
        dataset_name: Name of the Space created under the token owner's
            namespace.
        token: Hugging Face Hub token used for ``whoami``, repo creation and
            the uploads.
        column: Name of the column used to split the dataset.
        category: Value of ``column`` selecting the first subset.

    Returns:
        A message containing the URL returned by ``create_repo``.

    Raises:
        ValueError: If no dataset was provided or ``column`` is not a column
            of the dataset.
    """
    if dataset is None:
        raise ValueError("Please upload a CSV dataset first.")
    df = pd.read_csv(getattr(dataset, "name", dataset))
    if column not in df.columns:
        raise ValueError(f"Column {column!r} was not found in the dataset.")

    username = HfApi().whoami(token=token)["name"]
    repo_id = f"{username}/{dataset_name}"

    # The category comes from a text input, so compare on the string form of
    # the column; otherwise numeric columns could never match.
    category_label = str(category)
    values_as_text = df[column].astype(str)
    in_category = values_as_text == category_label

    # compare_intra takes a pair of labels (matching subset, remaining subset).
    # Name the remainder after the only other value when there is exactly one.
    other_values = [v for v in values_as_text.unique() if v != category_label]
    other_label = other_values[0] if len(other_values) == 1 else f"Not {category_label}"
    compare_report = sv.compare_intra(df, in_category, [category_label, other_label])

    # Render into a private temporary directory: writing to the working
    # directory lets concurrent requests overwrite each other's report.
    with tempfile.TemporaryDirectory() as workdir:
        report_path = os.path.join(workdir, "index.html")
        compare_report.show_html(report_path, open_browser=False)
        repo_url = create_repo(
            repo_id, repo_type="space", token=token, space_sdk="static", private=False
        )
        upload_file(
            path_or_fileobj=report_path,
            path_in_repo="index.html",
            repo_id=repo_id,
            repo_type="space",
            token=token,
        )

    readme = f"---\ntitle: {dataset_name}\nemoji: ✨\ncolorFrom: green\ncolorTo: red\nsdk: static\npinned: false\ntags:\n- dataset-report\n---"
    # Upload the README from memory as UTF-8 bytes: no local README.md is
    # clobbered and the emoji does not depend on the locale's default encoding.
    upload_file(
        path_or_fileobj=readme.encode("utf-8"),
        path_in_repo="README.md",
        repo_id=repo_id,
        repo_type="space",
        token=token,
    )
    return f"Your dataset report will be ready at {repo_url}"
def compare_dataset_splits(dataset, dataset_name, token, splits):
    """Randomly split a CSV into train/test parts and compare them.

    A ``splits`` fraction of the rows is sampled as "Training Data"; the
    remaining rows form "Test Data". The SweetViz comparison report is
    published as a static Space on the Hugging Face Hub.

    Args:
        dataset: Uploaded file. Either an object whose ``name`` attribute is
            the path of the CSV on disk, or the path itself.
        dataset_name: Name of the Space created under the token owner's
            namespace.
        token: Hugging Face Hub token used for ``whoami``, repo creation and
            the uploads.
        splits: Fraction of rows assigned to the training part, strictly
            between 0 and 1 (e.g. ``0.8``).

    Returns:
        A message containing the URL returned by ``create_repo``.

    Raises:
        ValueError: If ``splits`` is missing or not strictly between 0 and 1,
            or if no dataset was provided.
    """
    # Validate up front: a missing fraction would silently sample one row, and
    # 0 or >= 1 would leave one side of the comparison empty.
    if splits is None or not 0 < splits < 1:
        raise ValueError(
            "Split ratio must be a fraction strictly between 0 and 1, e.g. 0.8."
        )
    if dataset is None:
        raise ValueError("Please upload a CSV dataset first.")

    df = pd.read_csv(getattr(dataset, "name", dataset))
    train = df.sample(frac=splits)
    # Everything that was not sampled into the training part.
    test = df.loc[df.index.difference(train.index)]

    username = HfApi().whoami(token=token)["name"]
    repo_id = f"{username}/{dataset_name}"
    compare_report = sv.compare([train, "Training Data"], [test, "Test Data"])

    # Render into a private temporary directory: writing to the working
    # directory lets concurrent requests overwrite each other's report.
    with tempfile.TemporaryDirectory() as workdir:
        report_path = os.path.join(workdir, "index.html")
        compare_report.show_html(report_path, open_browser=False)
        repo_url = create_repo(
            repo_id, repo_type="space", token=token, space_sdk="static", private=False
        )
        upload_file(
            path_or_fileobj=report_path,
            path_in_repo="index.html",
            repo_id=repo_id,
            repo_type="space",
            token=token,
        )

    readme = f"---\ntitle: {dataset_name}\nemoji: ✨\ncolorFrom: green\ncolorTo: red\nsdk: static\npinned: false\ntags:\n- dataset-report\n---"
    # Upload the README from memory as UTF-8 bytes: no local README.md is
    # clobbered and the emoji does not depend on the locale's default encoding.
    upload_file(
        path_or_fileobj=readme.encode("utf-8"),
        path_in_repo="README.md",
        repo_id=repo_id,
        repo_type="space",
        token=token,
    )
    return f"Your dataset report will be ready at {repo_url}"
# Gradio UI: one tab per analysis. Each tab collects a CSV file, a Hub token
# and a Space name, runs the matching function and shows its return message.
# Component names are reused from tab to tab; each click handler is wired
# before the names are rebound, so it keeps the components of its own tab.
with gr.Blocks() as demo:
    main_title = gr.Markdown("""# Easy Analysis🪄🌟✨""")
    main_desc = gr.Markdown("""This app enables you to run three type of dataset analysis and pushes the interactive reports to your Hugging Face Hub profile as a Space. It uses SweetViz in the back.""")
    with gr.Tabs():
        with gr.TabItem("Analyze") as analyze:
            with gr.Row():
                with gr.Column():
                    title = gr.Markdown(""" ## Analyze Dataset """)
                    description = gr.Markdown("Analyze a dataset or predictive variables against a target variable in a dataset (enter a column name to column section if you want to compare against target value). You can also do pairwise analysis, but it has quadratic complexity.")
                    dataset = gr.File(label="Dataset")
                    column = gr.Text(label="Compare dataset against a target variable (Optional)")
                    # Preselect "off" so the handler never receives None.
                    pairwise = gr.Radio(["off", "on"], value="off", label="Enable pairwise analysis")
                    # Mask the token: it is a credential.
                    token = gr.Textbox(label="Your Hugging Face Token", type="password")
                    dataset_name = gr.Textbox(label="Dataset Name")
                    pushing_desc = gr.Markdown("This app needs your Hugging Face Hub token and a unique name for your dataset report.")
                    inference_run = gr.Button("Infer")
                    # Plain read-only Textbox instead of the legacy
                    # gr.outputs.Textbox / gr.StatusTracker pair.
                    outcome = gr.Textbox(interactive=False)
            inference_run.click(
                analyze_datasets,
                inputs=[dataset, dataset_name, token, column, pairwise],
                outputs=outcome,
            )
        with gr.TabItem("Compare Splits") as compare_splits:
            with gr.Row():
                with gr.Column():
                    title = gr.Markdown(""" ## Compare Splits""")
                    description = gr.Markdown("Split a dataset and compare splits. You need to give a fraction, e.g. 0.8.")
                    dataset = gr.File(label="Dataset")
                    split_ratio = gr.Number(label="Split Ratios")
                    pushing_desc = gr.Markdown("This app needs your Hugging Face Hub token and a unique name for your dataset report.")
                    token = gr.Textbox(label="Your Hugging Face Token", type="password")
                    dataset_name = gr.Textbox(label="Dataset Name")
                    inference_run = gr.Button("Infer")
                    outcome = gr.Textbox(interactive=False)
            inference_run.click(
                compare_dataset_splits,
                inputs=[dataset, dataset_name, token, split_ratio],
                outputs=outcome,
            )
        with gr.TabItem("Compare Subsets") as compare_subsets:
            with gr.Row():
                with gr.Column():
                    title = gr.Markdown(""" ## Compare Subsets""")
                    description = gr.Markdown("Compare subsets of a dataset, e.g. you can pick Age Group column and compare adult category against young.")
                    dataset = gr.File(label="Dataset")
                    column = gr.Text(label="Enter column:")
                    category = gr.Text(label="Enter category:")
                    pushing_desc = gr.Markdown("This app needs your Hugging Face Hub token and a unique name for your dataset report.")
                    token = gr.Textbox(label="Your Hugging Face Token", type="password")
                    dataset_name = gr.Textbox(label="Dataset Name")
                    inference_run = gr.Button("Run Analysis")
                    outcome = gr.Textbox(interactive=False)
            inference_run.click(
                compare_column_values,
                inputs=[dataset, dataset_name, token, column, category],
                outputs=outcome,
            )
demo.launch(debug=True)