Spaces:

huggingface-projects
/

easy-analysis

Running

App Files Files Community

easy-analysis / app.py

merve HF Staff

Update app.py

c163a56 about 3 years ago

raw

history blame contribute delete

7.75 kB

	import gradio as gr
	import pandas as pd
	from huggingface_hub.hf_api import create_repo, upload_file, HfApi
	from huggingface_hub.repository import Repository
	import subprocess
	import os
	import tempfile
	import sweetviz as sv

	def _read_uploaded_csv(dataset):
	    """Load the uploaded CSV file into a DataFrame.

	    Args:
	        dataset: Upload object exposing the file path as ``.name``.

	    Raises:
	        ValueError: If no file was uploaded.
	    """
	    if dataset is None:
	        raise ValueError("Please upload a CSV dataset first.")
	    return pd.read_csv(dataset.name)


	def _resolve_repo(dataset_name, token):
	    """Validate the report name and build the target Space id.

	    Returns:
	        A ``(repo_id, title)`` tuple where ``repo_id`` is
	        ``"<username>/<title>"`` for the account that owns ``token``.

	    Raises:
	        ValueError: If the report name is empty or blank.
	    """
	    title = (dataset_name or "").strip()
	    if not title:
	        raise ValueError("Please provide a name for the dataset report.")
	    username = HfApi().whoami(token=token)["name"]
	    return f"{username}/{title}", title


	def _publish_report(report, repo_id, title, token):
	    """Render ``report`` to HTML and push it to a new static Space.

	    The HTML is rendered into a per-call temporary directory so that
	    concurrent requests cannot overwrite each other's files.

	    Returns:
	        The status message shown to the user, containing the Space URL.
	    """
	    with tempfile.TemporaryDirectory() as workdir:
	        html_path = os.path.join(workdir, "index.html")
	        report.show_html(html_path, open_browser=False)
	        repo_url = create_repo(
	            repo_id,
	            repo_type="space",
	            token=token,
	            space_sdk="static",
	            private=False,
	        )
	        upload_file(
	            path_or_fileobj=html_path,
	            path_in_repo="index.html",
	            repo_id=repo_id,
	            repo_type="space",
	            token=token,
	        )

	    readme = f"---\ntitle: {title}\nemoji: ✨\ncolorFrom: green\ncolorTo: red\nsdk: static\npinned: false\ntags:\n- dataset-report\n---"
	    # Upload the README from memory, explicitly UTF-8 encoded: the emoji
	    # cannot be written with every locale's default file encoding.
	    upload_file(
	        path_or_fileobj=readme.encode("utf-8"),
	        path_in_repo="README.md",
	        repo_id=repo_id,
	        repo_type="space",
	        token=token,
	    )
	    return f"Your dataset report will be ready at {repo_url}"


	def analyze_datasets(dataset, dataset_name, token, column=None, pairwise="off"):
	    """Build a SweetViz analysis report for a CSV and publish it as a Space.

	    Args:
	        dataset: Uploaded CSV file (object with a ``.name`` path).
	        dataset_name: Name of the Space to create under the user's account.
	        token: Hugging Face Hub token used to create and fill the Space.
	        column: Optional target column; ``None`` or a blank string means
	            "no target".
	        pairwise: SweetViz ``pairwise_analysis`` mode; ``None`` falls back
	            to ``"off"``.

	    Returns:
	        A message containing the URL of the created Space.

	    Raises:
	        ValueError: If the upload or report name is missing, or the target
	            column does not exist in the dataset.
	    """
	    df = _read_uploaded_csv(dataset)

	    # A blank string must not be forwarded as a target column name.
	    target = column if (column and str(column).strip()) else None
	    if target is not None and target not in df.columns:
	        raise ValueError(f"Column {target!r} was not found in the dataset.")

	    repo_id, title = _resolve_repo(dataset_name, token)

	    # A missing selection arrives as None; treat it as the default.
	    mode = pairwise or "off"
	    if target is not None:
	        report = sv.analyze(df, target_feat=target, pairwise_analysis=mode)
	    else:
	        report = sv.analyze(df, pairwise_analysis=mode)
	    return _publish_report(report, repo_id, title, token)


	def compare_column_values(dataset, dataset_name, token, column, category):
	    """Compare rows where ``column == category`` against all other rows.

	    Args:
	        dataset: Uploaded CSV file (object with a ``.name`` path).
	        dataset_name: Name of the Space to create under the user's account.
	        token: Hugging Face Hub token used to create and fill the Space.
	        column: Column whose values define the two subsets.
	        category: Value of ``column`` selecting the first subset.

	    Returns:
	        A message containing the URL of the created Space.

	    Raises:
	        ValueError: If the upload or report name is missing, the column
	            does not exist, or either subset would be empty.
	    """
	    df = _read_uploaded_csv(dataset)
	    if column not in df.columns:
	        raise ValueError(f"Column {column!r} was not found in the dataset.")

	    values = df[column]
	    mask = values == category
	    if not mask.any():
	        # The category is typed as text; fall back to comparing string
	        # forms so numeric columns can be matched as well.
	        mask = values.astype(str) == str(category)
	    if not mask.any() or mask.all():
	        raise ValueError(
	            f"Category {category!r} must match some, but not all, rows of column {column!r}."
	        )

	    repo_id, title = _resolve_repo(dataset_name, token)

	    # compare_intra labels the two subsets with a pair of names.
	    subset_names = (str(category), f"Not {category}")
	    report = sv.compare_intra(df, mask, subset_names)
	    return _publish_report(report, repo_id, title, token)


	def compare_dataset_splits(dataset, dataset_name, token, splits):
	    """Randomly split a CSV into train/test parts and compare them.

	    Args:
	        dataset: Uploaded CSV file (object with a ``.name`` path).
	        dataset_name: Name of the Space to create under the user's account.
	        token: Hugging Face Hub token used to create and fill the Space.
	        splits: Fraction of rows sampled into the training part; must be
	            strictly between 0 and 1.

	    Returns:
	        A message containing the URL of the created Space.

	    Raises:
	        ValueError: If the upload or report name is missing, the fraction
	            is out of range, or a resulting part would be empty.
	    """
	    df = _read_uploaded_csv(dataset)
	    if splits is None or not 0 < splits < 1:
	        raise ValueError(
	            "Split ratio must be a fraction strictly between 0 and 1, e.g. 0.8."
	        )

	    train = df.sample(frac=splits)
	    test = df.loc[df.index.difference(train.index)]
	    if train.empty or test.empty:
	        raise ValueError("The dataset is too small to be split with this ratio.")

	    repo_id, title = _resolve_repo(dataset_name, token)

	    report = sv.compare([train, "Training Data"], [test, "Test Data"])
	    return _publish_report(report, repo_id, title, token)



	# Gradio Blocks UI with three tabs ("Analyze", "Compare Splits",
	# "Compare Subsets"). Each tab collects a dataset file, a Hub token and a
	# report name, and wires a button to one of the report functions above.
	# NOTE(review): in this copy the bodies of the `with` statements sit at the
	# same indentation as their headers; the nesting has to be restored before
	# this can run — confirm the intended layout against the deployed app.
	# NOTE(review): `dataset`, `token`, `dataset_name`, `inference_run`, etc. are
	# rebound in every tab; each `click()` call below is made before the next
	# rebinding, so it receives the components created for its own tab.
	with gr.Blocks() as demo:
	main_title = gr.Markdown("""# Easy Analysis🪄🌟✨""")
	main_desc = gr.Markdown("""This app enables you to run three type of dataset analysis and pushes the interactive reports to your Hugging Face Hub profile as a Space. It uses SweetViz in the back.""")
	with gr.Tabs():
	# --- Tab 1: analysis of a single dataset, optionally against a target column.
	with gr.TabItem("Analyze") as analyze:
	with gr.Row():
	with gr.Column():
	title = gr.Markdown(""" ## Analyze Dataset """)
	description = gr.Markdown("Analyze a dataset or predictive variables against a target variable in a dataset (enter a column name to column section if you want to compare against target value). You can also do pairwise analysis, but it has quadratic complexity.")
	dataset = gr.File(label = "Dataset")
	column = gr.Text(label = "Compare dataset against a target variable (Optional)")
	# No initial selection is passed to the Radio; presumably the callback
	# receives None when the user picks nothing — TODO confirm.
	pairwise = gr.Radio(["off", "on"], label = "Enable pairwise analysis")
	# NOTE(review): the token is collected through a plain Textbox; consider
	# masking it if the installed Gradio version supports that — confirm.
	token = gr.Textbox(label = "Your Hugging Face Token")
	dataset_name = gr.Textbox(label = "Dataset Name")
	pushing_desc = gr.Markdown("This app needs your Hugging Face Hub token and a unique name for your dataset report.")
	inference_run = gr.Button("Infer")
	inference_progress = gr.StatusTracker(cover_container=True)
	outcome = gr.outputs.Textbox()
	# The input order must match the parameter order of analyze_datasets.
	inference_run.click(
	analyze_datasets,
	inputs=[dataset, dataset_name, token, column, pairwise],
	outputs=outcome,
	status_tracker=inference_progress,
	)
	# --- Tab 2: random train/test split of one dataset, then comparison.
	with gr.TabItem("Compare Splits") as compare_splits:
	with gr.Row():
	with gr.Column():
	title = gr.Markdown(""" ## Compare Splits""")
	description = gr.Markdown("Split a dataset and compare splits. You need to give a fraction, e.g. 0.8.")
	dataset = gr.File(label = "Dataset")
	split_ratio = gr.Number(label = "Split Ratios")
	pushing_desc = gr.Markdown("This app needs your Hugging Face Hub token and a unique name for your dataset report.")
	token = gr.Textbox(label = "Your Hugging Face Token")
	dataset_name = gr.Textbox(label = "Dataset Name")
	inference_run = gr.Button("Infer")
	inference_progress = gr.StatusTracker(cover_container=True)

	outcome = gr.outputs.Textbox()
	# The input order must match the parameter order of compare_dataset_splits.
	inference_run.click(
	compare_dataset_splits,
	inputs=[dataset, dataset_name, token, split_ratio],
	outputs=outcome,
	status_tracker=inference_progress,
	)

	# --- Tab 3: compare rows of one category of a column against the rest.
	with gr.TabItem("Compare Subsets") as compare_subsets:
	with gr.Row():
	with gr.Column():
	title = gr.Markdown(""" ## Compare Subsets""")
	description = gr.Markdown("Compare subsets of a dataset, e.g. you can pick Age Group column and compare adult category against young.")
	dataset = gr.File(label = "Dataset")
	column = gr.Text(label = "Enter column:")
	category = gr.Text(label = "Enter category:")
	pushing_desc = gr.Markdown("This app needs your Hugging Face Hub token and a unique name for your dataset report.")
	token = gr.Textbox(label = "Your Hugging Face Token")
	dataset_name = gr.Textbox(label = "Dataset Name")
	inference_run = gr.Button("Run Analysis")
	inference_progress = gr.StatusTracker(cover_container=True)

	outcome = gr.outputs.Textbox()
	# The input order must match the parameter order of compare_column_values.
	inference_run.click(
	compare_column_values,
	inputs=[dataset, dataset_name, token, column, category ],
	outputs=outcome,
	status_tracker=inference_progress,
	)

	# Start the app as soon as this module is executed.
	demo.launch(debug=True)