merve HF staff committed on
Commit
0d87668
1 Parent(s): eea4def

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -0
app.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from huggingface_hub.hf_api import create_repo, upload_file
4
+ from huggingface_hub.repository import Repository
5
+ import subprocess
6
+ import os
7
+ import tempfile
8
+ import sweetviz as sv
9
+
10
def _publish_report(report, dataset_name, username, token):
    """Render a SweetViz report to HTML and upload it to a new static Space.

    Args:
        report: SweetViz report object exposing ``show_html``.
        dataset_name: Name of the Space to create; also used as its title.
        username: Hub user name that will own the Space.
        token: Hugging Face access token used for the Hub calls.

    Returns:
        The user-facing message containing the value returned by ``create_repo``.
    """
    repo_id = f"{username}/{dataset_name}"
    readme = (
        f"---\ntitle: {dataset_name}\nemoji: ✨\ncolorFrom: green\ncolorTo: red\n"
        "sdk: static\npinned: false\ntags:\n- dataset-report\n---"
    )

    # A private scratch directory per call: writing to fixed file names in the
    # working directory lets concurrent requests overwrite each other's report.
    with tempfile.TemporaryDirectory() as workdir:
        html_path = os.path.join(workdir, "index.html")
        readme_path = os.path.join(workdir, "README.md")

        report.show_html(html_path, open_browser=False)
        # Explicit UTF-8: the front matter contains an emoji, which the locale
        # default encoding cannot always represent.
        with open(readme_path, "w", encoding="utf-8") as readme_file:
            readme_file.write(readme)

        repo_url = create_repo(
            repo_id,
            repo_type="space",
            token=token,
            space_sdk="static",
            private=False,
        )
        upload_file(
            path_or_fileobj=html_path,
            path_in_repo="index.html",
            repo_id=repo_id,
            repo_type="space",
            token=token,
        )
        upload_file(
            path_or_fileobj=readme_path,
            path_in_repo="README.md",
            repo_id=repo_id,
            repo_type="space",
            token=token,
        )

    return f"Your dataset report will be ready at {repo_url}"


def analyze_datasets(dataset, dataset_name, username, token, column=None, pairwise="off"):
    """Analyze a CSV dataset with SweetViz and publish the report as a Space.

    Args:
        dataset: Uploaded file object; its ``name`` attribute is the CSV path.
        dataset_name: Name of the Space to create.
        username: Hub user name that will own the Space.
        token: Hugging Face access token.
        column: Optional target column. ``None`` or a blank string means no
            target feature (an empty text field arrives as ``""``).
        pairwise: Value forwarded as ``pairwise_analysis``; a missing
            selection falls back to ``"off"``.

    Returns:
        The message produced after the report has been uploaded.
    """
    df = pd.read_csv(dataset.name)

    analyze_kwargs = {"pairwise_analysis": pairwise or "off"}
    # Only forward a target when the user actually typed one.
    if column and column.strip():
        analyze_kwargs["target_feat"] = column
    analyze_report = sv.analyze(df, **analyze_kwargs)

    return _publish_report(analyze_report, dataset_name, username, token)


def compare_column_values(dataset, dataset_name, username, token, column, category):
    """Compare rows where ``column`` equals ``category`` against all other rows.

    Args:
        dataset: Uploaded file object; its ``name`` attribute is the CSV path.
        dataset_name: Name of the Space to create.
        username: Hub user name that will own the Space.
        token: Hugging Face access token.
        column: Column whose values define the two subsets.
        category: Value selecting the first subset. Compared as text so that a
            typed value can match numeric columns as well.

    Returns:
        The message produced after the report has been uploaded.

    Raises:
        KeyError: If ``column`` is not a column of the dataset.
        ValueError: If ``category`` matches no rows or every row, leaving one
            side of the comparison empty.
    """
    df = pd.read_csv(dataset.name)

    in_category = df[column].astype(str) == str(category)
    if not in_category.any():
        raise ValueError(
            f"Category {category!r} does not occur in column {column!r}."
        )
    if in_category.all():
        raise ValueError(
            f"Every row of column {column!r} equals {category!r}; nothing to compare against."
        )

    # compare_intra expects a pair of labels: one for the rows matching the
    # condition and one for the remaining rows.
    subset_names = (str(category), f"Not {category}")
    compare_report = sv.compare_intra(df, in_category, subset_names)

    return _publish_report(compare_report, dataset_name, username, token)


def compare_dataset_splits(dataset, dataset_name, username, token, splits):
    """Randomly split a CSV dataset in two and compare the splits.

    Args:
        dataset: Uploaded file object; its ``name`` attribute is the CSV path.
        dataset_name: Name of the Space to create.
        username: Hub user name that will own the Space.
        token: Hugging Face access token.
        splits: Fraction of rows sampled into the training split; must lie
            strictly between 0 and 1.

    Returns:
        The message produced after the report has been uploaded.

    Raises:
        ValueError: If ``splits`` is missing or outside (0, 1), or if the
            dataset is too small for both splits to be non-empty.
    """
    if splits is None or not 0 < splits < 1:
        raise ValueError(
            "Split ratio must be a fraction strictly between 0 and 1, e.g. 0.8."
        )

    df = pd.read_csv(dataset.name)
    train = df.sample(frac=splits)
    test = df.loc[df.index.difference(train.index)]
    if train.empty or test.empty:
        raise ValueError(
            "The dataset is too small to produce two non-empty splits with this ratio."
        )

    compare_report = sv.compare([train, "Training Data"], [test, "Test Data"])

    return _publish_report(compare_report, dataset_name, username, token)
62
+
63
+
64
+
65
# Three-tab Gradio UI. Each tab collects a CSV upload, its analysis options and
# the Hub credentials, then calls the matching report function on button click.
# NOTE(review): indentation was lost in the scraped source; the nesting below
# (in particular the output box placed beside the input column) is a
# reconstruction - confirm against the deployed layout.
with gr.Blocks() as demo:
    main_title = gr.Markdown("""# Easy Analysis🪄🌟✨""")
    main_desc = gr.Markdown("""This app enables you to run three type of dataset analysis and pushes the interactive reports to your Hugging Face Hub profile as a Space. It uses SweetViz in the back.""")
    with gr.Tabs():
        with gr.TabItem("Analyze") as analyze:
            with gr.Row():
                with gr.Column():
                    title = gr.Markdown(""" ## Analyze Dataset """)
                    description = gr.Markdown("Analyze a dataset or predictive variables against a target variable in a dataset (enter a column name to column section if you want to compare against target value). You can also do pairwise analysis, but it has quadratic complexity.")
                    dataset = gr.File(label="Dataset")
                    column = gr.Text(label="Compare dataset against a target variable (Optional)")
                    pairwise = gr.Radio(["off", "on"], label="Enable pairwise analysis")
                    # The access token is a secret: mask it instead of echoing it.
                    # NOTE(review): assumes the installed Gradio accepts
                    # Textbox(type="password") - confirm.
                    token = gr.Textbox(label="Your Hugging Face Token", type="password")
                    username = gr.Textbox(label="Your Hugging Face User Name")
                    dataset_name = gr.Textbox(label="Dataset Name")
                    pushing_desc = gr.Markdown("This app needs your Hugging Face Hub user name, token and a unique name for your dataset report.")
                    inference_run = gr.Button("Infer")
                    inference_progress = gr.StatusTracker(cover_container=True)
                outcome = gr.outputs.Textbox()
            inference_run.click(
                analyze_datasets,
                inputs=[dataset, dataset_name, username, token, column, pairwise],
                outputs=outcome,
                status_tracker=inference_progress,
            )

        with gr.TabItem("Compare Splits") as compare_splits:
            with gr.Row():
                with gr.Column():
                    title = gr.Markdown(""" ## Compare Splits""")
                    description = gr.Markdown("Split a dataset and compare splits. You need to give a fraction, e.g. 0.8.")
                    dataset = gr.File(label="Dataset")
                    split_ratio = gr.Number(label="Split Ratios")
                    pushing_desc = gr.Markdown("This app needs your Hugging Face Hub user name, token and a unique name for your dataset report.")
                    # Masked for the same reason as on the first tab.
                    token = gr.Textbox(label="Your Hugging Face Token", type="password")
                    username = gr.Textbox(label="Your Hugging Face User Name")
                    dataset_name = gr.Textbox(label="Dataset Name")
                    inference_run = gr.Button("Infer")
                    inference_progress = gr.StatusTracker(cover_container=True)

                outcome = gr.outputs.Textbox()
            inference_run.click(
                compare_dataset_splits,
                inputs=[dataset, dataset_name, username, token, split_ratio],
                outputs=outcome,
                status_tracker=inference_progress,
            )

        with gr.TabItem("Compare Subsets") as compare_subsets:
            with gr.Row():
                with gr.Column():
                    title = gr.Markdown(""" ## Compare Subsets""")
                    description = gr.Markdown("Compare subsets of a dataset, e.g. you can pick Age Group column and compare adult category against young.")
                    dataset = gr.File(label="Dataset")
                    column = gr.Text(label="Enter column:")
                    category = gr.Text(label="Enter category:")
                    pushing_desc = gr.Markdown("This app needs your Hugging Face Hub user name, token and a unique name for your dataset report.")
                    # Masked for the same reason as on the first tab.
                    token = gr.Textbox(label="Your Hugging Face Token", type="password")
                    username = gr.Textbox(label="Your Hugging Face User Name")
                    dataset_name = gr.Textbox(label="Dataset Name")
                    inference_run = gr.Button("Run Analysis")
                    inference_progress = gr.StatusTracker(cover_container=True)

                outcome = gr.outputs.Textbox()
            inference_run.click(
                compare_column_values,
                inputs=[dataset, dataset_name, username, token, column, category],
                outputs=outcome,
                status_tracker=inference_progress,
            )

demo.launch(debug=True)