Update app.py
Browse files
app.py
CHANGED
@@ -36,22 +36,28 @@ class DataQualitySystem:
|
|
36 |
conn.close()
|
37 |
|
38 |
def load_and_process_data(self, file):
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
def initial_data_checks(self, df):
|
57 |
metrics = {
|
@@ -115,21 +121,21 @@ class DataQualitySystem:
|
|
115 |
summary_fig = go.Figure(data=[
|
116 |
go.Table(
|
117 |
header=dict(values=['Statistic'] + list(summary_stats.columns)),
|
118 |
-
cells=dict(values=[summary_stats.index] + [summary_stats[col] for col in summary_stats.columns])
|
119 |
)
|
120 |
])
|
121 |
|
122 |
# Create anomaly distribution plot
|
123 |
if 'anomaly_score' in df.columns:
|
124 |
anomaly_fig = px.histogram(df, x='anomaly_score',
|
125 |
-
|
126 |
else:
|
127 |
anomaly_fig = None
|
128 |
|
129 |
# Create missing values plot
|
130 |
missing_data = df.isnull().sum()
|
131 |
missing_fig = px.bar(x=missing_data.index, y=missing_data.values,
|
132 |
-
|
133 |
|
134 |
return summary_fig, anomaly_fig, missing_fig
|
135 |
|
@@ -148,8 +154,7 @@ def create_gradio_interface():
|
|
148 |
|
149 |
# Create the interface
|
150 |
with gr.Blocks() as app:
|
151 |
-
gr.Markdown("# Data Quality Assurance System"
|
152 |
-
A.Joshi 91-8847374924)
|
153 |
|
154 |
with gr.Row():
|
155 |
file_input = gr.File(label="Upload Data File (CSV or XLSX)")
|
@@ -190,4 +195,4 @@ def create_gradio_interface():
|
|
190 |
# Launch the interface
|
191 |
if __name__ == "__main__":
|
192 |
app = create_gradio_interface()
|
193 |
-
app.launch()
|
|
|
36 |
conn.close()
|
37 |
|
38 |
def load_and_process_data(self, file):
    """Load an uploaded CSV/XLSX file and run the quality pipeline on it.

    Args:
        file: The uploaded file. Either a plain path string or an object
            that exposes the on-disk path as ``.name``. ``None`` means
            nothing was uploaded.

    Returns:
        A ``(dataframe, message)`` tuple. On success the first element is
        whatever ``self.detect_anomalies`` returns and the message reports
        success. On any failure the first element is ``None`` and the
        message describes the problem.
    """
    # Nothing uploaded: answer with a clear message instead of letting the
    # attribute access below fail with a cryptic AttributeError text.
    if file is None:
        return None, "No file provided. Please upload a CSV or XLSX file."

    try:
        # Accept both a bare path string and an upload object carrying the
        # path in ``.name``.
        file_path = file if isinstance(file, str) else file.name

        # Compare the extension case-insensitively so "DATA.CSV" or
        # "Report.XLSX" are not rejected as unsupported.
        lowered_path = file_path.lower()
        if lowered_path.endswith('.csv'):
            df = pd.read_csv(file_path)
        elif lowered_path.endswith('.xlsx'):
            df = pd.read_excel(file_path)
        else:
            return None, "Unsupported file format. Please use CSV or XLSX."

        # Initial data checks
        metrics = self.initial_data_checks(df)

        # Anomaly detection
        df_with_anomalies = self.detect_anomalies(df)

        # Store quality metrics
        self.store_quality_metrics(metrics)

        return df_with_anomalies, "Data processed successfully!"
    except Exception as e:
        # UI boundary: report the failure to the caller instead of raising,
        # but keep the traceback in the log for diagnosis.
        logging.exception("Error processing file: %s", e)
        return None, f"Error processing file: {str(e)}"
|
61 |
|
62 |
def initial_data_checks(self, df):
|
63 |
metrics = {
|
|
|
121 |
summary_fig = go.Figure(data=[
|
122 |
go.Table(
|
123 |
header=dict(values=['Statistic'] + list(summary_stats.columns)),
|
124 |
+
cells=dict(values=[summary_stats.index] + [summary_stats[col].tolist() for col in summary_stats.columns])
|
125 |
)
|
126 |
])
|
127 |
|
128 |
# Create anomaly distribution plot
|
129 |
if 'anomaly_score' in df.columns:
|
130 |
anomaly_fig = px.histogram(df, x='anomaly_score',
|
131 |
+
title='Distribution of Anomaly Scores')
|
132 |
else:
|
133 |
anomaly_fig = None
|
134 |
|
135 |
# Create missing values plot
|
136 |
missing_data = df.isnull().sum()
|
137 |
missing_fig = px.bar(x=missing_data.index, y=missing_data.values,
|
138 |
+
title='Missing Values by Column')
|
139 |
|
140 |
return summary_fig, anomaly_fig, missing_fig
|
141 |
|
|
|
154 |
|
155 |
# Create the interface
|
156 |
with gr.Blocks() as app:
|
157 |
+
gr.Markdown("# Data Quality Assurance System")
|
|
|
158 |
|
159 |
with gr.Row():
|
160 |
file_input = gr.File(label="Upload Data File (CSV or XLSX)")
|
|
|
195 |
# Launch the interface
|
196 |
if __name__ == "__main__":
|
197 |
app = create_gradio_interface()
|
198 |
+
app.launch()
|