Anupam251272 committed on
Commit
45b0597
·
verified ·
1 Parent(s): c9912aa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -22
app.py CHANGED
@@ -36,22 +36,28 @@ class DataQualitySystem:
36
  conn.close()
37
 
38
  def load_and_process_data(self, file):
39
- try:
40
- file_path = file.name # This should work for both CSV and XLSX files
41
- if file_path.endswith('.csv'):
42
- df = pd.read_csv(file_path)
43
- elif file_path.endswith('.xlsx'):
44
- df = pd.read_excel(file_path)
45
- else:
46
- return None, "Unsupported file format. Please use CSV or XLSX."
47
- # Continue with the rest of your method
48
- metrics = self.initial_data_checks(df)
49
- df_with_anomalies = self.detect_anomalies(df)
50
- self.store_quality_metrics(metrics)
51
- return df_with_anomalies, "Data processed successfully!"
52
- except Exception as e:
53
- logging.error(f"Error processing file: {str(e)}")
54
- return None, f"Error processing file: {str(e)}"
 
 
 
 
 
 
55
 
56
  def initial_data_checks(self, df):
57
  metrics = {
@@ -115,21 +121,21 @@ class DataQualitySystem:
115
  summary_fig = go.Figure(data=[
116
  go.Table(
117
  header=dict(values=['Statistic'] + list(summary_stats.columns)),
118
- cells=dict(values=[summary_stats.index] + [summary_stats[col] for col in summary_stats.columns])
119
  )
120
  ])
121
 
122
  # Create anomaly distribution plot
123
  if 'anomaly_score' in df.columns:
124
  anomaly_fig = px.histogram(df, x='anomaly_score',
125
- title='Distribution of Anomaly Scores')
126
  else:
127
  anomaly_fig = None
128
 
129
  # Create missing values plot
130
  missing_data = df.isnull().sum()
131
  missing_fig = px.bar(x=missing_data.index, y=missing_data.values,
132
- title='Missing Values by Column')
133
 
134
  return summary_fig, anomaly_fig, missing_fig
135
 
@@ -148,8 +154,7 @@ def create_gradio_interface():
148
 
149
  # Create the interface
150
  with gr.Blocks() as app:
151
- gr.Markdown("# Data Quality Assurance System"
152
- A.Joshi 91-8847374924)
153
 
154
  with gr.Row():
155
  file_input = gr.File(label="Upload Data File (CSV or XLSX)")
@@ -190,4 +195,4 @@ def create_gradio_interface():
190
  # Launch the interface
191
  if __name__ == "__main__":
192
  app = create_gradio_interface()
193
- app.launch()
 
36
  conn.close()
37
 
38
  def load_and_process_data(self, file):
39
+ try:
40
+ file_path = file.name # This should work for both CSV and XLSX files
41
+ if file_path.endswith('.csv'):
42
+ df = pd.read_csv(file_path)
43
+ elif file_path.endswith('.xlsx'):
44
+ df = pd.read_excel(file_path)
45
+ else:
46
+ return None, "Unsupported file format. Please use CSV or XLSX."
47
+
48
+ # Initial data checks
49
+ metrics = self.initial_data_checks(df)
50
+
51
+ # Anomaly detection
52
+ df_with_anomalies = self.detect_anomalies(df)
53
+
54
+ # Store quality metrics
55
+ self.store_quality_metrics(metrics)
56
+
57
+ return df_with_anomalies, "Data processed successfully!"
58
+ except Exception as e:
59
+ logging.error(f"Error processing file: {str(e)}")
60
+ return None, f"Error processing file: {str(e)}"
61
 
62
  def initial_data_checks(self, df):
63
  metrics = {
 
121
  summary_fig = go.Figure(data=[
122
  go.Table(
123
  header=dict(values=['Statistic'] + list(summary_stats.columns)),
124
+ cells=dict(values=[summary_stats.index] + [summary_stats[col].tolist() for col in summary_stats.columns])
125
  )
126
  ])
127
 
128
  # Create anomaly distribution plot
129
  if 'anomaly_score' in df.columns:
130
  anomaly_fig = px.histogram(df, x='anomaly_score',
131
+ title='Distribution of Anomaly Scores')
132
  else:
133
  anomaly_fig = None
134
 
135
  # Create missing values plot
136
  missing_data = df.isnull().sum()
137
  missing_fig = px.bar(x=missing_data.index, y=missing_data.values,
138
+ title='Missing Values by Column')
139
 
140
  return summary_fig, anomaly_fig, missing_fig
141
 
 
154
 
155
  # Create the interface
156
  with gr.Blocks() as app:
157
+ gr.Markdown("# Data Quality Assurance System")
 
158
 
159
  with gr.Row():
160
  file_input = gr.File(label="Upload Data File (CSV or XLSX)")
 
195
  # Launch the interface
196
  if __name__ == "__main__":
197
  app = create_gradio_interface()
198
+ app.launch()