Madiharehan committed on
Commit
e290096
·
verified ·
1 Parent(s): b46854e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -0
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.preprocessing import LabelEncoder, StandardScaler
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.linear_model import LogisticRegression, LinearRegression
7
+ from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, r2_score
8
+ import seaborn as sns
9
+ import matplotlib.pyplot as plt
10
+ import os
11
+
12
+ # Main EDA + ML Function
13
def auto_eda(file):
    """Run automated EDA, preprocessing and a baseline model on an uploaded CSV.

    The last column of the CSV is treated as the prediction target and every
    other column as a feature. Missing values are imputed, non-numeric
    columns are label-encoded, the data is split 80/20, features are
    standardized, and either a logistic or a linear regression is fitted.

    Args:
        file: The upload handed over by the Gradio ``File`` input. Either an
            object exposing the path as ``.name`` or a plain path string.

    Returns:
        A 3-tuple ``(insights, result, cleaned_path)``: the preprocessing log
        as one string, the model report as one string, and the path of the
        cleaned (imputed and encoded) CSV written to a temporary file.

    Raises:
        ValueError: If the dataset has no rows or fewer than two columns.
    """
    import tempfile  # local import: only this function needs it

    # The upload may arrive as a file-like wrapper or as a bare path string.
    csv_path = getattr(file, "name", file)
    df = pd.read_csv(csv_path)
    if df.empty or df.shape[1] < 2:
        raise ValueError(
            "Dataset must contain at least one row and two columns "
            "(features plus a target in the last column)."
        )

    insights = [
        f"Dataset Shape: {df.shape}",
        "\nData Types:\n" + str(df.dtypes),
        "\nFirst 5 Rows:\n" + str(df.head()),
    ]

    target_col = df.columns[-1]
    # Capture which columns are non-numeric BEFORE encoding; afterwards every
    # column is numeric and the information is lost.
    categorical_cols = set(df.select_dtypes(exclude=["number", "bool"]).columns)
    target_was_categorical = (
        target_col in categorical_cols or df[target_col].dtype == bool
    )

    # Handle missing values. Assign the result back instead of using
    # ``inplace=True`` on a column selection, which is chained assignment and
    # does not update ``df`` under pandas Copy-on-Write.
    any_filled = False
    for col in df.columns:
        if not df[col].isnull().any():
            continue
        if col in categorical_cols:
            modes = df[col].mode()
            fill_value = modes.iloc[0] if not modes.empty else "missing"
        else:
            median = df[col].median()
            # An all-NaN column has no median; fall back to zero.
            fill_value = 0 if pd.isna(median) else median
        df[col] = df[col].fillna(fill_value)
        any_filled = True
    insights.append(
        "\nMissing values handled." if any_filled else "\nNo missing values found."
    )

    # Encode categorical values. Casting to str keeps LabelEncoder from
    # failing on columns that mix strings with numbers.
    for col in categorical_cols:
        df[col] = LabelEncoder().fit_transform(df[col].astype(str))
    insights.append("\nCategorical columns encoded with Label Encoding.")

    X = df.drop(columns=target_col)
    y = df[target_col]

    # Target detection: labels that were text/bool are classes; a numeric
    # target counts as classes only when it has few distinct whole-number
    # values (LogisticRegression rejects fractional, continuous labels).
    max_classes = 20
    if target_was_categorical:
        is_classification = True
    else:
        is_classification = y.nunique() <= max_classes and bool((y % 1 == 0).all())

    # Split first, then scale, so test-set statistics never leak into the
    # scaler that the model is trained with.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    insights.append("\nData split into training and testing (80/20).")

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    insights.append("\nFeature scaling done using StandardScaler.")

    result = ""
    if is_classification:
        model = LogisticRegression(max_iter=1000)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        cm = confusion_matrix(y_test, y_pred)
        cr = classification_report(y_test, y_pred)
        insights.append(f"\nModel: Logistic Regression\nAccuracy: {acc:.2f}\n")
        result += f"\nClassification Report:\n{cr}\n"
        result += f"\nConfusion Matrix:\n{cm}\n"
    else:
        model = LinearRegression()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        r2 = r2_score(y_test, y_pred)
        insights.append(f"\nModel: Linear Regression\nR² Score: {r2:.2f}\n")
        result += f"\nPredictions Sample:\n{y_pred[:5]}\n"

    # Save cleaned CSV to a unique temporary file so simultaneous requests
    # cannot overwrite each other's download.
    with tempfile.NamedTemporaryFile(
        mode="w",
        prefix="cleaned_data_",
        suffix=".csv",
        delete=False,
        newline="",
        encoding="utf-8",
    ) as tmp:
        df.to_csv(tmp, index=False)
        cleaned_path = tmp.name
    insights.append(f"\nCleaned dataset saved as {cleaned_path}")

    return "\n".join(insights), result, cleaned_path
79
+
80
+ # Gradio App
81
# Input widget: the CSV the user uploads.
csv_upload = gr.File(label="Upload CSV File")

# Output widgets, listed in the order auto_eda returns its three values.
insights_box = gr.Textbox(label="EDA & Preprocessing Insights", lines=20)
model_box = gr.Textbox(label="Model Result", lines=10)
cleaned_download = gr.File(label="Download Cleaned CSV")

# Bind the pipeline function to the widgets.
iface = gr.Interface(
    fn=auto_eda,
    inputs=csv_upload,
    outputs=[insights_box, model_box, cleaned_download],
    title="AutoEDA Agent - One Click Smart Insights",
    description="Upload a dataset and let the agent auto-handle EDA, preprocessing, and modeling.",
)

# Start the web server for the app.
iface.launch()