Spaces:
Sleeping
Sleeping
Bhupen
committed on
Commit
·
a8e51d7
1
Parent(s):
fb3bdaf
Add ML intuitions py file
Browse files
app.py
CHANGED
@@ -9,6 +9,10 @@ import matplotlib.pyplot as plt
|
|
9 |
import seaborn as sns
|
10 |
import pandas as pd
|
11 |
import time
|
|
|
|
|
|
|
|
|
12 |
|
13 |
def load_and_train_model():
|
14 |
data = load_breast_cancer()
|
@@ -25,6 +29,19 @@ def load_and_train_model():
|
|
25 |
|
26 |
return model, X, y, acc, cm, data
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
def main():
|
29 |
st.set_page_config(page_title="Breast Cancer Classifier", layout="wide")
|
30 |
|
@@ -33,7 +50,7 @@ def main():
|
|
33 |
X = pd.DataFrame(data.data, columns=data.feature_names)
|
34 |
y = pd.Series(data.target)
|
35 |
|
36 |
-
st.subheader("
|
37 |
|
38 |
#st.write(f"**Problem Statement**")
|
39 |
with st.expander("π What we're trying to accomplish..."):
|
@@ -146,6 +163,32 @@ def main():
|
|
146 |
|
147 |
These examples underline the importance of **feature engineering** and **domain knowledge** in building effective ML systems.
|
148 |
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
|
150 |
# feature discrimination
|
151 |
# Load dataset
|
|
|
9 |
import seaborn as sns
|
10 |
import pandas as pd
|
11 |
import time
|
12 |
+
from sklearn.datasets import make_classification, make_circles, make_moons
|
13 |
+
from sklearn.pipeline import make_pipeline
|
14 |
+
from sklearn.preprocessing import StandardScaler
|
15 |
+
from sklearn.svm import SVC
|
16 |
|
17 |
def load_and_train_model():
|
18 |
data = load_breast_cancer()
|
|
|
29 |
|
30 |
return model, X, y, acc, cm, data
|
31 |
|
32 |
+
# Helper function to plot decision boundary
def plot_decision_boundary(clf, X, y, ax, title, h=0.02):
    """Draw a fitted classifier's decision regions and the training points.

    Parameters
    ----------
    clf : fitted estimator exposing ``predict`` (e.g. an sklearn pipeline).
    X : array of shape (n_samples, 2)
        Only the first two columns are used to build the grid, so the
        function assumes strictly 2-D features.
    y : array of class labels, used to colour the scattered points.
    ax : matplotlib Axes to draw on (modified in place).
    title : str
        Title set on ``ax``.
    h : float, optional
        Mesh step size; smaller values give a finer boundary at the cost
        of more ``predict`` calls. Defaults to 0.02 (the previous
        hard-coded value), so existing callers are unaffected.
    """
    # Pad the data range by 1 on each side so the boundary is not
    # clipped right at the outermost samples.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    # Classify every grid point, then reshape predictions back onto the
    # grid so contourf can shade the decision regions.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    ax.contourf(xx, yy, Z, alpha=0.3)
    ax.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k', s=40)
    ax.set_title(title)
44 |
+
|
45 |
def main():
|
46 |
st.set_page_config(page_title="Breast Cancer Classifier", layout="wide")
|
47 |
|
|
|
50 |
X = pd.DataFrame(data.data, columns=data.feature_names)
|
51 |
y = pd.Series(data.target)
|
52 |
|
53 |
+
st.subheader("Breast Cancer Classification with Logistic Regression")
|
54 |
|
55 |
#st.write(f"**Problem Statement**")
|
56 |
with st.expander("π What we're trying to accomplish..."):
|
|
|
163 |
|
164 |
These examples underline the importance of **feature engineering** and **domain knowledge** in building effective ML systems.
|
165 |
""")
|
166 |
+
|
167 |
+
# Create 3 plots for different domains
|
168 |
+
fig, axes = plt.subplots(1, 3, figsize=(20, 6))
|
169 |
+
|
170 |
+
# 1. Healthcare - Linearly Separable
|
171 |
+
X1, y1 = make_classification(n_samples=200, n_features=2, n_redundant=0,
|
172 |
+
n_informative=2, n_clusters_per_class=1, class_sep=2.0, random_state=1)
|
173 |
+
clf1 = make_pipeline(StandardScaler(), SVC(kernel='linear', C=1.0))
|
174 |
+
clf1.fit(X1, y1)
|
175 |
+
plot_decision_boundary(clf1, X1, y1, axes[0], "Healthcare (Linear SVM)")
|
176 |
+
|
177 |
+
# 2. Finance - Concentric Circles
|
178 |
+
X2, y2 = make_circles(n_samples=200, factor=0.5, noise=0.05, random_state=2)
|
179 |
+
clf2 = make_pipeline(StandardScaler(), SVC(kernel='rbf', C=1.0, gamma='auto'))
|
180 |
+
clf2.fit(X2, y2)
|
181 |
+
plot_decision_boundary(clf2, X2, y2, axes[1], "Finance (RBF SVM - Circles)")
|
182 |
+
|
183 |
+
# 3. Retail - Semi Circles
|
184 |
+
X3, y3 = make_moons(n_samples=200, noise=0.1, random_state=3)
|
185 |
+
clf3 = make_pipeline(StandardScaler(), SVC(kernel='rbf', C=1.0, gamma='auto'))
|
186 |
+
clf3.fit(X3, y3)
|
187 |
+
plot_decision_boundary(clf3, X3, y3, axes[2], "Retail (RBF SVM - Moons)")
|
188 |
+
|
189 |
+
# Show plot in Streamlit
|
190 |
+
st.pyplot(fig)
|
191 |
+
|
192 |
|
193 |
# feature discrimination
|
194 |
# Load dataset
|