Bhupen commited on
Commit
a8e51d7
·
1 Parent(s): fb3bdaf

Add ML intuitions py file

Browse files
Files changed (1) hide show
  1. app.py +44 -1
app.py CHANGED
@@ -9,6 +9,10 @@ import matplotlib.pyplot as plt
9
  import seaborn as sns
10
  import pandas as pd
11
  import time
 
 
 
 
12
 
13
  def load_and_train_model():
14
  data = load_breast_cancer()
@@ -25,6 +29,19 @@ def load_and_train_model():
25
 
26
  return model, X, y, acc, cm, data
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  def main():
29
  st.set_page_config(page_title="Breast Cancer Classifier", layout="wide")
30
 
@@ -33,7 +50,7 @@ def main():
33
  X = pd.DataFrame(data.data, columns=data.feature_names)
34
  y = pd.Series(data.target)
35
 
36
- st.subheader("Classifying breast cancer - with Logistic Regression")
37
 
38
  #st.write(f"**Problem Statement**")
39
  with st.expander("πŸ“Œ What we're trying to accomplish..."):
@@ -146,6 +163,32 @@ def main():
146
 
147
  These examples underline the importance of **feature engineering** and **domain knowledge** in building effective ML systems.
148
  """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
  # feature discrimination
151
  # Load dataset
 
9
  import seaborn as sns
10
  import pandas as pd
11
  import time
12
+ from sklearn.datasets import make_classification, make_circles, make_moons
13
+ from sklearn.pipeline import make_pipeline
14
+ from sklearn.preprocessing import StandardScaler
15
+ from sklearn.svm import SVC
16
 
17
  def load_and_train_model():
18
  data = load_breast_cancer()
 
29
 
30
  return model, X, y, acc, cm, data
31
 
32
# Helper function to plot a 2-D classifier's decision boundary
def plot_decision_boundary(clf, X, y, ax, title, h=0.02):
    """Draw the decision regions of a fitted 2-feature classifier.

    Evaluates ``clf.predict`` on a dense mesh covering the data (padded by
    1 unit on every side), shades the predicted regions, and overlays the
    training points colored by label.

    Parameters
    ----------
    clf : fitted estimator with a ``predict`` method
    X : array-like of shape (n_samples, 2) — exactly two features
    y : array-like of shape (n_samples,) — class labels for coloring
    ax : matplotlib Axes to draw on (modified in place)
    title : str — title set on ``ax``
    h : float, optional — mesh step size; smaller is finer but slower
        (default 0.02, matching the original hard-coded value)
    """
    # Mesh bounds: data extent padded by 1 so the boundary isn't clipped.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    # Predict over every mesh point, then reshape back to the grid so
    # contourf can shade each predicted class region.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    ax.contourf(xx, yy, Z, alpha=0.3)
    ax.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k', s=40)
    ax.set_title(title)
45
  def main():
46
  st.set_page_config(page_title="Breast Cancer Classifier", layout="wide")
47
 
 
50
  X = pd.DataFrame(data.data, columns=data.feature_names)
51
  y = pd.Series(data.target)
52
 
53
+ st.subheader("Breast Cancer Classification with Logistic Regression")
54
 
55
  #st.write(f"**Problem Statement**")
56
  with st.expander("πŸ“Œ What we're trying to accomplish..."):
 
163
 
164
  These examples underline the importance of **feature engineering** and **domain knowledge** in building effective ML systems.
165
  """)
166
+
167
+ # Create 3 plots for different domains
168
+ fig, axes = plt.subplots(1, 3, figsize=(20, 6))
169
+
170
+ # 1. Healthcare - Linearly Separable
171
+ X1, y1 = make_classification(n_samples=200, n_features=2, n_redundant=0,
172
+ n_informative=2, n_clusters_per_class=1, class_sep=2.0, random_state=1)
173
+ clf1 = make_pipeline(StandardScaler(), SVC(kernel='linear', C=1.0))
174
+ clf1.fit(X1, y1)
175
+ plot_decision_boundary(clf1, X1, y1, axes[0], "Healthcare (Linear SVM)")
176
+
177
+ # 2. Finance - Concentric Circles
178
+ X2, y2 = make_circles(n_samples=200, factor=0.5, noise=0.05, random_state=2)
179
+ clf2 = make_pipeline(StandardScaler(), SVC(kernel='rbf', C=1.0, gamma='auto'))
180
+ clf2.fit(X2, y2)
181
+ plot_decision_boundary(clf2, X2, y2, axes[1], "Finance (RBF SVM - Circles)")
182
+
183
+ # 3. Retail - Semi Circles
184
+ X3, y3 = make_moons(n_samples=200, noise=0.1, random_state=3)
185
+ clf3 = make_pipeline(StandardScaler(), SVC(kernel='rbf', C=1.0, gamma='auto'))
186
+ clf3.fit(X3, y3)
187
+ plot_decision_boundary(clf3, X3, y3, axes[2], "Retail (RBF SVM - Moons)")
188
+
189
+ # Show plot in Streamlit
190
+ st.pyplot(fig)
191
+
192
 
193
  # feature discrimination
194
  # Load dataset