Update pages/4Logistic_Regression.py

pages/4Logistic_Regression.py (+104 -111)
@@ -1,201 +1,194 @@
(Previous version of the file, lines 1-201: the removed lines appear truncated in the diff view; unchanged lines match the updated file below.)
import streamlit as st

st.set_page_config(page_title="Logistic Regression", page_icon="🤖", layout="wide")

# Updated CSS styling
st.markdown("""
<style>
.stApp {
    background-color: #f2f6fa;
}
h1, h2, h3 {
    color: #1a237e;
}
.custom-font, p {
    font-family: 'Arial', sans-serif;
    font-size: 18px;
    color: #212121;
    line-height: 1.6;
}
code {
    background-color: #e3eaf5;
    color: #1a237e;
    padding: 4px 6px;
    border-radius: 6px;
    font-size: 16px;
}
pre {
    background-color: #e3eaf5 !important;
    color: #1a237e;
    padding: 10px;
    border-radius: 10px;
    overflow-x: auto;
}
</style>
""", unsafe_allow_html=True)

# App Content
st.markdown("<h1>Logistic Regression</h1>", unsafe_allow_html=True)

st.write("""
Logistic Regression is a supervised machine learning algorithm used for classification. It is designed for binary classification, but with extensions such as softmax or one-vs-rest it can also handle multi-class problems.
Its main task is to find the line, plane, or hyperplane that best separates the classes, so it performs best when the data is (almost) linearly separable.
""")

st.markdown("<h2>1. Logistic Regression with Step Function</h2>", unsafe_allow_html=True)
st.write("""
The step function assigns an output of either 0 or 1 based on a threshold value. However, it has the following disadvantages:
- Not differentiable, which makes optimization hard.
- Cannot measure the probability of class membership.
- Small input changes don't change the output smoothly.

To solve this, we use the Sigmoid function.
""")

st.markdown("<h2>2. Logistic Regression with Sigmoid Function</h2>", unsafe_allow_html=True)
st.write(r"""
The sigmoid function is defined as:

\[
sigmoid(z) = \frac{1}{1 + e^{-z}}
\]

where \( z = WX + b \).

**Advantages**:
- Smooth and differentiable
- Outputs a probability between 0 and 1
- Well suited to binary classification
""")

st.markdown("<h2>Loss used in Logistic Regression</h2>", unsafe_allow_html=True)
st.write(r"""
Logistic Regression uses the **cross-entropy loss**:

\[
L = -\sum_{i=1}^{N} \left[ y_i \log P(y_i) + (1 - y_i) \log (1 - P(y_i)) \right]
\]
""")

st.markdown("<h2>Gradient Descent</h2>", unsafe_allow_html=True)
st.write(r"""
Gradient Descent minimizes the loss function by repeatedly updating the weights:

\[
W = W - \alpha \frac{\partial L}{\partial W}
\]

where \( \alpha \) is the learning rate.
""")

st.markdown("<h2>Learning Rate in Gradient Descent</h2>", unsafe_allow_html=True)
st.write("""
- High learning rate → faster progress, but updates can overshoot and become unstable
- Low learning rate → stable but slow
- Too low → may not reach the minimum within a practical number of iterations
- Common starting values: 0.1 or 0.01
""")

st.markdown("<h2>Types of Gradient Descent</h2>", unsafe_allow_html=True)
st.write("""
- **Batch GD**: uses the full dataset for every update; few updates per epoch, each one slow
- **Stochastic GD (SGD)**: uses one sample per update; fast updates but noisy, so more epochs are needed
- **Mini-batch GD**: uses small batches per update; a balance of the two and the most commonly used
""")

st.markdown("<h2>Multiclass Logistic Regression</h2>", unsafe_allow_html=True)
st.subheader("1. Softmax Regression")
st.write(r"""
Softmax regression generalizes logistic regression to multi-class problems.

\[
P(y = j | X) = \frac{e^{Z_j}}{\sum_{k=1}^{K} e^{Z_k}}
\]

**Steps**:
1. Compute scores: \( Z = WX + b \)
2. Apply softmax
3. Use cross-entropy loss
4. Update with gradient descent
5. Choose the class with the highest probability

**Example**:
If the model predicts probabilities for classes 0 to 9 as follows:

| Class | Probability |
|-------|-------------|
| 0 | 0.02 |
| 1 | 0.05 |
| 2 | 0.07 |
| 3 | 0.10 |
| 4 | 0.08 |
| 5 | 0.12 |
| 6 | 0.10 |
| 7 | 0.30 |
| 8 | 0.05 |
| 9 | 0.11 |

Prediction = **7**

A small NumPy sketch of these steps is shown below.
""")

st.markdown("<h2>2. One-vs-Rest (OvR) Classification</h2>", unsafe_allow_html=True)
st.write("""
OvR breaks a multi-class problem into several binary ones.

**Steps**:
1. Train one binary classifier per class (N classifiers for N classes)
2. Each classifier answers: "Is this example my class or not?"
3. Pick the class whose classifier gives the highest score

**Example**:
For 🍎 🍌 🍊, you train:
- Apple vs Not Apple
- Banana vs Not Banana
- Orange vs Not Orange
""")

st.markdown("<h2>Regularization in Logistic Regression</h2>", unsafe_allow_html=True)
st.write(r"""
Regularization adds a penalty on large weights to reduce overfitting:

- **L1 (Lasso)**: \( \lambda \sum |w| \)
- **L2 (Ridge)**: \( \lambda \sum w^2 \)
- **ElasticNet**: a combination of both

**Why?**
- Reduces model complexity
- Encourages generalization
""")

st.markdown("<h2>Detecting Multicollinearity</h2>", unsafe_allow_html=True)
st.write(r"""
High correlation among features makes the coefficient estimates unstable and hard to interpret. A common check is the **Variance Inflation Factor (VIF)**:

\[
VIF_i = \frac{1}{1 - R^2_i}
\]

where \( R^2_i \) comes from regressing feature \( i \) on the remaining features.

- VIF > 10 indicates high multicollinearity
""")

st.markdown("<h2>Hyperparameters in Logistic Regression</h2>", unsafe_allow_html=True)
st.table([
    ["Hyperparameter", "Description"],
    ["penalty", "Regularization type ('l1', 'l2', 'elasticnet', None)"],
    ["dual", "Use dual formulation (only for 'l2' with 'liblinear')"],
    ["tol", "Tolerance for the stopping criterion"],
    ["C", "Inverse of regularization strength"],
    ["fit_intercept", "Whether to add an intercept term"],
    ["intercept_scaling", "Intercept scaling (for 'liblinear')"],
    ["class_weight", "Weights for classes ('balanced' or dict)"],
    ["random_state", "Seed for reproducibility"],
    ["solver", "Optimization algorithm (e.g., 'lbfgs', 'saga')"],
    ["max_iter", "Maximum number of iterations"],
    ["multi_class", "Strategy ('ovr' or 'multinomial')"],
    ["verbose", "Level of output verbosity"],
    ["warm_start", "Reuse the previous solution as initialization"],
    ["n_jobs", "Number of CPU cores used"],
    ["l1_ratio", "ElasticNet mixing ratio (only with 'elasticnet')"],
])
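
# A sketch showing several of these hyperparameters set together on scikit-learn's
# LogisticRegression; the values are illustrative, not recommendations.
st.code('''
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(
    penalty="l2",
    C=1.0,
    solver="lbfgs",
    max_iter=200,
    class_weight="balanced",
    random_state=42,
)
''', language="python")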

st.write("🚀 This app helps you understand **Logistic Regression** step by step!")