File size: 4,224 Bytes
c704bfb
 
 
c15b0e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c704bfb
 
c15b0e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c704bfb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import io
import tempfile
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import plot_model

# --- Data preparation --------------------------------------------------------
# Fetch the Iris dataset bundled with scikit-learn.
iris = load_iris()

# Drop every sample labelled 2 so that only labels 0 and 1 remain, which turns
# the task into a binary classification problem.
binary_mask = iris.target != 2
X = iris.data[binary_mask]
y = iris.target[binary_mask]

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# identical from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training portion only, then apply that
# same transformation to both portions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# --- Page header and introduction -------------------------------------------
# The markdown bodies are spelled out line by line so that the trailing spaces
# and newlines they contain stay visible in the source.
intro_markdown = (
    "\n"
    "## Introduction\n"
    "Logistic Regression is a statistical model used for binary classification tasks. \n"
    "In this tutorial, we will use the Iris dataset to classify whether a flower is \n"
    "**Setosa** or **Versicolor** based on its features.\n"
)
dataset_markdown = (
    "\n"
    "The Iris dataset contains 150 samples of iris flowers, each described by four features: \n"
    "sepal length, sepal width, petal length, and petal width. There are three classes: Setosa, Versicolor, and Virginica.\n"
    "For this example, we'll only use the Setosa and Versicolor classes.\n"
)

st.title('Logistic Regression with Keras on Iris Dataset')
st.write(intro_markdown)

# Describe the dataset, then show the first rows of the feature matrix X
# with the feature names as column headers.
st.write("### Iris Dataset")
st.write(dataset_markdown)
feature_preview = pd.DataFrame(X, columns=iris.feature_names).head()
st.write(feature_preview)

# Scatter plot of the first two feature columns, one colour per class.
st.write("### Sample Data Distribution")
fig, ax = plt.subplots()
for class_id, color in zip([0, 1], ['blue', 'orange']):
    # A boolean mask keeps the selected coordinates one-dimensional; indexing
    # with the tuple returned by np.where yields (1, n) arrays instead.
    in_class = y == class_id
    ax.scatter(
        X[in_class, 0],
        X[in_class, 1],
        c=color,
        label=iris.target_names[class_id],
        edgecolor='k',
    )
ax.set_xlabel(iris.feature_names[0])
ax.set_ylabel(iris.feature_names[1])
ax.legend()
st.pyplot(fig)
# Figures made with plt.subplots() stay registered with pyplot until closed.
# Streamlit re-executes the script on each interaction, so close the figure
# once it has been rendered to avoid accumulating one open figure per rerun.
plt.close(fig)

# Let the user choose how many epochs the model is trained for.
epochs = st.slider('Select number of epochs for training:', min_value=10, max_value=200, value=100, step=10)

# Logistic regression expressed as a Keras model: a single sigmoid unit fed by
# the four input features, optimised with binary cross-entropy.
model = Sequential()
model.add(Dense(1, input_dim=4, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Display the model architecture
st.write("### Model Architecture")

# model.summary() prints its table and returns None, so collect the text via
# print_fn and render that rather than the return value. The lambda tolerates
# extra keyword arguments in case the Keras version in use passes any.
summary_lines = []
model.summary(print_fn=lambda text, **_: summary_lines.append(text))
st.text("\n".join(summary_lines))

# plot_model writes an image file to a filesystem path (it does not accept a
# file-like object), so render into a temporary directory and pass the bytes
# to Streamlit before the directory is removed.
with tempfile.TemporaryDirectory() as tmp_dir:
    diagram_path = Path(tmp_dir) / "model.png"
    try:
        plot_model(model, to_file=str(diagram_path), show_shapes=True, show_layer_names=True)
    except ImportError as exc:
        # plot_model depends on the optional pydot and Graphviz packages;
        # their absence should not take the rest of the app down.
        st.info(f"Model diagram unavailable: {exc}")
    else:
        if diagram_path.is_file():
            st.image(
                io.BytesIO(diagram_path.read_bytes()),
                caption='Logistic Regression Model Architecture',
                use_column_width=True,
            )
        else:
            # Some versions only print a message when rendering is impossible.
            st.info("Model diagram unavailable: no image was produced.")

# Fit the model on the training split for the selected number of epochs,
# with Keras' progress output switched off.
model.fit(X_train, y_train, epochs=epochs, verbose=0)

# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 labels.
train_scores = model.predict(X_train)
test_scores = model.predict(X_test)
y_pred_train = (train_scores > 0.5).astype("int32")
y_pred_test = (test_scores > 0.5).astype("int32")

# Accuracy on both splits, plus the confusion matrix for the held-out split.
train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)
conf_matrix = confusion_matrix(y_test, y_pred_test)

# Report both accuracies rounded to two decimals.
st.write('## Model Performance')
for split_name, split_accuracy in (('Training', train_accuracy), ('Testing', test_accuracy)):
    st.write(f'{split_name} Accuracy: {split_accuracy:.2f}')

# Render the confusion matrix as a lightly shaded grid with the count written
# in every cell.
st.write('## Confusion Matrix')
fig, ax = plt.subplots()
ax.matshow(conf_matrix, cmap=plt.cm.Blues, alpha=0.3)
# matshow draws columns along x and rows along y, hence x=col / y=row.
for (row, col), count in np.ndenumerate(conf_matrix):
    ax.text(x=col, y=row, s=count, va='center', ha='center')

# Label through the Axes object instead of pyplot's implicit "current axes",
# so the labels always land on the figure that is being rendered here.
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
st.pyplot(fig)
# Close the figure once rendered: Streamlit reruns the script on every
# interaction and pyplot would otherwise keep each rerun's figure open.
plt.close(fig)

# --- Interactive prediction --------------------------------------------------
st.write('## Make a Prediction')

# One numeric input per feature, created in the order the model expects them;
# each entry is (widget label, initial value).
measurement_fields = (
    ('Sepal Length (cm)', 5.0),
    ('Sepal Width (cm)', 3.5),
    ('Petal Length (cm)', 1.4),
    ('Petal Width (cm)', 0.2),
)
sepal_length, sepal_width, petal_length, petal_width = (
    st.number_input(field_label, min_value=0.0, max_value=10.0, value=initial_value)
    for field_label, initial_value in measurement_fields
)

if st.button('Predict'):
    # Scale the single sample with the already-fitted scaler, then threshold
    # the model output at 0.5 to get the predicted label.
    sample = np.array([[sepal_length, sepal_width, petal_length, petal_width]])
    sample_score = model.predict(scaler.transform(sample))
    predicted_label = (sample_score > 0.5).astype("int32")[0][0]
    if predicted_label == 0:
        species_name = "Setosa"
    else:
        species_name = "Versicolor"
    st.write(f'Prediction: {species_name}')