louiecerv commited on
Commit
62004ae
·
1 Parent(s): f023c06

update remote

Browse files
Files changed (3) hide show
  1. app.py +78 -0
  2. data_generator.py +19 -0
  3. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ import matplotlib.pyplot as plt
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.preprocessing import StandardScaler
7
+ from sklearn.svm import SVC
8
+ from sklearn.metrics import classification_report, accuracy_score, ConfusionMatrixDisplay
9
+ import data_generator
10
+
11
# Load dataset from CSV.
# The CSV is produced by data_generator and is not present on a fresh
# checkout, so create it on demand instead of crashing on the first run.
try:
    business_data = pd.read_csv("business_data.csv")
except FileNotFoundError:
    data_generator.generate_simulated_data()
    business_data = pd.read_csv("business_data.csv")

# Every column except the last is a feature; the last column is the label.
X = business_data.iloc[:, :-1].values
y = business_data.iloc[:, -1].values

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# identical across Streamlit reruns.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit the scaler on the training split only and reuse it for the test split,
# so the test data does not influence the scaling statistics.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
20
+
21
+ # Business Problem Description
22
+ """
23
+ This simulated dataset represents a business classification problem where a company is trying to categorize customer behaviors
24
+ into two distinct segments. The classification is based on factors such as purchase history, engagement levels, and
25
+ customer loyalty indicators. The data is structured in a way that requires a non-linear classification approach, making it
26
+ an ideal case for Support Vector Machines with polynomial or RBF kernels.
27
+ """
28
+
29
# Streamlit App
st.title("SVM Business Classification App")
st.sidebar.header("Model Hyperparameters")

# C is read as a module-level value when the SVC models are built.
C = st.sidebar.slider("Regularization (C)", 0.01, 10.0, 1.0)
# No epsilon control here: epsilon is a parameter of SVR (regression);
# SVC does not accept it, so a slider for it would have no effect.

# Tabs for different kernel types
tab1, tab2, tab3 = st.tabs(["Linear Kernel", "Polynomial Kernel", "RBF Kernel"])
37
+
38
def train_and_evaluate(kernel, degree=3, gamma='scale'):
    """Fit an SVC with the given kernel settings and score it on the test split.

    Reads the module-level ``C``, ``X_train``, ``y_train``, ``X_test`` and
    ``y_test`` rather than taking them as arguments.

    Args:
        kernel: Kernel name forwarded to ``SVC``.
        degree: Forwarded to ``SVC`` as ``degree``.
        gamma: Forwarded to ``SVC`` as ``gamma``.

    Returns:
        A tuple ``(model, accuracy, report, y_pred)``: the fitted classifier,
        its accuracy on the test split, the classification report as a dict,
        and the predicted test labels.
    """
    classifier = SVC(C=C, kernel=kernel, degree=degree, gamma=gamma)
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    return (
        classifier,
        accuracy_score(y_test, predictions),
        classification_report(y_test, predictions, output_dict=True),
        predictions,
    )
45
+
46
def _show_results(model, acc, report):
    """Render accuracy, classification report and confusion matrix for a model.

    Args:
        model: Fitted classifier used to draw the confusion matrix on the
            module-level test split.
        acc: Accuracy value shown with two decimals.
        report: Classification report dict, displayed as a transposed table.
    """
    st.write(f"**Accuracy:** {acc:.2f}")
    st.write("**Classification Report:**", pd.DataFrame(report).transpose())
    fig, ax = plt.subplots()
    ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, ax=ax)
    st.pyplot(fig)
    # pyplot keeps every figure registered until it is closed, and Streamlit
    # reruns this script on each interaction; close it to avoid piling up
    # open figures.
    plt.close(fig)


# Linear Kernel
with tab1:
    st.subheader("Linear Kernel")
    model, acc, report, y_pred = train_and_evaluate("linear")
    _show_results(model, acc, report)

# Polynomial Kernel
with tab2:
    st.subheader("Polynomial Kernel")
    degree = st.slider("Polynomial Degree", 2, 5, 3)
    model, acc, report, y_pred = train_and_evaluate("poly", degree)
    _show_results(model, acc, report)

# RBF Kernel
with tab3:
    st.subheader("RBF Kernel")
    gamma = st.slider("Gamma", 0.01, 1.0, 0.1)
    model, acc, report, y_pred = train_and_evaluate("rbf", gamma=gamma)
    _show_results(model, acc, report)

st.write("This app demonstrates how different SVM kernels impact classification performance in a non-linear business problem.")
data_generator.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ from sklearn.datasets import make_moons
4
+
5
def generate_simulated_data(path="business_data.csv", n_samples=300, noise=0.2, random_state=42):
    """Generate a simulated two-class dataset and save it to a CSV file.

    The samples come from ``make_moons``; the file has the columns
    ``Feature1``, ``Feature2`` and ``Target`` and no index column.

    Args:
        path: Destination CSV path.
        n_samples: Number of samples passed to ``make_moons``.
        noise: Noise level passed to ``make_moons``.
        random_state: Seed passed to ``make_moons``. It is the only source of
            randomness, so the global NumPy RNG is deliberately left alone
            and importing modules keep their own random state.
    """
    X, y = make_moons(n_samples=n_samples, noise=noise, random_state=random_state)

    # Convert to DataFrame
    df = pd.DataFrame(X, columns=["Feature1", "Feature2"])
    df["Target"] = y

    # Save to CSV
    df.to_csv(path, index=False)


if __name__ == "__main__":
    generate_simulated_data()
    print("Simulated business dataset saved as 'business_data.csv'.")
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ numpy
3
+ pandas
4
+ matplotlib
5
+ scikit-learn