update remote
- app.py +78 -0
- data_generator.py +19 -0
- requirements.txt +5 -0
app.py
ADDED
@@ -0,0 +1,78 @@
import os

import streamlit as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score, ConfusionMatrixDisplay
import data_generator

# Load dataset from CSV, generating it first if it does not exist yet
if not os.path.exists("business_data.csv"):
    data_generator.generate_simulated_data()
business_data = pd.read_csv("business_data.csv")
X = business_data.iloc[:, :-1].values
y = business_data.iloc[:, -1].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Business problem description (a bare string literal, rendered in the app by Streamlit's magic)
"""
This simulated dataset represents a business classification problem in which a company categorizes
customer behaviors into two distinct segments. The classification is based on factors such as purchase
history, engagement levels, and customer loyalty indicators. The data requires a non-linear decision
boundary, making it a good fit for Support Vector Machines with polynomial or RBF kernels.
"""

# Streamlit App
st.title("SVM Business Classification App")
st.sidebar.header("Model Hyperparameters")
C = st.sidebar.slider("Regularization (C)", 0.01, 10.0, 1.0)
# Note: epsilon is a parameter of SVR (regression), not SVC; this slider is not used by the models below.
epsilon = st.sidebar.slider("Epsilon", 0.01, 1.0, 0.1)

# Tabs for different kernel types
tab1, tab2, tab3 = st.tabs(["Linear Kernel", "Polynomial Kernel", "RBF Kernel"])

def train_and_evaluate(kernel, degree=3, gamma='scale'):
    model = SVC(C=C, kernel=kernel, degree=degree, gamma=gamma)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)
    return model, accuracy, report, y_pred

# Linear Kernel
with tab1:
    st.subheader("Linear Kernel")
    model, acc, report, y_pred = train_and_evaluate("linear")
    st.write(f"**Accuracy:** {acc:.2f}")
    st.write("**Classification Report:**", pd.DataFrame(report).transpose())
    fig, ax = plt.subplots()
    ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, ax=ax)
    st.pyplot(fig)

# Polynomial Kernel
with tab2:
    st.subheader("Polynomial Kernel")
    degree = st.slider("Polynomial Degree", 2, 5, 3)
    model, acc, report, y_pred = train_and_evaluate("poly", degree)
    st.write(f"**Accuracy:** {acc:.2f}")
    st.write("**Classification Report:**", pd.DataFrame(report).transpose())
    fig, ax = plt.subplots()
    ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, ax=ax)
    st.pyplot(fig)

# RBF Kernel
with tab3:
    st.subheader("RBF Kernel")
    gamma = st.slider("Gamma", 0.01, 1.0, 0.1)
    model, acc, report, y_pred = train_and_evaluate("rbf", gamma=gamma)
    st.write(f"**Accuracy:** {acc:.2f}")
    st.write("**Classification Report:**", pd.DataFrame(report).transpose())
    fig, ax = plt.subplots()
    ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, ax=ax)
    st.pyplot(fig)

st.write("This app demonstrates how different SVM kernels affect classification performance on a non-linear business problem.")
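Since the dataset has only two (scaled) features, each kernel's effect could also be shown as a decision boundary rather than only a confusion matrix. Below is a minimal sketch of such a helper; plot_decision_boundary is not part of the committed files, just an illustrative addition that could be dropped into each tab.

import numpy as np

def plot_decision_boundary(model, X, y, ax):
    """Shade the regions a fitted SVC assigns to each class and overlay the data points."""
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 200),
                         np.linspace(y_min, y_max, 200))
    # Predict a class for every grid point, then draw filled contours per class
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    ax.contourf(xx, yy, Z, alpha=0.3)
    ax.scatter(X[:, 0], X[:, 1], c=y, edgecolors="k")

Inside a tab it would be used the same way as the confusion matrix: create a figure with plt.subplots(), call plot_decision_boundary(model, X_test, y_test, ax), and pass the figure to st.pyplot(fig).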
data_generator.py
ADDED
@@ -0,0 +1,19 @@
import numpy as np
import pandas as pd
from sklearn.datasets import make_moons

def generate_simulated_data():
    """Generates a simulated business classification dataset and saves it to a CSV file."""
    np.random.seed(42)
    X, y = make_moons(n_samples=300, noise=0.2, random_state=42)

    # Convert to DataFrame
    df = pd.DataFrame(X, columns=["Feature1", "Feature2"])
    df["Target"] = y

    # Save to CSV
    df.to_csv("business_data.csv", index=False)

if __name__ == "__main__":
    generate_simulated_data()
    print("Simulated business dataset saved as 'business_data.csv'.")
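The app's description asserts that this data needs a non-linear decision boundary, which is easy to sanity-check once business_data.csv has been written by the script above. A rough sketch, not part of the committed files; exact scores will vary with the cross-validation split:

import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

df = pd.read_csv("business_data.csv")
X, y = df[["Feature1", "Feature2"]].values, df["Target"].values

# Compare a linear and an RBF SVM under identical scaling; the RBF kernel
# should separate the two interleaving "moons" noticeably better.
for kernel in ("linear", "rbf"):
    clf = make_pipeline(StandardScaler(), SVC(kernel=kernel, C=1.0))
    scores = cross_val_score(clf, X, y, cv=5)
    print(f"{kernel}: mean CV accuracy {scores.mean():.2f}")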
requirements.txt
ADDED
@@ -0,0 +1,5 @@
streamlit
numpy
pandas
matplotlib
scikit-learn