3v324v23 committed on
Commit
b944f72
·
1 Parent(s): 3582e69
Files changed (3) hide show
  1. app.py +103 -0
  2. car_price_dataset.csv +0 -0
  3. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ from sklearn.cluster import AgglomerativeClustering
7
+ from sklearn.metrics import confusion_matrix
8
+
9
# Load dataset
@st.cache_data
def load_data():
    """Read the car price dataset into a DataFrame.

    The CSV is looked up next to this script first, so the app works no
    matter which directory it is launched from. If it is not found there
    (or the script location is unknown), the bare relative file name is
    used, which resolves against the current working directory exactly as
    before.

    Returns:
        pandas.DataFrame: the parsed contents of ``car_price_dataset.csv``.

    Raises:
        FileNotFoundError: if the CSV exists in neither location
            (raised by ``pandas.read_csv``).
    """
    # Local import so the file-level import block does not need to change.
    from pathlib import Path

    file_name = "car_price_dataset.csv"

    # ``__file__`` may be absent when the code is exec'd without a module
    # path, so read it defensively instead of referencing it directly.
    script_file = globals().get("__file__")
    if script_file is not None:
        candidate = Path(script_file).resolve().parent / file_name
        if candidate.exists():
            return pd.read_csv(candidate)

    # Fallback: original behaviour (relative to the working directory).
    return pd.read_csv(file_name)
14
+
15
# Fetch the dataset through the cached loader.
df = load_data()

# Streamlit App Title
st.title("🚗 Car Price Clustering & Evaluation")

# Creating Tabs: overview, visualisations, and interactive clustering.
tab_labels = [
    "📊 Dataset Overview",
    "📈 Visual Matrix",
    "⚙️ User Input for Clustering",
]
tab1, tab2, tab3 = st.tabs(tab_labels)

# --- TAB 1: Dataset Overview ---
with tab1:
    st.write("## Dataset Overview")
    # First rows of the raw data, followed by summary statistics.
    st.write(df.head())
    st.write(df.describe())
28
+
29
# --- TAB 2: Visualization Matrix ---
with tab2:
    st.write("## Data Visualization")

    # Select numerical features (also reused by the clustering tab below).
    numerical_df = df.select_dtypes(include=[np.number])
    numeric_columns = numerical_df.columns
    has_numeric = len(numeric_columns) > 0

    # Correlation Heatmap
    st.write("### Correlation Heatmap")
    if has_numeric:
        fig, ax = plt.subplots(figsize=(8, 5))
        # Draw on the axes we just created rather than the implicit
        # "current axes", which is fragile when several figures are open.
        sns.heatmap(
            numerical_df.corr(), annot=True, cmap="coolwarm", fmt=".2f", ax=ax
        )
        st.pyplot(fig)
        # Streamlit reruns the script on every interaction; close the figure
        # so matplotlib does not accumulate open figures across reruns.
        plt.close(fig)
    else:
        st.warning("No numerical columns available for a correlation heatmap.")

    # Confusion Matrix
    st.write("### Confusion Matrix")
    selected_features = ["Engine_Size", "Mileage", "Price"]
    if all(f in numeric_columns for f in selected_features):
        X = df[selected_features].dropna().values
        n_clusters = 3  # Default cluster count

        # Apply Hierarchical Clustering
        hc = AgglomerativeClustering(n_clusters=n_clusters, linkage='ward')
        labels = hc.fit_predict(X)

        # Generate dummy "true labels" (for demonstration only: they are
        # random, so this matrix does not measure clustering quality).
        true_labels = np.random.randint(0, n_clusters, len(labels))

        cm = confusion_matrix(true_labels, labels)
        fig, ax = plt.subplots(figsize=(5, 4))
        sns.heatmap(cm, annot=True, cmap="Blues", fmt="d", ax=ax)
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        st.pyplot(fig)
        plt.close(fig)
    else:
        st.warning("Not enough numerical data for clustering.")

    # Scatter Plot
    st.write("### Scatter Plot")
    if has_numeric:
        scatter_x = st.selectbox("Select X-axis", numeric_columns, index=0)
        # Default the Y-axis to the second numeric column, but fall back to
        # the first when only one exists (index=1 would be out of range).
        scatter_y = st.selectbox(
            "Select Y-axis",
            numeric_columns,
            index=min(1, len(numeric_columns) - 1),
        )

        fig, ax = plt.subplots(figsize=(6, 4))
        sns.scatterplot(x=df[scatter_x], y=df[scatter_y], alpha=0.7, ax=ax)
        ax.set_xlabel(scatter_x)
        ax.set_ylabel(scatter_y)
        st.pyplot(fig)
        plt.close(fig)
    else:
        st.warning("No numerical columns available for a scatter plot.")
75
+
76
# --- TAB 3: User Input & Clustering ---
with tab3:
    st.write("## Perform Clustering")

    numerical_features = numerical_df.columns.tolist()
    # Only pre-select defaults that actually exist among the options;
    # a default that is not an option makes the multiselect raise.
    preferred_defaults = ["Engine_Size", "Mileage", "Price"]
    selected_features = st.multiselect(
        "Select features for clustering",
        numerical_features,
        default=[f for f in preferred_defaults if f in numerical_features],
    )

    if len(selected_features) < 2:
        st.warning("Please select at least two numerical features.")
    else:
        # Keep the filtered frame (not just its values) so the cluster labels
        # can be matched back to the rows that were actually clustered.
        clustered = df[selected_features].dropna()
        X = clustered.values  # Prepare data

        # Choose Number of Clusters (With + / - Buttons)
        n_clusters = st.number_input(
            "Select Number of Clusters", min_value=2, max_value=10, value=3, step=1
        )

        # Predict Button
        if st.button("Predict Clusters"):
            if len(clustered) < n_clusters:
                # The clusterer is not given fewer samples than clusters.
                st.warning(
                    "Not enough complete rows for the requested number of clusters."
                )
            else:
                # Apply Hierarchical Clustering
                hc = AgglomerativeClustering(
                    n_clusters=int(n_clusters), linkage='ward'
                )
                labels = hc.fit_predict(X)

                # Align labels on the row index: rows dropped by dropna()
                # get NaN instead of causing a length-mismatch error.
                df["Cluster"] = pd.Series(labels, index=clustered.index)

                # Display results (only rows that took part in clustering).
                st.write("### Clustered Data")
                st.write(clustered.assign(Cluster=labels).head(10))

                st.success("Clustering Complete! 🎉")
103
+
car_price_dataset.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ numpy
4
+ matplotlib
5
+ scipy
6
+ scikit-learn