Update app.py
Browse files
app.py
CHANGED
@@ -1,80 +1,130 @@
|
|
|
|
1 |
import pandas as pd
|
2 |
import numpy as np
|
3 |
-
from sklearn.linear_model import LinearRegression
|
4 |
from sklearn.model_selection import train_test_split
|
5 |
-
from
|
6 |
-
|
7 |
import altair as alt
|
8 |
|
9 |
-
#
|
10 |
-
st.title("📊 Student Performance
|
11 |
|
12 |
-
#
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
-
#
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
#
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
-
|
|
|
|
|
|
|
|
|
30 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
34 |
model.fit(X_train, y_train)
|
|
|
35 |
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
mse = mean_squared_error(y_test, y_pred)
|
41 |
-
r2 = r2_score(y_test, y_pred)
|
42 |
-
|
43 |
-
# Create tabs
|
44 |
-
tab1, tab2, tab3 = st.tabs(["📈 Data Visualization", "📊 Model Performance", "🎯 Prediction"])
|
45 |
-
|
46 |
-
# Tab 1: Data Visualization
|
47 |
-
with tab1:
|
48 |
-
st.write("### Data Visualization")
|
49 |
-
|
50 |
-
# Scatter plots
|
51 |
-
for col in ['Study Hours', 'Attendance Rate', 'Assignment Grades']:
|
52 |
-
st.write(f"**{col} vs Final Exam Score**")
|
53 |
-
chart = alt.Chart(df).mark_circle().encode(
|
54 |
-
x=col,
|
55 |
-
y='Final Exam Score',
|
56 |
-
tooltip=[col, 'Final Exam Score']
|
57 |
-
).interactive()
|
58 |
-
st.altair_chart(chart, use_container_width=True)
|
59 |
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
st.write(f"✅ Mean Squared Error (MSE): {mse:.2f}")
|
64 |
-
st.write(f"✅ R-squared Score: {r2:.2f}")
|
65 |
|
66 |
-
|
67 |
-
|
68 |
-
st.write("### Predict Final Exam Score")
|
69 |
-
study_hours = st.number_input("📚 Study Hours", min_value=0, value=10, step=1)
|
70 |
-
attendance_rate = st.slider("🎟️ Attendance Rate", min_value=0.0, max_value=1.0, step=0.01, value=0.85)
|
71 |
-
assignment_grades = st.number_input("📝 Average Assignment Grade", min_value=0, max_value=100, value=80, step=1)
|
72 |
|
73 |
-
|
74 |
-
|
75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
-
|
78 |
-
|
|
|
|
|
79 |
|
80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
|
|
4 |
from sklearn.model_selection import train_test_split
|
5 |
+
from xgboost import XGBClassifier
|
6 |
+
from sklearn.preprocessing import StandardScaler
|
7 |
import altair as alt
|
8 |
|
9 |
+
# 🎓 App title
st.title("📊 Student Performance Prediction App")

# 📚 About the app
# The text below is rendered as Markdown by st.write. It names the features
# the model is actually trained on (the three exam scores, parental level of
# education and test preparation course) and the real estimator class name.
st.write(
    """
## About This App
This application predicts whether a student will pass or fail based on their exam scores, parental level of education, and test preparation course.
### Features:
- **Dataset Overview**: View the number of students categorized by performance.
- **Model Evaluation**: Check the model's accuracy on the test set.
- **Student Performance Prediction**: Enter student details and get a prediction.

The app uses **Streamlit** for the UI and XGBoost's **XGBClassifier** for predictions.
"""
)
|
25 |
|
26 |
+
# 📌 Load and preprocess data
|
27 |
+
def load_data(file_path="exams.csv"):
    """Load the exam dataset and turn it into a model-ready frame.

    Args:
        file_path: Path of the CSV file to read. Defaults to ``"exams.csv"``,
            the location the app has always used.

    Returns:
        A ``(df, scaler, numerical_features)`` tuple where ``df`` is the
        preprocessed DataFrame (including the binary ``"Passed"`` target),
        ``scaler`` is the fitted ``StandardScaler`` and
        ``numerical_features`` is the list of column names it was fitted on.

    Raises:
        KeyError: If one of the expected columns is absent from the CSV.
    """
    numerical_features = ["math score", "reading score", "writing score"]
    cat_cols = ["parental level of education", "test preparation course"]

    df = pd.read_csv(file_path)

    # Target: a student passes when the mean of the three scores is >= 50.
    # NOTE(review): the three scores stay in the frame as features, so the
    # target is a direct function of the inputs - confirm this is intended.
    df["Average Score"] = df[numerical_features].mean(axis=1)
    df["Passed"] = (df["Average Score"] >= 50).astype(int)

    # Drop the helper column and the columns that are not used as features.
    df = df.drop(columns=["Average Score", "lunch", "race/ethnicity", "gender"])

    # One-hot encode the categorical columns; the new columns are named
    # "<original column>_<category value>".
    df = pd.get_dummies(df, columns=cat_cols)

    # Standardize the score columns. The scaler is fitted on the full frame,
    # i.e. before any train/test split happens downstream.
    scaler = StandardScaler()
    df[numerical_features] = scaler.fit_transform(df[numerical_features])

    return df, scaler, numerical_features
|
48 |
|
49 |
+
# Train the model
|
50 |
+
def train_model(df, scaler=None):
    """Split the data and fit an XGBoost classifier on the training part.

    Args:
        df: Preprocessed DataFrame containing the feature columns and a
            ``"Passed"`` target column.
        scaler: Optional fitted scaler to hand back to the caller. When
            omitted, the module-level ``scaler`` is used if one exists.

    Returns:
        A ``(model, X_test, y_test, feature_names, X_train, scaler)`` tuple,
        where ``feature_names`` is ``X_train.columns``.
    """
    if scaler is None:
        # Earlier versions returned the module-level ``scaler`` implicitly,
        # which raised NameError when no such global existed. Keep that
        # behaviour for existing callers, but degrade to None instead of
        # failing when the global is absent.
        scaler = globals().get("scaler")

    X = df.drop(columns=["Passed"])
    y = df["Passed"]

    # Fixed random_state keeps the 80/20 split reproducible across reruns.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    model = XGBClassifier(
        n_estimators=1000,
        learning_rate=0.03,
        max_depth=10,
        colsample_bytree=0.9,
        subsample=0.9,
        random_state=42,
    )
    model.fit(X_train, y_train)

    return model, X_test, y_test, X_train.columns, X_train, scaler
|
62 |
|
63 |
+
# Evaluate the model
|
64 |
+
def evaluate_model(model, X_test, y_test):
    """Return whatever ``model.score`` reports for the held-out data.

    Args:
        model: Fitted estimator exposing a ``score(X, y)`` method.
        X_test: Held-out feature matrix.
        y_test: Held-out target values.

    Returns:
        The value returned by ``model.score(X_test, y_test)``.
    """
    return model.score(X_test, y_test)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
|
68 |
+
# Run the pipeline once per script execution: load -> train -> evaluate.
df, scaler, numerical_features = load_data()
training_result = train_model(df)
model, X_test, y_test, feature_names, X_train, scaler = training_result
accuracy = evaluate_model(model, X_test, y_test)

# 🏡 Streamlit tabs
tab_labels = ["💁 Dataset Overview", "📊 Model Performance", "🎓 Predict Performance"]
tab1, tab2, tab3 = st.tabs(tab_labels)
|
|
|
|
|
|
|
|
|
74 |
|
75 |
+
# 📁 Tab 1: Dataset Overview
|
76 |
+
with tab1:
    # Summary statistics of the preprocessed frame.
    st.write("### Dataset Summary")
    summary_stats = df.describe()
    st.write(summary_stats)

    # Bar chart with one bar per value of the "Passed" target.
    st.write("### Distribution of Passed Students")
    pass_counts = (
        df["Passed"]
        .value_counts()
        .rename_axis("Passed")
        .reset_index(name="Count")
    )

    bars = alt.Chart(pass_counts).mark_bar()
    chart = bars.encode(
        x=alt.X("Passed:N", title="Passed (0 = No, 1 = Yes)"),
        y="Count",
        color="Passed:N",
    )
    st.altair_chart(chart, use_container_width=True)
|
90 |
|
91 |
+
# 📊 Tab 2: Model Performance
|
92 |
+
with tab2:
    # Show the held-out accuracy as a percentage with two decimals.
    st.write("### Model Evaluation")
    accuracy_pct = accuracy * 100
    st.write(f"✅ **Model Accuracy:** {accuracy_pct:.2f}%")
|
95 |
|
96 |
+
# 🎓 Tab 3: Predict Performance
|
97 |
+
with tab3:
    st.write("### Enter Student Details")

    math_score = st.number_input("Math Score", min_value=0, max_value=100, value=70)
    reading_score = st.number_input("Reading Score", min_value=0, max_value=100, value=70)
    writing_score = st.number_input("Writing Score", min_value=0, max_value=100, value=70)

    # Offer exactly the categories the model was trained on. They are read
    # back from the one-hot column names ("<column>_<category>") so that the
    # selected value always maps onto an existing feature column. Hard-coded
    # labels that differ from the data (e.g. only in letter case) would be
    # dropped by the reindex below and leave every category flag at 0.
    edu_prefix = "parental level of education_"
    prep_prefix = "test preparation course_"
    edu_levels = [c[len(edu_prefix):] for c in feature_names if c.startswith(edu_prefix)]
    prep_levels = [c[len(prep_prefix):] for c in feature_names if c.startswith(prep_prefix)]

    # format_func only changes how an option is displayed; the returned
    # value is still the raw category string from the training data.
    parent_education = st.selectbox(
        "Parental Level of Education", edu_levels, format_func=str.capitalize
    )
    test_prep = st.selectbox(
        "Test Preparation Course", prep_levels, format_func=str.capitalize
    )

    # Single-row frame with the raw scores, scaled with the training scaler.
    input_data = pd.DataFrame({
        "math score": [math_score],
        "reading score": [reading_score],
        "writing score": [writing_score],
    })
    input_data[numerical_features] = scaler.transform(input_data[numerical_features])

    # Align with the training columns (same names, same order); every
    # one-hot column starts at 0 and only the selected categories are set.
    input_data = input_data.reindex(columns=feature_names, fill_value=0)
    if parent_education is not None:
        input_data[edu_prefix + parent_education] = 1
    if test_prep is not None:
        input_data[prep_prefix + test_prep] = 1

    if st.button("Predict"):
        prediction = model.predict(input_data)[0]
        result = "Pass" if prediction == 1 else "Fail"
        st.subheader(f"Prediction: {result}")
|