File size: 4,830 Bytes
55b17cf 3bbe6c1 55b17cf 3bbe6c1 55b17cf 3bbe6c1 55b17cf 3bbe6c1 55b17cf 3bbe6c1 55b17cf 3bbe6c1 55b17cf 3bbe6c1 55b17cf 3bbe6c1 55b17cf 3bbe6c1 55b17cf 3bbe6c1 55b17cf 3bbe6c1 55b17cf 3bbe6c1 55b17cf 3bbe6c1 55b17cf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
import altair as alt
# App title
st.title("π Student Performance Prediction App")

# Markdown body rendered directly under the title: a short description of
# the app followed by the list of features offered by the three tabs.
about_markdown = """
## About This App
This application predicts whether a student will pass or fail based on their exam scores and demographic data.
### Features:
- **Dataset Overview**: View the number of students categorized by performance.
- **Model Evaluation**: Check the model's accuracy on the test set.
- **Student Performance Prediction**: Enter student details and get a prediction.
The app uses **Streamlit** for the UI and **XGBoostClassifier** for predictions.
"""
st.write(about_markdown)
# Load and preprocess data
def load_data():
    """Read ``exams.csv`` and turn it into the frame used for modelling.

    Processing order:

    1. Add a binary ``Passed`` label: 1 when the mean of the math, reading
       and writing scores is at least 50, otherwise 0.
    2. Drop the helper ``Average Score`` column together with ``lunch``,
       ``race/ethnicity`` and ``gender``.
    3. One-hot encode ``parental level of education`` and
       ``test preparation course``.
    4. Standardize the three score columns with a ``StandardScaler`` fitted
       on the full frame.

    Returns:
        tuple: ``(df, scaler, numerical_features)`` -- the processed frame,
        the fitted scaler, and the list of score column names it was fitted
        on.
    """
    score_cols = ["math score", "reading score", "writing score"]
    frame = pd.read_csv("exams.csv")

    # Target: pass when the average of the three exam scores is >= 50.
    frame["Average Score"] = frame[score_cols].mean(axis=1)
    frame["Passed"] = frame["Average Score"].ge(50).astype(int)

    # The helper column and the unused demographic columns are removed
    # before encoding.
    frame = frame.drop(
        columns=["Average Score", "lunch", "race/ethnicity", "gender"]
    )

    # One-hot encode the remaining categorical columns.
    frame = pd.get_dummies(
        frame,
        columns=["parental level of education", "test preparation course"],
    )

    # Standardize the score columns; the fitted scaler is returned so the
    # same transformation can be applied to new inputs.
    scaler = StandardScaler()
    frame[score_cols] = scaler.fit_transform(frame[score_cols])

    return frame, scaler, score_cols
# Train the model
def train_model(df):
    """Fit an ``XGBClassifier`` that predicts the ``Passed`` column.

    Args:
        df: Preprocessed frame containing a ``Passed`` column; every other
            column is used as a feature.

    Returns:
        tuple: ``(model, X_test, y_test, feature_names, X_train, scaler)``
        where ``feature_names`` is ``X_train.columns``.

    Note:
        ``scaler`` is neither created nor received by this function; the
        name resolves to the module-level ``scaler`` variable, which must
        therefore be assigned before this function is called.
    """
    target = df["Passed"]
    features = df.drop(columns=["Passed"])

    # Hold out 20% of the rows for evaluation; the seed is fixed.
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    hyperparams = {
        "n_estimators": 1000,
        "learning_rate": 0.03,
        "max_depth": 10,
        "colsample_bytree": 0.9,
        "subsample": 0.9,
        "random_state": 42,
    }
    model = XGBClassifier(**hyperparams)
    model.fit(X_train, y_train)

    return model, X_test, y_test, X_train.columns, X_train, scaler
# Evaluate the model
def evaluate_model(model, X_test, y_test):
    """Return ``model.score(X_test, y_test)`` for the held-out data.

    Args:
        model: Fitted estimator exposing a ``score(X, y)`` method.
        X_test: Held-out feature rows.
        y_test: Labels matching ``X_test``.

    Returns:
        Whatever ``model.score`` returns for the given data.
    """
    return model.score(X_test, y_test)
# Build everything the tabs need: preprocessed data, fitted model, accuracy.
df, scaler, numerical_features = load_data()

# train_model returns the scaler as the last element of its result, so the
# `scaler` name is rebound here.
training_result = train_model(df)
model, X_test, y_test, feature_names, X_train, scaler = training_result

accuracy = evaluate_model(model, X_test, y_test)

# One tab each for the dataset overview, the model metrics and the
# prediction form.
tab_labels = ["π Dataset Overview", "π Model Performance", "π Predict Performance"]
tab1, tab2, tab3 = st.tabs(tab_labels)
# Tab 1: Dataset Overview
with tab1:
    # `df` is the preprocessed frame returned by load_data(), so the summary
    # below describes the standardized score columns.
    st.write("### Dataset Summary")
    st.write(df.describe())

    st.write("### Distribution of Passed Students")
    # Two-column frame: one row per label value with its frequency.
    label_counts = df["Passed"].value_counts()
    pass_counts = pd.DataFrame(
        {"Passed": label_counts.index, "Count": label_counts.values}
    )

    # Bar chart of the counts, with the label treated as a nominal category.
    x_axis = alt.X("Passed:N", title="Passed (0 = No, 1 = Yes)")
    bars = alt.Chart(pass_counts).mark_bar()
    chart = bars.encode(x=x_axis, y="Count", color="Passed:N")
    st.altair_chart(chart, use_container_width=True)
# Tab 2: Model Performance
with tab2:
    st.write("### Model Evaluation")
    # The message must sit on one physical line: a single-quoted f-string
    # cannot span lines. The leading check-mark emoji had been mis-encoded
    # into a stray character plus a line break, which split the literal.
    # `accuracy` is multiplied by 100 and shown with two decimals.
    st.write(f"✅ **Model Accuracy:** {accuracy * 100:.2f}%")
# Tab 3: Predict Performance
with tab3:
    st.write("### Enter Student Details")
    math_score = st.number_input("Math Score", min_value=0, max_value=100, value=70)
    reading_score = st.number_input("Reading Score", min_value=0, max_value=100, value=70)
    writing_score = st.number_input("Writing Score", min_value=0, max_value=100, value=70)
    parent_education = st.selectbox(
        "Parental Level of Education",
        [
            "Some high school",
            "High school",
            "Some college",
            "Associate's degree",
            "Bachelor's degree",
            "Master's degree",
        ],
    )
    test_prep = st.selectbox("Test Preparation Course", ["None", "Completed"])

    # Single-row frame holding the raw scores, scaled with the same scaler
    # that was fitted on the training data.
    input_data = pd.DataFrame({
        "math score": [math_score],
        "reading score": [reading_score],
        "writing score": [writing_score],
    })
    input_data[numerical_features] = scaler.transform(input_data[numerical_features])

    # Start with every one-hot column the model knows about switched off.
    one_hot_prefixes = ("parental level of education_", "test preparation course_")
    for col in feature_names:
        if col.startswith(one_hot_prefixes):
            input_data[col] = 0

    # The select-box labels above are capitalised, while the one-hot column
    # names are derived from the raw CSV values by pd.get_dummies and may
    # differ in case. Building the column name verbatim from the label would
    # then create a column the model does not know; the reindex below would
    # drop it and the categorical inputs would silently stay at 0. Resolve
    # the selection against the trained column names case-insensitively.
    columns_by_folded_name = {str(col).casefold(): col for col in feature_names}
    selections = (
        ("parental level of education", parent_education),
        ("test preparation course", test_prep),
    )
    for prefix, choice in selections:
        wanted = f"{prefix}_{choice}"
        matched = columns_by_folded_name.get(wanted.casefold())
        if matched is None:
            # Make the mismatch visible rather than silently ignoring the input.
            st.warning(
                f"No training column matches '{wanted}'; "
                "this selection is not used by the model."
            )
        else:
            input_data[matched] = 1

    # Put the columns in the exact order used for training; anything still
    # missing is filled with 0.
    input_data = input_data.reindex(columns=feature_names, fill_value=0)

    if st.button("Predict"):
        prediction = model.predict(input_data)[0]
        result = "Pass" if prediction == 1 else "Fail"
        st.subheader(f"Prediction: {result}")
|