chiichann committed on
Commit
55b17cf
·
verified ·
1 Parent(s): 2a8426f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -64
app.py CHANGED
@@ -1,80 +1,130 @@
 
1
  import pandas as pd
2
  import numpy as np
3
- from sklearn.linear_model import LinearRegression
4
  from sklearn.model_selection import train_test_split
5
- from sklearn.metrics import mean_squared_error, r2_score
6
- import streamlit as st
7
  import altair as alt
8
 
9
- # Streamlit app title
10
- st.title("📊 Student Performance Predictor")
11
 
12
- # Load dataset
13
- try:
14
- df = pd.read_csv("student_performance_data.csv") # Ensure the file is in the same directory
15
- st.write("### Preview of Dataset")
16
- st.write(df.head()) # Show first few rows
17
- except FileNotFoundError:
18
- st.error("File 'student_performance_data.csv' not found! Please upload the dataset.")
 
 
 
 
 
 
19
 
20
- # Ensure dataset contains the required columns
21
- required_columns = ['Study Hours', 'Attendance Rate', 'Assignment Grades', 'Final Exam Score']
22
- if not all(col in df.columns for col in required_columns):
23
- st.error("Dataset must contain the following columns: " + ", ".join(required_columns))
24
- else:
25
- # Prepare data for training
26
- X = df[['Study Hours', 'Attendance Rate', 'Assignment Grades']]
27
- y = df['Final Exam Score']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- # Split data into training and testing sets
 
 
 
 
30
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
31
-
32
- # Train a Linear Regression model
33
- model = LinearRegression()
 
 
34
  model.fit(X_train, y_train)
 
35
 
36
- # Make predictions
37
- y_pred = model.predict(X_test)
38
-
39
- # Evaluate the model
40
- mse = mean_squared_error(y_test, y_pred)
41
- r2 = r2_score(y_test, y_pred)
42
-
43
- # Create tabs
44
- tab1, tab2, tab3 = st.tabs(["📈 Data Visualization", "📊 Model Performance", "🎯 Prediction"])
45
-
46
- # Tab 1: Data Visualization
47
- with tab1:
48
- st.write("### Data Visualization")
49
-
50
- # Scatter plots
51
- for col in ['Study Hours', 'Attendance Rate', 'Assignment Grades']:
52
- st.write(f"**{col} vs Final Exam Score**")
53
- chart = alt.Chart(df).mark_circle().encode(
54
- x=col,
55
- y='Final Exam Score',
56
- tooltip=[col, 'Final Exam Score']
57
- ).interactive()
58
- st.altair_chart(chart, use_container_width=True)
59
 
60
- # Tab 2: Model Performance
61
- with tab2:
62
- st.write("### Model Performance")
63
- st.write(f"✅ Mean Squared Error (MSE): {mse:.2f}")
64
- st.write(f"✅ R-squared Score: {r2:.2f}")
65
 
66
- # Tab 3: Prediction
67
- with tab3:
68
- st.write("### Predict Final Exam Score")
69
- study_hours = st.number_input("📚 Study Hours", min_value=0, value=10, step=1)
70
- attendance_rate = st.slider("🎟️ Attendance Rate", min_value=0.0, max_value=1.0, step=0.01, value=0.85)
71
- assignment_grades = st.number_input("📝 Average Assignment Grade", min_value=0, max_value=100, value=80, step=1)
72
 
73
- if st.button("🔮 Predict"):
74
- # Prepare input for prediction
75
- input_data = np.array([[study_hours, attendance_rate, assignment_grades]])
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
- # Make prediction
78
- predicted_score = model.predict(input_data)[0]
 
 
79
 
80
- st.success(f"🎯 Predicted Final Exam Score: {predicted_score:.2f}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
  import pandas as pd
3
  import numpy as np
 
4
  from sklearn.model_selection import train_test_split
5
+ from xgboost import XGBClassifier
6
+ from sklearn.preprocessing import StandardScaler
7
  import altair as alt
8
 
# 🎓 App title shown at the top of the page.
st.title("📊 Student Performance Prediction App")

# 📚 About section. The text is kept flush-left so that no line carries
# leading whitespace into the Markdown that Streamlit renders.
st.write(
    """
## About This App
This application predicts whether a student will pass or fail based on their exam scores and demographic data.
### Features:
- **Dataset Overview**: View the number of students categorized by performance.
- **Model Evaluation**: Check the model's accuracy on the test set.
- **Student Performance Prediction**: Enter student details and get a prediction.

The app uses **Streamlit** for the UI and **XGBoost** (`XGBClassifier`) for predictions.
"""
)
25
 
# 📌 Load and preprocess data
def load_data():
    """Load ``exams.csv`` and turn it into a model-ready feature table.

    Processing steps, in order:

    1. Read ``exams.csv`` from the current working directory.
    2. Derive the binary target ``Passed`` (1 when the mean of the math,
       reading and writing scores is >= 50, else 0).
    3. Drop the helper ``Average Score`` column and the ``lunch``,
       ``race/ethnicity`` and ``gender`` columns.
    4. One-hot encode ``parental level of education`` and
       ``test preparation course``.
    5. Standardize the three score columns in place.

    Returns:
        tuple: ``(df, scaler, numerical_features)`` where ``df`` is the
        processed frame (including ``Passed``), ``scaler`` is the fitted
        ``StandardScaler`` and ``numerical_features`` is the list of
        column names that were scaled.

    Raises:
        FileNotFoundError: if ``exams.csv`` does not exist.
        KeyError: if an expected column is missing from the CSV.
    """
    file_path = "exams.csv"
    df = pd.read_csv(file_path)

    # Define target variable: pass if the average score is >= 50.
    # NOTE(review): the three scores that define ``Passed`` stay in the
    # feature set, so the target is a deterministic function of the
    # inputs -- confirm this is intended.
    df["Average Score"] = df[["math score", "reading score", "writing score"]].mean(axis=1)
    df["Passed"] = (df["Average Score"] >= 50).astype(int)

    # Drop columns that are not used as features (reassign rather than
    # mutate in place so the intent is explicit).
    df = df.drop(columns=["Average Score", "lunch", "race/ethnicity", "gender"])

    # Encode categorical variables as one-hot columns named
    # "<original column>_<category value>".
    cat_cols = ["parental level of education", "test preparation course"]
    df = pd.get_dummies(df, columns=cat_cols)

    # Standardize numerical features.
    # NOTE(review): the scaler is fitted on the full dataset, i.e. before
    # the train/test split performed later -- test rows influence the
    # scaling statistics.
    scaler = StandardScaler()
    numerical_features = ["math score", "reading score", "writing score"]
    df[numerical_features] = scaler.fit_transform(df[numerical_features])

    return df, scaler, numerical_features
48
 
# Train the model
def train_model(df, scaler=None):
    """Fit an ``XGBClassifier`` that predicts the ``Passed`` column.

    Args:
        df: Processed frame containing the feature columns and a
            ``Passed`` target column.
        scaler: Optional fitted scaler to hand back to the caller. When
            omitted, the module-level ``scaler`` (if any) is returned,
            which preserves the behaviour of calling ``train_model(df)``.

    Returns:
        tuple: ``(model, X_test, y_test, feature_names, X_train, scaler)``
        where ``feature_names`` is ``X_train.columns``.
    """
    X = df.drop(columns=["Passed"])
    y = df["Passed"]

    # Fixed seed so the split (and therefore the reported accuracy) is
    # reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = XGBClassifier(
        n_estimators=1000, learning_rate=0.03, max_depth=10,
        colsample_bytree=0.9, subsample=0.9, random_state=42,
    )
    model.fit(X_train, y_train)

    # The scaler used to be read implicitly from module globals, which
    # raised NameError when no such global existed. Make the dependency
    # explicit, falling back to the global for existing callers.
    if scaler is None:
        scaler = globals().get("scaler")

    return model, X_test, y_test, X_train.columns, X_train, scaler
62
 
# Evaluate the model
def evaluate_model(model, X_test, y_test):
    """Return the model's score on the held-out data.

    Args:
        model: Any fitted estimator exposing ``score(X, y)``.
        X_test: Feature rows to score against.
        y_test: True labels for ``X_test``.

    Returns:
        Whatever ``model.score(X_test, y_test)`` returns; the caller
        displays it as an accuracy percentage.
    """
    return model.score(X_test, y_test)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
# Run the pipeline on every script execution: load -> train -> evaluate.
# ``scaler`` is assigned twice on purpose: once from load_data() and once
# from train_model(), which hands the same object back.
df, scaler, numerical_features = load_data()
model, X_test, y_test, feature_names, X_train, scaler = train_model(df)
accuracy = evaluate_model(model, X_test, y_test)

# 🏡 Streamlit Tabs
tab1, tab2, tab3 = st.tabs(["💁 Dataset Overview", "📊 Model Performance", "🎓 Predict Performance"])
 
 
 
 
74
 
# 📁 Tab 1: Dataset Overview
with tab1:
    st.write("### Dataset Summary")
    # ``df`` is the preprocessed frame returned by load_data(), so the
    # summary reflects the standardized score columns, not raw scores.
    st.write(df.describe())

    st.write("### Distribution of Passed Students")
    # Two-column table (class label, number of rows) for the bar chart.
    pass_counts = df["Passed"].value_counts().reset_index()
    pass_counts.columns = ["Passed", "Count"]

    # ":N" marks the field as nominal so 0/1 are drawn as categories.
    chart = alt.Chart(pass_counts).mark_bar().encode(
        x=alt.X("Passed:N", title="Passed (0 = No, 1 = Yes)"),
        y="Count",
        color="Passed:N",
    )
    st.altair_chart(chart, use_container_width=True)
90
 
# 📊 Tab 2: Model Performance
with tab2:
    st.write("### Model Evaluation")
    # ``accuracy`` is a fraction; show it as a percentage with 2 decimals.
    st.write(f"✅ **Model Accuracy:** {accuracy*100:.2f}%")
95
 
# 🎓 Tab 3: Predict Performance
with tab3:
    st.write("### Enter Student Details")

    math_score = st.number_input("Math Score", min_value=0, max_value=100, value=70)
    reading_score = st.number_input("Reading Score", min_value=0, max_value=100, value=70)
    writing_score = st.number_input("Writing Score", min_value=0, max_value=100, value=70)
    parent_education = st.selectbox(
        "Parental Level of Education",
        ["Some high school", "High school", "Some college", "Associate's degree", "Bachelor's degree", "Master's degree"],
    )
    test_prep = st.selectbox("Test Preparation Course", ["None", "Completed"])

    # Start from the raw numeric inputs...
    input_data = pd.DataFrame({
        "math score": [math_score],
        "reading score": [reading_score],
        "writing score": [writing_score],
    })

    # ...scale them with the scaler fitted on the training data...
    input_data[numerical_features] = scaler.transform(input_data[numerical_features])

    # ...and expand to the exact training column layout; every one-hot
    # column starts at 0.
    input_data = input_data.reindex(columns=feature_names, fill_value=0)

    # Switch on the one-hot columns for the selected categories. The UI
    # labels are capitalized, so match against the training columns
    # case-insensitively; an exact-name assignment followed by reindex
    # would silently discard a selection whose casing differs from the
    # category values in the CSV.
    columns_by_lower = {str(col).lower(): col for col in feature_names}
    selections = {
        "parental level of education": parent_education,
        "test preparation course": test_prep,
    }
    for prefix, choice in selections.items():
        matched = columns_by_lower.get(f"{prefix}_{choice}".lower())
        if matched is None:
            st.warning(
                f"'{choice}' was not seen for '{prefix}' during training; "
                "it will be ignored by the model."
            )
        else:
            input_data[matched] = 1

    if st.button("Predict"):
        prediction = model.predict(input_data)[0]
        result = "Pass" if prediction == 1 else "Fail"
        st.subheader(f"Prediction: {result}")