"""Streamlit app: Student Performance Predictor.

Reads ``student_performance_data.csv`` from the working directory, fits a
linear regression of ``Final Exam Score`` on study hours, attendance rate and
assignment grades, and renders three tabs: scatter plots of each feature
against the target, hold-out metrics (MSE and R-squared), and a form that
predicts a score for user-supplied inputs.
"""

import pandas as pd
import numpy as np  # not used below; kept because other parts of the file may rely on it
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import streamlit as st
import altair as alt

# Column names shared by validation, training, plotting and prediction.
FEATURE_COLUMNS = ['Study Hours', 'Attendance Rate', 'Assignment Grades']
TARGET_COLUMN = 'Final Exam Score'

# Streamlit app title
st.title("📊 Student Performance Predictor")

# Load dataset. Only the read itself sits in the ``try`` so that unrelated
# errors are not mistaken for a missing file.
try:
    df = pd.read_csv("student_performance_data.csv")  # Ensure the file is in the same directory
except FileNotFoundError:
    st.error("File 'student_performance_data.csv' not found! Please upload the dataset.")
    # Without a dataset nothing below can run; halt this script run here
    # instead of falling through to code that would hit an unbound ``df``.
    st.stop()

st.write("### Preview of Dataset")
st.write(df.head())  # Show first few rows

# Ensure dataset contains the required columns
required_columns = FEATURE_COLUMNS + [TARGET_COLUMN]
if not all(col in df.columns for col in required_columns):
    st.error("Dataset must contain the following columns: " + ", ".join(required_columns))
    st.stop()

# Prepare data for training
X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

# Split data into training and testing sets (fixed seed for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the held-out rows and evaluate the model
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Create tabs
tab1, tab2, tab3 = st.tabs(["📈 Data Visualization", "📊 Model Performance", "🎯 Prediction"])

# Tab 1: Data Visualization
with tab1:
    st.write("### Data Visualization")

    # One interactive scatter plot per feature against the target
    for col in FEATURE_COLUMNS:
        st.write(f"**{col} vs Final Exam Score**")
        chart = alt.Chart(df).mark_circle().encode(
            x=col,
            y=TARGET_COLUMN,
            tooltip=[col, TARGET_COLUMN],
        ).interactive()
        st.altair_chart(chart, use_container_width=True)

# Tab 2: Model Performance
with tab2:
    st.write("### Model Performance")
    st.write(f"✅ Mean Squared Error (MSE): {mse:.2f}")
    st.write(f"✅ R-squared Score: {r2:.2f}")

# Tab 3: Prediction
with tab3:
    st.write("### Predict Final Exam Score")

    study_hours = st.number_input("📚 Study Hours", min_value=0, value=10, step=1)
    # NOTE(review): the slider assumes attendance is stored as a 0-1 fraction
    # in the dataset — confirm against the CSV (it could be a 0-100 percentage).
    attendance_rate = st.slider("🎟️ Attendance Rate", min_value=0.0, max_value=1.0, step=0.01, value=0.85)
    assignment_grades = st.number_input("📝 Average Assignment Grade", min_value=0, max_value=100, value=80, step=1)

    if st.button("🔮 Predict"):
        # Prepare input for prediction. A DataFrame with the training column
        # names (in the same order) is used because the model was fitted on a
        # DataFrame; a bare array triggers scikit-learn's feature-name warning.
        input_data = pd.DataFrame(
            [[study_hours, attendance_rate, assignment_grades]],
            columns=FEATURE_COLUMNS,
        )

        # Make prediction
        predicted_score = model.predict(input_data)[0]
        st.success(f"🎯 Predicted Final Exam Score: {predicted_score:.2f}")