"""Streamlit app that fits a linear regression on user-supplied CSV data.

The user uploads a CSV file or provides a URL to one, picks feature columns
in the sidebar, and the app trains a ``LinearRegression`` model that predicts
the last column of the dataset from the selected features.
"""
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


# Function to load data
@st.cache_data
def load_data_from_url(url):
    """Read the CSV located at *url* into a DataFrame.

    Wrapped in ``st.cache_data`` so Streamlit can reuse the result instead of
    re-reading the same URL on every script rerun.

    Args:
        url: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    data = pd.read_csv(url)
    return data


# App title
st.title("House Price Prediction")

# Sidebar for user inputs
st.sidebar.header("Upload Your Data")
uploaded_file = st.sidebar.file_uploader("Upload a CSV file", type=["csv"])
data_url = st.sidebar.text_input("Or enter a URL to a CSV file")

# Load data (an uploaded file takes precedence over a URL)
if not uploaded_file and not data_url:
    st.write("Please upload a CSV file or enter a URL to a CSV file.")
    st.stop()

try:
    if uploaded_file:
        data = pd.read_csv(uploaded_file)
    else:
        data = load_data_from_url(data_url)
except (OSError, ValueError) as exc:
    # OSError covers unreachable/invalid URLs; ValueError covers pandas
    # parser errors, empty files and undecodable bytes. Report the problem
    # in the UI instead of crashing the app with a traceback.
    st.error(f"Could not read the CSV data: {exc}")
    st.stop()

st.write("Dataset", data)

# Feature selection (every column except the last, which is the target)
st.sidebar.header("Configure Input Features")
selected_features = st.sidebar.multiselect("Select features", data.columns[:-1])

if selected_features:
    X = data[selected_features]
    y = data.iloc[:, -1]  # Assuming the last column is the target

    # LinearRegression only accepts numeric input; fail with a clear message
    # rather than an exception raised from inside model.fit().
    non_numeric = [
        str(name)
        for name, dtype in X.dtypes.items()
        if not pd.api.types.is_numeric_dtype(dtype)
    ]
    if non_numeric:
        st.error(
            "These selected features are not numeric and cannot be used: "
            + ", ".join(non_numeric)
        )
        st.stop()
    if not pd.api.types.is_numeric_dtype(y):
        st.error(
            f"The target column '{data.columns[-1]}' (last column) must be numeric."
        )
        st.stop()

    # Missing values would make model.fit() raise, so keep complete rows only.
    complete_rows = X.notna().all(axis=1) & y.notna()
    dropped_count = int((~complete_rows).sum())
    if dropped_count:
        st.warning(f"Ignoring {dropped_count} row(s) with missing values.")
        X = X[complete_rows]
        y = y[complete_rows]

    # train_test_split needs at least one row on each side of the split.
    if len(X) < 2:
        st.error("At least two complete rows are required to train and test the model.")
        st.stop()

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Train model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Prediction
    y_pred = model.predict(X_test)

    # Plot predictions against actual values; the dashed diagonal marks
    # where predicted == actual.
    fig, ax = plt.subplots()
    ax.scatter(y_test, y_pred, edgecolors=(0, 0, 0))
    ax.plot(
        [y_test.min(), y_test.max()],
        [y_test.min(), y_test.max()],
        'k--',
        lw=4,
    )
    ax.set_xlabel('Actual')
    ax.set_ylabel('Predicted')
    ax.set_title('Actual vs Predicted')
    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until it is closed; without
    # this, figures pile up across Streamlit reruns.
    plt.close(fig)

    # Model performance
    st.write("Mean Squared Error", mean_squared_error(y_test, y_pred))
    st.write("R-squared Score", r2_score(y_test, y_pred))

    # Display results
    st.write("Selected Features", selected_features)
    st.write("Model Coefficients", model.coef_)
    st.write("Predictions", y_pred)
    st.write("Actual Values", y_test.values)