badr-mardi committed on
Commit
c945aff
·
verified ·
1 Parent(s): a875770

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -4
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import streamlit as st
2
  import numpy as np
3
  import pandas as pd
@@ -11,13 +12,15 @@ import io
11
  def main():
12
  st.title("California Housing Analysis")
13
 
 
14
  california = datasets.fetch_california_housing()
15
  df = pd.DataFrame(california.data, columns=california.feature_names)
16
  df['MedHouseVal'] = california.target
17
 
 
18
  st.write("## Data Sample")
19
  st.write(df.head())
20
-
21
  st.write("## Data Statistics")
22
  st.write(df.describe())
23
 
@@ -26,7 +29,7 @@ def main():
26
  df.info(buf=buffer)
27
  s = buffer.getvalue()
28
  st.text(s)
29
-
30
  st.write("## Missing Values")
31
  st.write(df.isnull().sum())
32
 
@@ -37,7 +40,7 @@ def main():
37
  # Drop the target from the predictors list
38
  predictor_options = df.columns.drop(target).tolist()
39
 
40
- # Add multiselect for user to choose predictor variables
41
  predictors = st.multiselect(
42
  'Select predictor variables for regression:',
43
  options=predictor_options,
@@ -59,10 +62,12 @@ def main():
59
  else:
60
  st.write("Scatter plot is only available for a single predictor.")
61
 
62
- # Regression analysis
63
  X = df[predictors]
64
  y = df[target]
65
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
 
 
66
  model = LinearRegression()
67
  model.fit(X_train, y_train)
68
  y_pred = model.predict(X_test)
@@ -73,6 +78,7 @@ def main():
73
  st.write(f'RMSE: {rmse}')
74
  st.write(f'R-squared: {r2}')
75
 
 
76
  if len(predictors) == 1:
77
  fig, ax = plt.subplots()
78
  ax.scatter(X_train, y_train, color='blue', label='Training data')
@@ -98,3 +104,4 @@ if __name__ == "__main__":
98
 
99
 
100
 
 
 
1
+
2
  import streamlit as st
3
  import numpy as np
4
  import pandas as pd
 
12
  def main():
13
  st.title("California Housing Analysis")
14
 
15
+ # Load the California housing dataset
16
  california = datasets.fetch_california_housing()
17
  df = pd.DataFrame(california.data, columns=california.feature_names)
18
  df['MedHouseVal'] = california.target
19
 
20
+ # Displaying initial data information
21
  st.write("## Data Sample")
22
  st.write(df.head())
23
+
24
  st.write("## Data Statistics")
25
  st.write(df.describe())
26
 
 
29
  df.info(buf=buffer)
30
  s = buffer.getvalue()
31
  st.text(s)
32
+
33
  st.write("## Missing Values")
34
  st.write(df.isnull().sum())
35
 
 
40
  # Drop the target from the predictors list
41
  predictor_options = df.columns.drop(target).tolist()
42
 
43
+ # Multiselect widget to select predictor variables for regression
44
  predictors = st.multiselect(
45
  'Select predictor variables for regression:',
46
  options=predictor_options,
 
62
  else:
63
  st.write("Scatter plot is only available for a single predictor.")
64
 
65
+ # Splitting data for regression
66
  X = df[predictors]
67
  y = df[target]
68
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
69
+
70
+ # Perform linear regression
71
  model = LinearRegression()
72
  model.fit(X_train, y_train)
73
  y_pred = model.predict(X_test)
 
78
  st.write(f'RMSE: {rmse}')
79
  st.write(f'R-squared: {r2}')
80
 
81
+ # Visualizing the regression results
82
  if len(predictors) == 1:
83
  fig, ax = plt.subplots()
84
  ax.scatter(X_train, y_train, color='blue', label='Training data')
 
104
 
105
 
106
 
107
+