eaglelandsonce committed on
Commit
c704bfb
·
verified ·
1 Parent(s): 318f21a

Create 5_RealDataSetRegression.py

Browse files
Files changed (1) hide show
  1. pages/5_RealDataSetRegression.py +103 -0
pages/5_RealDataSetRegression.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ import tensorflow as tf
5
+ from matplotlib import pyplot as plt
6
+
7
+ # Function to build the model
8
def build_model(my_learning_rate):
    """Create and compile a one-unit linear regression model.

    Args:
        my_learning_rate: Learning rate handed to the RMSprop optimizer.

    Returns:
        A compiled ``tf.keras`` Sequential model made of a single Dense unit
        over a one-value input, using mean squared error as the loss and
        reporting root mean squared error as a metric.
    """
    # A single Dense unit with no activation is the whole network.
    linear_layer = tf.keras.layers.Dense(units=1, input_shape=(1,))
    model = tf.keras.models.Sequential([linear_layer])

    optimizer = tf.keras.optimizers.RMSprop(learning_rate=my_learning_rate)
    rmse_metric = tf.keras.metrics.RootMeanSquaredError()
    model.compile(
        optimizer=optimizer,
        loss='mean_squared_error',
        metrics=[rmse_metric],
    )
    return model
15
+
16
+ # Function to train the model
17
def train_model(model, df, feature, label, epochs, batch_size):
    """Fit ``model`` on one column of ``df`` and report what it learned.

    Args:
        model: Object exposing ``fit`` and ``get_weights``.
        df: DataFrame holding the training examples.
        feature: Name of the column passed to ``fit`` as ``x``.
        label: Name of the column passed to ``fit`` as ``y``.
        epochs: Number of epochs passed to ``fit``.
        batch_size: Batch size passed to ``fit``.

    Returns:
        A tuple ``(trained_weight, trained_bias, epochs, rmse)``: the first
        row of the first weight array, the second weight array, the epoch
        list recorded in the fit history, and the per-epoch
        ``root_mean_squared_error`` values as a pandas Series.
    """
    fit_history = model.fit(
        x=df[feature],
        y=df[label],
        batch_size=batch_size,
        epochs=epochs,
    )

    # get_weights() returns [kernel, bias] for the single Dense layer.
    weights = model.get_weights()
    trained_weight = weights[0][0]
    trained_bias = weights[1]

    epoch_index = fit_history.epoch
    rmse = pd.Series(
        fit_history.history["root_mean_squared_error"],
        name="root_mean_squared_error",
    )
    return trained_weight, trained_bias, epoch_index, rmse
25
+
26
+ # Function to plot the model
27
def plot_the_model(trained_weight, trained_bias, feature, label, df):
    """Draw sampled examples and the fitted regression line in Streamlit.

    Args:
        trained_weight: Learned slope; a scalar or a one-element array.
        trained_bias: Learned intercept; a scalar or a one-element array.
        feature: Name of the column plotted on the x axis.
        label: Name of the column plotted on the y axis.
        df: DataFrame containing both columns.
    """
    # Asking for more rows than the frame holds raises ValueError, so cap
    # the sample at the number of rows available.
    random_examples = df.sample(n=min(200, len(df)))

    # Use an explicit Figure instead of pyplot's global state so the figure
    # can be handed to Streamlit and closed afterwards.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.set_xlabel(feature)
    ax.set_ylabel(label)
    ax.scatter(random_examples[feature], random_examples[label])

    # Collapse one-element weight/bias arrays to plain floats.
    weight = float(np.ravel(trained_weight)[0])
    bias = float(np.ravel(trained_bias)[0])

    # Span the sampled feature range rather than starting at x = 0, which
    # would stretch the axis far from the data for features whose values
    # are nowhere near zero.
    x0 = random_examples[feature].min()
    x1 = random_examples[feature].max()
    ax.plot([x0, x1], [bias + weight * x0, bias + weight * x1], c='r')

    st.pyplot(fig)
    # Close the figure so figures do not pile up across script reruns.
    plt.close(fig)
42
+
43
+ # Function to plot the loss curve
44
def plot_the_loss_curve(epochs, rmse):
    """Draw the per-epoch root mean squared error in Streamlit.

    Args:
        epochs: Sequence of epoch indices for the x axis.
        rmse: Sequence (for example a pandas Series) of RMSE values, one
            per epoch.
    """
    # Use an explicit Figure instead of pyplot's global state so the figure
    # can be handed to Streamlit and closed afterwards.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Root Mean Squared Error")

    ax.plot(epochs, rmse, label="Loss")
    ax.legend()

    # A diverged run yields NaN/inf RMSE values, and axis limits may not be
    # NaN or inf; derive the limits from the finite values only and keep
    # matplotlib's automatic limits when there are none.
    values = np.asarray(rmse, dtype=float)
    finite = values[np.isfinite(values)]
    if finite.size:
        ax.set_ylim([finite.min() * 0.97, finite.max()])

    st.pyplot(fig)
    # Close the figure so figures do not pile up across script reruns.
    plt.close(fig)
53
+
54
+ # Load the dataset
55
# st.cache is deprecated in favour of st.cache_data; prefer the newer
# decorator when this Streamlit version has it and fall back otherwise.
_cache_dataframe = getattr(st, "cache_data", None) or st.cache


@_cache_dataframe
def load_data():
    """Download the California housing training set as a DataFrame.

    Returns:
        The CSV contents with ``median_house_value`` divided by 1000.
    """
    url = "https://download.mlcc.google.com/mledu-datasets/california_housing_train.csv"
    df = pd.read_csv(url)
    df["median_house_value"] /= 1000.0
    return df
61
+
62
training_df = load_data()

# Streamlit interface
st.title("Simple Linear Regression with Real Data")

if st.checkbox('Show raw data'):
    st.write(training_df.head())

label = 'median_house_value'

# step matches min_value so the default 0.01 lies on the slider grid, and
# the format shows three decimals so 0.001 is not displayed as 0.00.
learning_rate = st.sidebar.slider(
    'Learning Rate', min_value=0.001, max_value=1.0, value=0.01, step=0.001, format="%.3f"
)
epochs = st.sidebar.slider('Epochs', min_value=1, max_value=1000, value=30, step=1)
batch_size = st.sidebar.slider('Batch Size', min_value=1, max_value=len(training_df), value=30, step=1)
# The label itself is not offered as a feature.
feature = st.sidebar.selectbox(
    'Select Feature', [column for column in training_df.columns if column != label]
)

if st.sidebar.button('Run'):
    my_model = build_model(learning_rate)
    weight, bias, epoch_index, rmse = train_model(my_model, training_df, feature, label, epochs, batch_size)

    # A button is only True on the rerun triggered by its own click, so the
    # model must live in session state to still exist when 'Predict' is
    # clicked on a later rerun.
    st.session_state["trained_model"] = my_model
    st.session_state["trained_feature"] = feature

    st.subheader('Model Plot')
    plot_the_model(weight, bias, feature, label, training_df)

    st.subheader('Loss Curve')
    plot_the_loss_curve(epoch_index, rmse)


# Function to make predictions
def predict_house_values(n, feature, label, model=None, start=10000):
    """Show feature, label and predicted values for ``n`` training rows.

    Args:
        n: Number of consecutive rows to predict.
        feature: Name of the column fed to the model.
        label: Name of the column holding the true values.
        model: Object exposing ``predict_on_batch``. When omitted, the model
            stored in ``st.session_state["trained_model"]`` is used.
        start: Position of the first row taken from ``training_df``.
    """
    if model is None and "trained_model" in st.session_state:
        model = st.session_state["trained_model"]
    if model is None:
        st.warning("No trained model available. Click 'Run' to train one first.")
        return

    rows = training_df.iloc[start:start + n]
    if rows.empty:
        st.warning("No rows available for prediction.")
        return

    # Feed an explicit (n, 1) array to match the model's one-value input.
    batch = rows[feature].to_numpy().reshape(-1, 1)
    predicted_values = np.asarray(model.predict_on_batch(x=batch)).reshape(-1)

    # A real table keeps the columns aligned; space-padded text is collapsed
    # when rendered as Markdown.
    st.table(pd.DataFrame({
        "feature value": [f"{value:.0f}" for value in rows[feature]],
        "label value (in thousand$)": [f"{value:.0f}" for value in rows[label]],
        "predicted value (in thousand$)": [f"{value:.0f}" for value in predicted_values],
    }))


n_predictions = st.sidebar.slider('Number of Predictions', min_value=1, max_value=100, value=10)
if st.sidebar.button('Predict'):
    st.subheader('Predictions')
    # Predict with the feature the stored model was fitted on, which may
    # differ from the current selectbox choice.
    if "trained_feature" in st.session_state:
        trained_feature = st.session_state["trained_feature"]
    else:
        trained_feature = feature
    if trained_feature != feature:
        st.info(f"Predicting from '{trained_feature}', the feature the current model was trained on.")
    predict_house_values(n_predictions, trained_feature, label)