Spaces:
Sleeping
Sleeping
Create 5_RealDataSetRegression.py
Browse files- pages/5_RealDataSetRegression.py +103 -0
pages/5_RealDataSetRegression.py
ADDED
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import tensorflow as tf
|
5 |
+
from matplotlib import pyplot as plt
|
6 |
+
|
7 |
+
# Function to build the model
|
8 |
+
def build_model(my_learning_rate):
    """Create and compile a one-unit linear regression model.

    The network is a single `Dense` layer with one unit fed by a
    one-element input. It is compiled with the RMSprop optimizer, the
    mean-squared-error loss and a root-mean-squared-error metric.

    Args:
        my_learning_rate: Learning rate handed to the RMSprop optimizer.

    Returns:
        The compiled `tf.keras` Sequential model.
    """
    linear_layer = tf.keras.layers.Dense(units=1, input_shape=(1,))
    model = tf.keras.models.Sequential([linear_layer])

    optimizer = tf.keras.optimizers.RMSprop(learning_rate=my_learning_rate)
    model.compile(
        optimizer=optimizer,
        loss='mean_squared_error',
        metrics=[tf.keras.metrics.RootMeanSquaredError()],
    )
    return model
|
15 |
+
|
16 |
+
# Function to train the model
|
17 |
+
def train_model(model, df, feature, label, epochs, batch_size):
    """Fit `model` on one column of `df` and report what it learned.

    Args:
        model: Object exposing Keras-style `fit` and `get_weights`.
        df: Table holding both the feature and the label column.
        feature: Name of the column used as model input.
        label: Name of the column used as the training target.
        epochs: Number of epochs passed to `fit`.
        batch_size: Batch size passed to `fit`.

    Returns:
        A 4-tuple `(trained_weight, trained_bias, epochs, rmse)`: the first
        row of the first weight array, the second weight array, the epoch
        list recorded on the fit history, and the
        "root_mean_squared_error" column of that history as a Series.
    """
    fit_result = model.fit(
        x=df[feature],
        y=df[label],
        batch_size=batch_size,
        epochs=epochs,
    )

    # Weights are read after fitting so they reflect the trained state.
    weights = model.get_weights()
    trained_weight = weights[0][0]
    trained_bias = weights[1]

    # One row per epoch; only the RMSE metric column is reported.
    metrics_per_epoch = pd.DataFrame(fit_result.history)
    rmse = metrics_per_epoch["root_mean_squared_error"]

    return trained_weight, trained_bias, fit_result.epoch, rmse
|
25 |
+
|
26 |
+
# Function to plot the model
|
27 |
+
def plot_the_model(trained_weight, trained_bias, feature, label, df):
    """Render sampled examples with the fitted regression line on top.

    Up to 200 rows are sampled at random from `df` and scattered as
    `feature` versus `label`. The line
    `trained_bias + trained_weight * x` is drawn over them in red and
    the figure is sent to the Streamlit page.

    Args:
        trained_weight: Slope of the fitted line.
        trained_bias: Intercept of the fitted line.
        feature: Column of `df` plotted on the x axis.
        label: Column of `df` plotted on the y axis.
        df: DataFrame containing both columns.
    """
    # Use an explicit Figure rather than pyplot's implicit current figure.
    # Figures created through pyplot stay registered until closed, so
    # handing the `plt` module to Streamlit left one more open on every
    # rerun.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.set_xlabel(feature)
    ax.set_ylabel(label)

    # `DataFrame.sample` raises when asked for more rows than exist.
    random_examples = df.sample(n=min(200, len(df)))
    ax.scatter(random_examples[feature], random_examples[label])

    # The line still starts at x = 0 for non-negative features. For
    # all-negative features it starts at the smallest sampled value, so
    # the axis is not stretched out to zero.
    x0 = min(0, random_examples[feature].min())
    x1 = random_examples[feature].max()
    y0 = trained_bias + (trained_weight * x0)
    y1 = trained_bias + (trained_weight * x1)
    ax.plot([x0, x1], [y0, y1], c='r')

    st.pyplot(fig)
    # Release the figure now that Streamlit has rendered it.
    plt.close(fig)
|
42 |
+
|
43 |
+
# Function to plot the loss curve
|
44 |
+
def plot_the_loss_curve(epochs, rmse):
    """Render the root-mean-squared-error per epoch on the Streamlit page.

    Args:
        epochs: Sequence of epoch indices used as x values.
        rmse: Per-epoch RMSE values; must provide `min()` and `max()`
            (the caller in this file passes a pandas Series).
    """
    # Use an explicit Figure rather than pyplot's implicit current figure.
    # Figures created through pyplot stay registered until closed, so
    # handing the `plt` module to Streamlit left one more open on every
    # rerun.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Root Mean Squared Error")

    ax.plot(epochs, rmse, label="Loss")
    ax.legend()
    # Leave a little headroom below the lowest value so the curve does
    # not sit on the bottom edge of the axes.
    ax.set_ylim([rmse.min() * 0.97, rmse.max()])

    st.pyplot(fig)
    # Release the figure now that Streamlit has rendered it.
    plt.close(fig)
|
53 |
+
|
54 |
+
# Load the dataset
|
55 |
+
# `st.cache` is deprecated in favour of `st.cache_data`. The legacy
# decorator is only looked up when the newer one is missing, so this works
# on both old and new Streamlit releases.
_cache_dataset = getattr(st, "cache_data", None) or st.cache


@_cache_dataset
def load_data():
    """Download the California housing training set as a DataFrame.

    The CSV is read from the URL below and the "median_house_value"
    column is divided by 1000, so it is expressed in thousands. The
    result is cached by Streamlit so reruns do not download it again.

    Returns:
        The loaded DataFrame with the rescaled "median_house_value".
    """
    url = "https://download.mlcc.google.com/mledu-datasets/california_housing_train.csv"
    df = pd.read_csv(url)
    df["median_house_value"] /= 1000.0
    return df
|
61 |
+
|
62 |
+
# Load the (cached) housing data for this script run.
training_df = load_data()

# Streamlit interface
st.title("Simple Linear Regression with Real Data")

if st.checkbox('Show raw data'):
    st.write(training_df.head())

# Hyperparameters and the input column are chosen in the sidebar.
learning_rate = st.sidebar.slider('Learning Rate', min_value=0.001, max_value=1.0, value=0.01, step=0.01)
epochs = st.sidebar.slider('Epochs', min_value=1, max_value=1000, value=30, step=1)
batch_size = st.sidebar.slider('Batch Size', min_value=1, max_value=len(training_df), value=30, step=1)
feature = st.sidebar.selectbox('Select Feature', training_df.columns)
label = 'median_house_value'

if st.sidebar.button('Run'):
    my_model = build_model(learning_rate)
    # The per-epoch index list gets its own name so the `epochs` slider
    # value is not overwritten.
    weight, bias, epoch_history, rmse = train_model(my_model, training_df, feature, label, epochs, batch_size)

    # Streamlit re-executes the whole script on every widget interaction,
    # and the button is only True during the run triggered by its click.
    # Keep the trained model in session state so it outlives this run.
    st.session_state["my_model"] = my_model

    st.subheader('Model Plot')
    plot_the_model(weight, bias, feature, label, training_df)

    st.subheader('Loss Curve')
    plot_the_loss_curve(epoch_history, rmse)
elif "my_model" in st.session_state:
    # Rebind the model trained in an earlier run so code further down
    # that reads `my_model` keeps working.
    my_model = st.session_state["my_model"]
|
85 |
+
|
86 |
+
# Function to make predictions
|
87 |
+
def predict_house_values(n, feature, label):
    """Write feature, label and predicted value for `n` training rows.

    The `n` rows of the module-level `training_df` starting at position
    10000 are run through the module-level `my_model`, and one line per
    row is written to the Streamlit page below a short header.

    Args:
        n: Number of consecutive rows to predict.
        feature: Column of `training_df` fed to the model.
        label: Column of `training_df` holding the true values.
    """
    # `my_model` is only bound in a script run where training took place.
    # Without this guard, predicting before training raised a NameError.
    try:
        model = my_model
    except NameError:
        st.warning("No trained model available. Click 'Run' to train one first.")
        return

    start = 10000
    rows = training_df.iloc[start:start + n]
    predicted_values = model.predict_on_batch(x=rows[feature])

    st.write("feature label predicted")
    st.write(" value value value")
    st.write(" in thousand$ in thousand$")
    st.write("--------------------------------------")
    # Walk the three sequences together instead of indexing each by
    # position.
    for feature_value, label_value, prediction in zip(
        rows[feature], rows[label], predicted_values
    ):
        st.write("%5.0f %6.0f %15.0f" % (feature_value,
                                        label_value,
                                        prediction[0]))
|
99 |
+
|
100 |
+
# Sidebar controls for the prediction step: how many rows to show and the
# button that triggers the prediction.
n_predictions = st.sidebar.slider(
    'Number of Predictions',
    min_value=1,
    max_value=100,
    value=10,
)
predict_clicked = st.sidebar.button('Predict')

if predict_clicked:
    st.subheader('Predictions')
    predict_house_values(n_predictions, feature, label)
|