import streamlit as st
import numpy as np
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt

# Function to build the model
def build_model(my_learning_rate):
    """Create and compile a one-layer, one-unit Keras regression model.

    Args:
        my_learning_rate: Learning rate handed to the RMSprop optimizer.

    Returns:
        A compiled ``tf.keras`` Sequential model that minimizes mean squared
        error and reports root mean squared error as a metric.
    """
    # A single Dense unit fed by a single input value.
    linear_layer = tf.keras.layers.Dense(units=1, input_shape=(1,))
    model = tf.keras.models.Sequential([linear_layer])

    optimizer = tf.keras.optimizers.RMSprop(learning_rate=my_learning_rate)
    rmse_metric = tf.keras.metrics.RootMeanSquaredError()
    model.compile(
        optimizer=optimizer,
        loss='mean_squared_error',
        metrics=[rmse_metric],
    )
    return model

# Function to train the model
def train_model(model, df, feature, label, epochs, batch_size):
    """Fit ``model`` on one feature column and report what it learned.

    Args:
        model: Object exposing ``fit`` and ``get_weights`` (a compiled model).
        df: DataFrame holding both the feature and the label column.
        feature: Name of the column used as the model input.
        label: Name of the column used as the training target.
        epochs: Number of epochs passed to ``model.fit``.
        batch_size: Batch size passed to ``model.fit``.

    Returns:
        A tuple ``(trained_weight, trained_bias, epochs, rmse)`` where
        ``trained_weight`` is the first row of the first weight array,
        ``trained_bias`` is the second weight array, ``epochs`` is
        ``history.epoch`` and ``rmse`` is the ``root_mean_squared_error``
        column of the training history.
    """
    history = model.fit(
        x=df[feature],
        y=df[label],
        batch_size=batch_size,
        epochs=epochs,
    )

    weights = model.get_weights()
    learned_weight = weights[0][0]
    learned_bias = weights[1]

    # The history dict maps each tracked quantity to its per-epoch values.
    rmse_per_epoch = pd.DataFrame(history.history)["root_mean_squared_error"]
    return learned_weight, learned_bias, history.epoch, rmse_per_epoch

# Function to plot the model
def plot_the_model(trained_weight, trained_bias, feature, label, df):
    """Render a scatter of sampled examples with the fitted line on top.

    Args:
        trained_weight: Learned slope; a scalar or a one-element array.
        trained_bias: Learned intercept; a scalar or a one-element array.
        feature: Column of ``df`` plotted on the x axis.
        label: Column of ``df`` plotted on the y axis.
        df: DataFrame containing both columns.

    The figure is sent to Streamlit and then closed.
    """
    # Use an explicit Figure instead of the global pyplot state so the
    # rendered figure can be closed afterwards and does not accumulate
    # across Streamlit reruns.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.set_xlabel(feature)
    ax.set_ylabel(label)

    # DataFrame.sample raises ValueError when n exceeds the number of rows,
    # so cap the sample size at what is available.
    random_examples = df.sample(n=min(200, len(df)))
    ax.scatter(random_examples[feature], random_examples[label])

    # np.ravel accepts both plain scalars and the small arrays a model's
    # get_weights() hands back; reduce either to a Python float.
    weight = float(np.ravel(trained_weight)[0])
    bias = float(np.ravel(trained_bias)[0])

    # The line runs from x = 0 to the largest sampled feature value.
    x0 = 0
    y0 = bias
    x1 = random_examples[feature].max()
    y1 = bias + (weight * x1)
    ax.plot([x0, x1], [y0, y1], c='r')

    st.pyplot(fig)
    plt.close(fig)

# Function to plot the loss curve
def plot_the_loss_curve(epochs, rmse):
    """Render root mean squared error against epoch.

    Args:
        epochs: Sequence of epoch indices for the x axis.
        rmse: pandas Series of per-epoch RMSE values (``min``/``max`` are
            called on it).

    The figure is sent to Streamlit and then closed.
    """
    # Use an explicit Figure instead of the global pyplot state so the
    # rendered figure can be closed afterwards and does not accumulate
    # across Streamlit reruns.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Root Mean Squared Error")

    ax.plot(epochs, rmse, label="Loss")
    ax.legend()

    # Matplotlib rejects NaN/inf axis limits, which is what a diverged
    # training run produces; in that case keep the automatic limits.
    lower, upper = rmse.min() * 0.97, rmse.max()
    if np.isfinite(lower) and np.isfinite(upper):
        ax.set_ylim([lower, upper])

    st.pyplot(fig)
    plt.close(fig)

# Load the dataset
@st.cache_data
def load_data():
    """Download the California housing training CSV and rescale its label.

    Returns:
        The DataFrame read from the CSV, with ``median_house_value`` divided
        by 1000. The result is cached by ``st.cache_data``.
    """
    source_url = "https://download.mlcc.google.com/mledu-datasets/california_housing_train.csv"
    housing = pd.read_csv(source_url)
    # Express house values in thousands.
    housing["median_house_value"] = housing["median_house_value"] / 1000.0
    return housing

training_df = load_data()

# Streamlit interface
st.title("Simple Linear Regression with Real Data")

st.write("https://colab.research.google.com/github/google/eng-edu/blob/main/ml/cc/exercises/linear_regression_with_a_real_dataset.ipynb?utm_source=mlcc&utm_campaign=colab-external&utm_medium=referral&utm_content=linear_regression_real_tf2-colab&hl=en")

if st.checkbox('Show raw data'):
    st.write(training_df.head())

# Sidebar controls for the hyperparameters and the input feature.
# step=0.001 keeps both min_value (0.001) and the default (0.01) on the
# slider's step grid; the format shows three decimals so small rates are not
# displayed as 0.00.
learning_rate = st.sidebar.slider('Learning Rate', min_value=0.001, max_value=1.0, value=0.01, step=0.001, format="%.3f")
epochs = st.sidebar.slider('Epochs', min_value=1, max_value=1000, value=30, step=1)
batch_size = st.sidebar.slider('Batch Size', min_value=1, max_value=len(training_df), value=30, step=1)
feature = st.sidebar.selectbox('Select Feature', training_df.columns)
label = 'median_house_value'

# Streamlit re-executes this script from the top on every widget interaction,
# so a model kept only in a plain variable is gone by the time another button
# is pressed. Restore the last trained model (if any) from session state.
my_model = st.session_state.get('my_model')

if st.sidebar.button('Run'):
    my_model = build_model(learning_rate)
    # Use a separate name for the returned epoch list so the slider value
    # held in `epochs` is not overwritten.
    weight, bias, epoch_history, rmse = train_model(my_model, training_df, feature, label, epochs, batch_size)
    # Persist the trained model so later reruns can still use it.
    st.session_state['my_model'] = my_model

    st.subheader('Model Plot')
    plot_the_model(weight, bias, feature, label, training_df)

    st.subheader('Loss Curve')
    plot_the_loss_curve(epoch_history, rmse)

# Function to make predictions
def predict_house_values(n, feature, label):
    """Display up to ``n`` predictions for rows starting at position 10000.

    Reads the module-level ``training_df`` and ``my_model``.

    Args:
        n: Maximum number of rows to predict and display.
        feature: Column of ``training_df`` fed to the model.
        label: Column of ``training_df`` shown as the true value.

    A warning is shown, and nothing is predicted, when no model is available
    or when ``training_df`` has no rows at or beyond position 10000.
    """
    if my_model is None:
        st.warning("Train a model before requesting predictions.")
        return

    start = 10000
    # Positional slicing simply yields fewer rows near the end of the frame,
    # whereas a label lookup such as [start + i] would raise for missing rows.
    rows = training_df.iloc[start:start + n]
    if rows.empty:
        st.warning("Not enough rows in the dataset to show predictions.")
        return

    batch = rows[feature]
    predicted_values = my_model.predict_on_batch(x=batch)

    lines = [
        "feature   label          predicted",
        "  value   value          value",
        "          in thousand$   in thousand$",
        "--------------------------------------",
    ]
    lines.extend(
        "%5.0f %6.0f %15.0f" % (feature_value, label_value, prediction[0])
        for feature_value, label_value, prediction in zip(batch, rows[label], predicted_values)
    )

    # st.write interprets strings as Markdown, which does not preserve the
    # fixed-width column padding of this table; st.text displays the text
    # as written, preformatted.
    st.text("\n".join(lines))

# Prediction controls: the Predict button is only created when a model exists.
n_predictions = st.sidebar.slider(
    'Number of Predictions',
    min_value=1,
    max_value=100,
    value=10,
)
if my_model is not None:
    if st.sidebar.button('Predict'):
        st.subheader('Predictions')
        predict_house_values(n_predictions, feature, label)