File size: 3,641 Bytes
3730bdf
f6a56da
3730bdf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55f7cb4
3730bdf
55f7cb4
 
 
 
3730bdf
d7b041a
3730bdf
55f7cb4
 
 
 
a575d01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b6e3518
3730bdf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import os
import subprocess
import sys

# Install the pinned pdpbox release using the pip module of the interpreter
# that is running this script, so the package lands in the same environment
# that performs the `pdpbox` import below (a bare `pip3` on PATH may belong to
# a different Python). The command is passed as an argument list, so no shell
# is involved. As with the previous os.system call, a failed install is not
# raised here; the import that follows will surface it.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "pdpbox==0.2.1"],
    check=False,
)

from pdpbox.pdp import pdp_isolate, pdp_plot
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.ensemble import RandomForestRegressor
import pandas as pd
from numpy import mean
import streamlit as st

"""
# IOT
"""

# --- User controls -----------------------------------------------------------
# `merge` is only consulted below, where it disables the State filter.
merge = st.checkbox("Merge US country with State ?")

# Each "Take all ..." box defaults to checked, i.e. no filtering on that level.
all_region = st.checkbox("Take all Region ?", value=True)
all_country = st.checkbox("Take all Country ?", value=True)
all_state = st.checkbox("Take all State ?", value=True)
all_city = st.checkbox("Take all City ?", value=True)

df = pd.read_csv("city_temperature.csv")

# --- Cascading filters -------------------------------------------------------
# Every filter narrows `df` in place, so the options offered by a later
# selectbox are computed from the rows that survived the earlier filters.
if not all_region:
    region = st.selectbox(
        'Which region do you want to predict temparature ?',
        df["Region"].unique(), index=0)
    df.drop(df.loc[df['Region'] != region].index, inplace=True)

if not all_country:
    country = st.selectbox(
        'Which country do you want to predict temparature ?',
        df["Country"].unique(), index=0)
    df.drop(df.loc[df['Country'] != country].index, inplace=True)

if not merge and not all_state:
    # Missing states are excluded from the choices: comparing against NaN
    # never matches, so picking it would drop every row.
    state = st.selectbox(
        'Which state do you want to predict temparature ?',
        df["State"].dropna().unique(), index=0)
    # Compare against the selected *state*. The previous code compared against
    # `region`, which is either undefined here (NameError) or holds the
    # region/country picked above.
    df.drop(df.loc[df['State'] != state].index, inplace=True)

if not all_city:
    city = st.selectbox(
        'Which city do you want to predict temparature ?',
        df["City"].unique(), index=0)
    df.drop(df.loc[df['City'] != city].index, inplace=True)
    
    

def mergeStateToCountry():
    """Fold the ``State`` column into ``Country`` on the module-level ``df``.

    Rows whose ``State`` is not null get that state written into ``Country``.
    Afterwards every column whose name contains ``"State"`` is removed and the
    resulting frame is rebound to the global ``df``. Returns ``None``.
    """
    # `df` is reassigned below; without this declaration Python treats it as a
    # local name and the first statement raises UnboundLocalError.
    global df
    df.loc[df['State'].notna(), 'Country'] = df['State']
    df = df.loc[:, ~df.columns.str.contains('State')]

# Encode the location columns as integers: within each column, every distinct
# value is replaced by the position at which it first appears in `unique()`
# (written as a string first, then the whole column is cast to int).
location_columns = ("Region", "Country", "State", "City")

for column_name in location_columns:
    for code, label in enumerate(df[column_name].unique()):
        df[column_name] = df[column_name].replace(label, str(code))

df = df.astype({column_name: "int" for column_name in location_columns})

# --- Features / target -------------------------------------------------------
target = 'AvgTemperature'
Y = df[target]
# Only the calendar columns are used as predictors.
X = df[['Month', 'Day', 'Year']]

# Hold out a quarter of the rows for validation; the seed keeps the split fixed.
X_train, X_val, Y_train, Y_val = train_test_split(
    X, Y, test_size=0.25, random_state=42
)

# --- Baseline: always predict the mean of the training target ----------------
train_mean = Y_train.mean()
y_pred = [train_mean] * len(Y_train)
baseline_mae = mean_absolute_error(Y_train, y_pred)
st.write('Baseline MAE: %f' % (round(baseline_mae, 5)))

# --- Linear regression on standardized features -------------------------------
lm = make_pipeline(StandardScaler(), LinearRegression())
lm.fit(X_train, Y_train)

lm_train_mae = mean_absolute_error(Y_train, lm.predict(X_train))
st.write('Linear Regression Training MAE: %f' % (round(lm_train_mae, 5)))
lm_val_mae = mean_absolute_error(Y_val, lm.predict(X_val))
st.write('Linear Regression Test MAE: %f' % (round(lm_val_mae, 5)))

# --- Random forest -------------------------------------------------------------
# k="all" makes the selection step pass every feature through.
forest_regressor = RandomForestRegressor(
    n_estimators=100,
    max_depth=50,
    random_state=77,
    n_jobs=-1,
)
forestModel = make_pipeline(
    SelectKBest(k="all"),
    StandardScaler(),
    forest_regressor,
)
forestModel.fit(X_train, Y_train)

forest_train_mae = mean_absolute_error(Y_train, forestModel.predict(X_train))
st.write('Random Forest Regressor Model Training MAE: %f' % (forest_train_mae))
forest_val_mae = mean_absolute_error(Y_val, forestModel.predict(X_val))
st.write('Random Forest Regressor Model Test MAE: %f' % (forest_val_mae))