# Paatiii1712's picture
# Update app.py
# 3d7b23a
# -*- coding: utf-8 -*-
"""
Created on Mon Jul 25 21:06:22 2022
@author: ayush
"""
# Import the Libraries
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import yfinance as yf
import datetime as dttm
# ---------------------------------------------------------------------------
# Page chrome: browser-tab metadata, banner image and page title.
# ---------------------------------------------------------------------------
st.set_page_config(
    page_title='Stock Market Forecasting',
    page_icon='stock2.jpg',
)
st.image('stock2.jpg', caption='Stock Market Forecasting')
st.header('Stock Market Forecasting')
st.write('---')

# ---------------------------------------------------------------------------
# Sidebar: the inputs the rest of the script reads (ipt, stick_name,
# startDate, endDate, n).
# ---------------------------------------------------------------------------
st.sidebar.header('User Input Parameters')
ipt = st.sidebar.selectbox(
    label="Take Input DataSet From",
    options=['Yfinance'],
)

if ipt != 'Yfinance':
    # Only one source is offered, so this branch is a safety net.
    st.error('You Select a wrong option')
else:
    # Both date pickers share the same lower bound.
    earliest_date = dttm.date(2010, 1, 1)

    st.sidebar.write('**1).Stock Name**')
    stick_name = st.sidebar.text_input(
        'Enter the Name of Stock as in yfinance',
        value='NESTLEIND.NS',
    )

    st.sidebar.write('**2).Starting Date and Ending Date used for Training Data**')
    startDate = st.sidebar.date_input(
        'Starting Date',
        value=dttm.date(2011, 1, 1),
        min_value=earliest_date,
    )
    endDate = st.sidebar.date_input(
        'Ending Date',
        value=dttm.date(2022, 7, 1),
        min_value=earliest_date,
    )

    # Default forecast length; the main page later allows up to 3x this value.
    n = st.sidebar.number_input(
        "Approx Number of Day's you want to Forecaste",
        min_value=1,
        max_value=30,
        value=10,
    )
# Seed the session-state flags this script relies on, without overwriting
# values that survived from a previous rerun.
for state_key, default in (('start', False), ('stock_past', None)):
    if state_key not in st.session_state:
        st.session_state[state_key] = default

# The checkbox is initialised from the stored flag and its current value is
# written straight back, so 'start' always mirrors the widget.
start = st.sidebar.checkbox('check to start', value=st.session_state['start'])
st.session_state['start'] = bool(start)

# Wiping the session state forgets the tuned hyper-parameters and the last
# stock seen; 'start' is restored to False so the pipeline below is skipped
# for the remainder of this run.
result = st.sidebar.button('Clear the Session State')
if result:
    for key in st.session_state.keys():
        del st.session_state[key]
    st.session_state['start'] = False
def _rank_parameter_grid(make_forecaster, params_grid, train_scaled, test_frame, scaler):
    """Fit one forecaster per grid point and rank the grid by hold-out RMSE.

    A Streamlit progress bar is rendered and advanced after every fit.

    Args:
        make_forecaster: Callable that receives one parameter dict from the
            grid and returns an unfitted forecaster exposing ``fit`` and
            ``predict(fh=...)``.
        params_grid: Mapping of parameter name to the list of values to try.
        train_scaled: Scaled training frame passed to ``fit``.
        test_frame: Unscaled hold-out frame the predictions are scored on.
        scaler: Fitted scaler whose ``inverse_transform`` maps predictions
            back to the scale of ``test_frame``.

    Returns:
        DataFrame with columns ``Parameters``, ``MSE`` and ``RMSE``, sorted by
        ascending RMSE with a fresh 0..n-1 index, so row 0 is the best
        combination.
    """
    import random

    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import ParameterGrid

    grid = ParameterGrid(params_grid)
    progress_bar = st.progress(0)
    horizon = list(range(1, len(test_frame) + 1))

    records = []
    for step, params in enumerate(grid, start=1):
        random.seed(0)
        candidate = make_forecaster(params)
        candidate.fit(train_scaled)
        predictions = scaler.inverse_transform(candidate.predict(fh=horizon))
        mse = mean_squared_error(test_frame, predictions)
        records.append({'Parameters': params, 'MSE': mse, 'RMSE': np.sqrt(mse)})
        progress_bar.progress(step / len(grid))

    # Build the frame once; DataFrame.append was removed in pandas 2.0 and
    # copied the whole frame on every call.
    ranked = pd.DataFrame(records, columns=['Parameters', 'MSE', 'RMSE'])
    return ranked.sort_values(by=['RMSE']).reset_index(drop=True)


# Main pipeline: download -> visualise -> prepare -> tune -> validate -> forecast.
# It runs only while the sidebar "check to start" box is ticked.
if st.session_state['start']:
    # Modelling dependencies are imported here so that the landing page does
    # not pay for them until the user starts a run.
    from sklearn.metrics import mean_absolute_error, mean_squared_error
    from sklearn.preprocessing import MinMaxScaler
    from sktime.forecasting.compose import AutoEnsembleForecaster
    from sktime.forecasting.exp_smoothing import ExponentialSmoothing
    from sktime.forecasting.fbprophet import Prophet
    import holidays

    # ---- Extract the dataset from yfinance --------------------------------
    if ipt == 'Yfinance':
        GetData = yf.Ticker(stick_name)
        yf_data = pd.DataFrame(GetData.history(start=startDate, end=endDate))
        st.subheader('Input DataFrame')
        st.write(stick_name, '*Stock DataFrame*')
        st.dataframe(yf_data)
        if yf_data.empty:
            # Nothing to work with: halt this run instead of failing further
            # down on a missing frame.
            st.error('No Internet Connection')
            st.stop()

    st.session_state['stock_present'] = stick_name

    # ---- Visualisation -----------------------------------------------------
    st.subheader('Visualisation')
    st.sidebar.header('Visualisation')
    chart_select = st.sidebar.selectbox(
        label="Type of chart",
        options=['Lineplots', 'Scatterplots', 'Histogram'],
    )
    numeric_columns = sorted(yf_data.select_dtypes(['float', 'int']).columns)

    try:
        if chart_select == 'Scatterplots':
            st.sidebar.subheader('Scatterplot Settings')
            y_values = st.sidebar.selectbox('Y axis', options=numeric_columns)
            plot = px.scatter(data_frame=yf_data, y=y_values,
                              title='Scatter Plot for ' + y_values + ' column')
        elif chart_select == 'Histogram':
            st.sidebar.subheader('Histogram Settings')
            x_values = st.sidebar.selectbox('X axis', options=numeric_columns)
            # The title must use the histogram's own column selection.
            plot = px.histogram(data_frame=yf_data, x=x_values, marginal="box",
                                title='Histogram Plot for ' + x_values + ' column')
        else:
            st.sidebar.subheader('Lineplots Settings')
            y_values = st.sidebar.selectbox('Y axis', options=numeric_columns)
            plot = px.line(yf_data, y=y_values,
                           title='Line Plot for ' + y_values + ' column')
        st.write(plot)
    except Exception as e:
        # A broken chart should not abort the forecast, but the user has to
        # see why it is missing, so show the error on the page.
        st.exception(e)

    # ---- Final dataset for model building: only the closing price ----------
    close_column = next(
        (name for name in ('Close', 'close', 'CLOSE') if name in numeric_columns),
        None,
    )
    if close_column is None:
        st.subheader('Close Column is not Present in the File, Please Check the file and reupload')
        st.stop()

    final_data = (
        yf_data[[close_column]]
        .sort_index(ascending=True)
        .rename(columns={close_column: 'Close'})
    )
    st.subheader('DataSet used for Training')
    st.write(final_data)

    try:
        # Put the series on a business-day grid and carry the last known
        # price over the days that have no row.
        training_data = final_data.asfreq('B').ffill()
    except (TypeError, ValueError):
        st.error('Please Check the settings, You have not choose the appropriate option or You have not upload the File or not in write formate')
        st.stop()

    # ---- Data transformation -----------------------------------------------
    # Two independent scalers: the final forecast is trained on the fully
    # scaled series and must be inverted with that same fit, while validation
    # uses a scaler fitted on the training slice only.
    full_scaler = MinMaxScaler(feature_range=(0.001, 1))
    full_data_minmax = pd.DataFrame(
        full_scaler.fit_transform(np.array(training_data).reshape(-1, 1)),
        columns=['close'],
    )
    full_data_minmax.index = training_data.index

    # 80/20 chronological split, measured on the frame that is actually
    # sliced (the resampled one), not on the raw download.
    split_at = int(len(training_data) * 0.8)
    train_data = training_data[:split_at]
    test_data = training_data[split_at:]

    scaler = MinMaxScaler(feature_range=(0.001, 1))
    train_data_minmax = pd.DataFrame(
        scaler.fit_transform(np.array(train_data).reshape(-1, 1)),
        columns=['close'],
    )
    train_data_minmax.index = train_data.index

    # ---- Holiday table handed to Prophet -----------------------------------
    india_holidays = holidays.India(years=list(range(2011, 2023)))
    holiday_dates = [day for day, _ in sorted(india_holidays.items())]
    holiday = pd.DataFrame({
        'ds': pd.to_datetime(holiday_dates),
        'holiday': 'India_Holidays',
    })

    # Tuning runs only when the stock differs from the one tuned last;
    # 'stock_past' is updated further down, after validation.
    needs_tuning = st.session_state['stock_present'] != st.session_state['stock_past']

    # ---- Prophet model -----------------------------------------------------
    if needs_tuning:
        st.write('**Hypreparameter Tunning is Done only once for every stock, So be patient**')
        st.write('**Hypreparameter Tunning is Started for Prophet Model**')
        Pro_parameters = _rank_parameter_grid(
            lambda p: Prophet(
                freq='B',
                changepoint_prior_scale=p['changepoint_prior_scale'],
                n_changepoints=p['n_changepoints'],
                seasonality_mode='multiplicative',
                seasonality_prior_scale=p['seasonality_prior_scale'],
                weekly_seasonality=False,
                daily_seasonality=False,
                yearly_seasonality=True,
                add_country_holidays={'country_name': 'India'},
                holidays=holiday,
            ),
            {
                'changepoint_prior_scale': [10, 25],
                'n_changepoints': [10, 25],
                'seasonality_prior_scale': [0.05, 1],
            },
            train_data_minmax,
            test_data,
            scaler,
        )
        st.write('**Hypreparameter Tunning is Done for Prophet Model**')
        best_prophet = Pro_parameters['Parameters'][0]
        for option in ('changepoint_prior_scale', 'n_changepoints', 'seasonality_prior_scale'):
            st.session_state[option] = best_prophet[option]

    Pro_model = Prophet(
        freq='B',
        seasonality_mode='multiplicative',
        changepoint_prior_scale=st.session_state['changepoint_prior_scale'],
        n_changepoints=st.session_state['n_changepoints'],
        seasonality_prior_scale=st.session_state['seasonality_prior_scale'],
        add_country_holidays={'country_name': 'India'},
        verbose=10,
        holidays=holiday,
        yearly_seasonality=True,
        weekly_seasonality=False,
        daily_seasonality=False,
    )

    # ---- Exponential smoothing model ---------------------------------------
    if needs_tuning:
        st.write('**Hypreparameter Tunning is Started for Exponential Smoothing Model**')
        Expo_parameters = _rank_parameter_grid(
            lambda p: ExponentialSmoothing(
                trend=p['trend'],
                seasonal=p['seasonal'],
                sp=262,
                damped_trend=False,
            ),
            {'trend': ["add", "mul"], 'seasonal': ["add", "mul"]},
            train_data_minmax,
            test_data,
            scaler,
        )
        st.write('**Hypreparameter Tunning is Done for Exponential Smoothing Model**')
        best_expo = Expo_parameters['Parameters'][0]
        for option in ('trend', 'seasonal'):
            st.session_state[option] = best_expo[option]

    Expo_model = ExponentialSmoothing(
        trend=st.session_state['trend'],
        seasonal=st.session_state['seasonal'],
        sp=262,
        damped_trend=False,
    )

    # ---- Ensemble: validate on the hold-out slice --------------------------
    st.subheader('Model Building')
    st.write('**Validating the final model**')
    forecasters = [
        ("prophet", Pro_model),
        ("expo", Expo_model),
    ]
    Ensmodel = AutoEnsembleForecaster(forecasters=forecasters, n_jobs=-1, random_state=42)
    Ensmodel.fit(train_data_minmax)
    fh = list(range(1, len(test_data) + 1))
    test_predictionsEns = Ensmodel.predict(fh=fh)
    test_predictions = pd.DataFrame(
        scaler.inverse_transform(test_predictionsEns),
        columns=['Close'],
    )
    test_predictions.index = test_data.index

    mse = mean_squared_error(test_data, test_predictions)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(test_data, test_predictions)
    # Reduce on the Series so the result is a plain float regardless of how
    # the installed pandas reduces a whole DataFrame.
    relative_error = (test_data['Close'] - test_predictions['Close']) / test_data['Close']
    mape = float(np.mean(np.abs(relative_error)) * 100)
    errors = {'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'MAPE': mape}
    # All values are scalars, so the row label has to be given explicitly.
    errors_df = pd.DataFrame(errors, index=['Close'])

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=train_data.index, y=train_data['Close'], mode='lines', name='TRAIN'))
    fig.add_trace(go.Scatter(x=test_data.index, y=test_data['Close'], mode='lines', name='TEST'))
    fig.add_trace(go.Scatter(x=test_predictions.index, y=test_predictions['Close'], mode='lines', name='PREDICTION'))
    fig.update_layout(title_text='Forecast vs Actuals', title_x=0.5)
    st.plotly_chart(fig)
    st.write(errors_df)

    st.write('**If you are satisfied with the Validation then Start the Forecast Or Reset the Session State and re-run the app for Hyperparameter Tunning**')
    numdays = st.number_input("Number of Day's you want to Forecaste",
                              min_value=1,
                              max_value=n * 3,
                              value=n)

    # 'fr' remembers the Yes/No choice across reruns; it is reset whenever a
    # new stock has just been tuned.
    if 'fr' not in st.session_state:
        st.session_state['fr'] = 0
    if needs_tuning:
        st.session_state['stock_past'] = st.session_state['stock_present']
        st.session_state['fr'] = 0

    frct = st.selectbox('Start the Forecast', options=['No', 'Yes'],
                        index=st.session_state['fr'])

    # ---- Final forecast: retrain on the full series ------------------------
    if frct == 'Yes':
        st.subheader('Forecasting')
        st.session_state['fr'] = 1
        st.write('Training the model')
        Ensmodel = AutoEnsembleForecaster(forecasters=forecasters, n_jobs=-1, random_state=42)
        Ensmodel.fit(full_data_minmax)
        st.write('Forecasting from trained model')

        # numdays calendar days starting at endDate, then reduced to the
        # business days inside that window.
        prediction_list = pd.to_datetime([
            (pd.to_datetime(endDate) + dttm.timedelta(days=x)).date()
            for x in range(numdays)
        ])
        forecaste = pd.DataFrame(prediction_list, columns=['Date'])
        for_df = forecaste.set_index('Date').asfreq('B')

        if for_df.empty:
            # e.g. a one-day window that falls on a weekend.
            st.warning('The selected forecast window contains no business days; increase the number of days.')
        else:
            fh1 = pd.DatetimeIndex(np.array(for_df.index))
            final_predictions = Ensmodel.predict(fh=fh1)
            # Invert with the scaler that was fitted on the full series.
            final_predictions = full_scaler.inverse_transform(final_predictions)
            for_df['Close'] = final_predictions

            st.markdown('### Forecast DataSet')
            st.write(for_df)
            fig = go.Figure()
            fig.add_trace(go.Scatter(x=training_data.index, y=training_data['Close'], mode='lines', name='TRAIN'))
            fig.add_trace(go.Scatter(x=for_df.index, y=for_df['Close'], mode='lines', name='Forecast'))
            fig.update_layout(title_text='Final Forecast', title_x=0.5)
            st.write(fig)
    else:
        st.session_state['fr'] = 0
# Credits: pressing the sidebar button prints the team roster as a table.
if st.sidebar.button('Made By'):
    made = pd.DataFrame({
        'Name': [
            'Ayush Patidar',
            'Aditya Rao',
            'Farzan Nawaz',
            'Nikhil Hosamani',
            'Lakshmi Supriya',
            'Bhavitha Mitte',
            'Aadarsh Asthana',
        ],
        'Contact Detail': [
            '[email protected]',
            '[email protected]',
            '[email protected]',
            '[email protected]',
            '[email protected]',
            '[email protected]',
            '[email protected]',
        ],
    })
    st.write(made)