from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
import pandas as pd
def create_week_date_featues(df, date_column):
    """Add calendar features derived from `date_column` to `df`."""
    dates = pd.to_datetime(df[date_column])
    df['Month'] = dates.dt.month
    df['Day'] = dates.dt.day
    df['Dayofweek'] = dates.dt.dayofweek
    df['DayOfyear'] = dates.dt.dayofyear
    # `.dt.week` was removed in recent pandas; use the ISO calendar week instead
    df['Week'] = dates.dt.isocalendar().week.astype(int)
    df['Quarter'] = dates.dt.quarter
    # binary flags: 1 when the date falls on the period boundary, 0 otherwise
    df['Is_month_start'] = np.where(dates.dt.is_month_start, 1, 0)
    df['Is_month_end'] = np.where(dates.dt.is_month_end, 1, 0)
    df['Is_quarter_start'] = np.where(dates.dt.is_quarter_start, 1, 0)
    df['Is_quarter_end'] = np.where(dates.dt.is_quarter_end, 1, 0)
    df['Is_year_start'] = np.where(dates.dt.is_year_start, 1, 0)
    df['Is_year_end'] = np.where(dates.dt.is_year_end, 1, 0)
    # semester and weekend/weekday flags are derived from the quarter and
    # day-of-week columns, not from the raw date column
    df['Semester'] = np.where(df['Quarter'].isin([1, 2]), 1, 2)
    df['Is_weekend'] = np.where(df['Dayofweek'].isin([5, 6]), 1, 0)
    df['Is_weekday'] = np.where(df['Dayofweek'].isin([0, 1, 2, 3, 4]), 1, 0)
    df['Days_in_month'] = dates.dt.days_in_month
    return df
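# Illustrative sketch (not part of the original Space): a minimal example of how
# create_week_date_featues could be applied to a toy frame. The frame and column
# names below are assumptions for demonstration only; the function is normally
# applied to the prepared sales/electricity frames before building the dataset.
def _demo_create_week_date_featues():
    toy = pd.DataFrame({
        'date': pd.date_range('2017-01-01', periods=14, freq='D'),
        'sales': range(14),
    })
    toy = create_week_date_featues(toy, 'date')
    # inspect a few of the derived calendar columns
    return toy[['date', 'Dayofweek', 'Week', 'Is_weekend', 'Semester']]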
def val_prediction(validation, model: object, train_dataset: pd.DataFrame, store_id: str = '1', item_id: str = '1'):
    """Predict on the validation set for one store/item and report MAE/RMSE."""
    predictions = model.predict(
        validation.filter(lambda x: (x.store == store_id) & (x.item == item_id)),
        return_y=True,
        return_x=True,
        trainer_kwargs=dict(accelerator="cpu"),
    )
    filter_train = train_dataset.loc[(train_dataset['store'] == store_id) & (train_dataset['item'] == item_id)].reset_index(drop=True)
    # print(filter_train)
    # the last 30 rows of the filtered training data form the validation window
    training_results = filter_train.iloc[-30:, :].copy()
    y = [float(i) for i in predictions.output[0]]                # model predictions
    y_true = [float(i) for i in predictions.y[0][0]]             # actual target values
    x = [int(i) for i in predictions.x['decoder_time_idx'][0]]   # decoder time indices
    training_results['prediction'] = y
    training_results['y_true'] = y_true
    training_results['x'] = x
    rmse = np.around(np.sqrt(mean_squared_error(training_results['Lead_1'], y)), 2)
    mae = np.around(mean_absolute_error(training_results['Lead_1'], y), 2)
    print(f" VAL DATA = Item ID : {item_id} :: MAE : {mae} :: RMSE : {rmse}")
    return training_results
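# Illustrative sketch (assumption, not in the original Space): one way the frame returned
# by val_prediction could be plotted to compare predictions against actuals. The column
# names match those set above; matplotlib is assumed to be available in the Space.
def _plot_val_results(val_results: pd.DataFrame, title: str = 'Validation: prediction vs actual'):
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(val_results['x'], val_results['y_true'], label='actual')
    ax.plot(val_results['x'], val_results['prediction'], label='prediction')
    ax.set_xlabel('decoder time index')
    ax.set_title(title)
    ax.legend()
    return fig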
def test_prediction(model: object, train_dataset, test_dataset, earliest_time, max_encoder_length=120, store_id: str = '1', item_id: str = '1'):
    """Predict on the test set for one store/item, using the tail of the training data as the encoder window."""
    # encoder data is the lookback window: the last `max_encoder_length` days of the training data
    encoder_data = train_dataset[lambda x: x.days_from_start > x.days_from_start.max() - max_encoder_length]
    last_data = train_dataset[lambda x: x.days_from_start == x.days_from_start.max()]
    # alternative decoder construction (kept for reference): roll `last_data` forward day by day
    # and recompute the time index and calendar features
    # decoder_data = pd.concat(
    #     [last_data.assign(date=lambda x: x.date + pd.offsets.DateOffset(i)) for i in range(1, 30 + 1)],
    #     ignore_index=True,
    # )
    # decoder_data["hours_from_start"] = (decoder_data["date"] - earliest_time).dt.seconds / 60 / 60 + (decoder_data["date"] - earliest_time).dt.days * 24
    # decoder_data['hours_from_start'] = decoder_data['hours_from_start'].astype('int')
    # decoder_data["hours_from_start"] += encoder_data["hours_from_start"].max() + 1 - decoder_data["hours_from_start"].min()
    # # add time index consistent with "data"
    # decoder_data["days_from_start"] = (decoder_data["date"] - earliest_time).apply(lambda x: x.days)
    # decoder_data = create_week_date_featues(decoder_data, 'date')
    # the prepared test set is used directly as the decoder data
    decoder_data = test_dataset.copy()
    new_prediction_data = pd.concat([encoder_data, decoder_data], ignore_index=True)
    filter_test = new_prediction_data.loc[(new_prediction_data['store'] == store_id) & (new_prediction_data['item'] == item_id)]
    predictions = model.predict(
        filter_test,
        return_y=True,
        return_x=True,
        trainer_kwargs=dict(accelerator="cpu"),
    )
    # print(filter_test)
    testing_results = test_dataset.loc[(test_dataset['store'] == store_id) & (test_dataset['item'] == item_id)].copy()
    y = [float(i) for i in predictions.output[0]]                # model predictions
    y_true = [float(i) for i in predictions.y[0][0]]             # actual target values
    x = [int(i) for i in predictions.x['decoder_time_idx'][0]]   # decoder time indices
    testing_results['prediction'] = y
    testing_results['y_true'] = y_true
    testing_results['x'] = x
    return testing_results
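# Illustrative sketch (assumption, not in the original Space): computing the same MAE/RMSE
# as the validation helper on the frame returned by test_prediction, since that function
# does not report metrics itself. 'Lead_1' is the target column used elsewhere in this file.
def _report_test_metrics(testing_results: pd.DataFrame, item_id: str = '1'):
    rmse = np.around(np.sqrt(mean_squared_error(testing_results['Lead_1'], testing_results['prediction'])), 2)
    mae = np.around(mean_absolute_error(testing_results['Lead_1'], testing_results['prediction']), 2)
    print(f"TEST DATA = Item ID : {item_id} :: MAE : {mae} :: RMSE : {rmse}")
    return mae, rmse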
# -------------------------------------------------------------
def val_pred(model: object, train_dataset, validation, consumer_id: str = 'MT_001'):
    """Predict on the validation set for one consumer and report MAE/RMSE."""
    predictions = model.predict(
        validation.filter(lambda x: (x.consumer_id == consumer_id)),
        return_y=True,
        return_x=True,
        trainer_kwargs=dict(accelerator="cpu"),
    )
    filter_train = train_dataset.loc[(train_dataset['consumer_id'] == consumer_id)].reset_index(drop=True)
    # print(filter_train)
    # filter validation data: the last 24 hours of the filtered training data
    val_results = filter_train.iloc[-24:, :].copy()
    # prediction
    y = [float(i) for i in predictions.output[0]]
    # actual
    y_true = [float(i) for i in predictions.y[0][0]]
    # time idx
    x = [int(i) for i in predictions.x['decoder_time_idx'][0]]
    # update into the validation results
    val_results['prediction'] = y
    val_results['y_true'] = y_true
    val_results['x'] = x
    # RMSE & MAE for validation data
    rmse = np.around(np.sqrt(mean_squared_error(val_results['Lead_1'], y)), 2)
    mae = np.around(mean_absolute_error(val_results['Lead_1'], y), 2)
    print(f" VAL DATA = Consumer ID : {consumer_id} :: MAE : {mae} :: RMSE : {rmse}")
    return val_results
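# Illustrative sketch (not in the original Space): evaluating several consumers at once by
# looping over val_pred and collecting the per-consumer frames. The model, training frame
# and validation dataset are placeholders created elsewhere in the Space.
def _val_pred_many(model, train_dataset, validation, consumer_ids=('MT_001', 'MT_002', 'MT_003')):
    frames = []
    for cid in consumer_ids:
        frames.append(val_pred(model, train_dataset, validation, consumer_id=cid))
    return pd.concat(frames, ignore_index=True)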
def test_pred(model: object, train_dataset, test_dataset, consumer_id: str = 'MT_001', max_encoder_length: int = 168):
    """Predict on the test set for one consumer, using the last week of training data as the encoder window, and report MAE/RMSE."""
    # encoder data is the lookback window: the last `max_encoder_length` hours (one week) of the training data
    encoder_data = train_dataset[lambda x: x.hours_from_start > x.hours_from_start.max() - max_encoder_length]
    last_data = train_dataset[lambda x: x.hours_from_start == x.hours_from_start.max()]
    # the prepared test set is used directly as the decoder data
    decoder_data = test_dataset.copy()
    new_prediction_data = pd.concat([encoder_data, decoder_data], ignore_index=True)
    filter_train = new_prediction_data.loc[(new_prediction_data['consumer_id'] == consumer_id)]
    predictions = model.predict(
        filter_train,
        return_y=True,
        return_x=True,
        trainer_kwargs=dict(accelerator="cpu"),
    )
    # print(filter_train)
    testing_results = test_dataset.loc[(test_dataset['consumer_id'] == consumer_id)].copy()
    y = [float(i) for i in predictions.output[0]]                # model predictions
    y_true = [float(i) for i in predictions.y[0][0]]             # actual target values
    x = [int(i) for i in predictions.x['decoder_time_idx'][0]]   # decoder time indices
    testing_results['prediction'] = y
    testing_results['y_true'] = y_true
    testing_results['x'] = x
    rmse = np.around(np.sqrt(mean_squared_error(testing_results['Lead_1'], y)), 2)
    mae = np.around(mean_absolute_error(testing_results['Lead_1'], y), 2)
    print(f"TEST DATA = Consumer ID : {consumer_id} :: MAE : {mae} :: RMSE : {rmse}")
    return testing_results
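# Illustrative sketch (assumption, not in the original Space): summarising test metrics for
# several consumers into one table using test_pred above. The model, training frame and
# prepared test frame are placeholders created elsewhere in the Space.
def _test_metrics_table(model, train_dataset, test_dataset, consumer_ids=('MT_001', 'MT_002')):
    rows = []
    for cid in consumer_ids:
        res = test_pred(model, train_dataset, test_dataset, consumer_id=cid)
        rows.append({
            'consumer_id': cid,
            'MAE': np.around(mean_absolute_error(res['Lead_1'], res['prediction']), 2),
            'RMSE': np.around(np.sqrt(mean_squared_error(res['Lead_1'], res['prediction'])), 2),
        })
    return pd.DataFrame(rows)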