|
|
|
"""DriverPosPredictionFinal |
|
|
|
Automatically generated by Colab. |
|
|
|
Original file is located at |
|
https://colab.research.google.com/drive/1DGTfO4HEZDof1phuficJD_J8v38JnF3C |
|
""" |
|
|
|
from flask import Flask,jsonify |
|
import json |
|
app = Flask(__name__) |
|
|
|
|
|
from sklearn import tree, linear_model |
|
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor |
|
from sklearn.model_selection import train_test_split, GridSearchCV |
|
# !pip install bayesian-optimization   (Colab shell magic; not valid Python in a .py file - install the package before running)
|
from bayes_opt import BayesianOptimization |
|
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_squared_log_error, median_absolute_error |
|
import pandas as pd |
|
import joblib |
|
import matplotlib.pyplot as plt |
|
import seaborn as sns |
|
import numpy as np |
|
import xgboost as xgb |
|
import pickle |
|
|
|
def read_data(file_name):
    """Load a race CSV and split it into feature columns and the target.

    Args:
        file_name: Path or buffer passed straight to ``pd.read_csv``.

    Returns:
        A ``(x, y)`` tuple where ``x`` is the loaded frame without the
        ``RaceID``, ``Pos`` and ``formating`` columns and ``y`` is the
        ``Pos`` column.
    """
    frame = pd.read_csv(file_name)
    # Identifier, target and the "formating" helper column are not features.
    features = frame.drop(columns=['RaceID', "Pos", "formating"])
    target = frame["Pos"]
    return features, target
|
|
|
|
|
def decision_tree_regressor_method():
    """Create an unfitted ``DecisionTreeRegressor`` with ``random_state=42``."""
    return DecisionTreeRegressor(random_state=42)
|
def decsison_tree_classifier_method(X_train, y_train):
    """Fit a ``DecisionTreeClassifier`` with default settings.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.

    Returns:
        The value returned by ``fit`` (the fitted classifier).
    """
    return DecisionTreeClassifier().fit(X_train, y_train)
|
def linear_reg(X_train, y_train):
    """Fit an ordinary ``LinearRegression`` model on the training data.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training targets passed to ``fit``.

    Returns:
        The fitted ``LinearRegression`` instance.
    """
    regressor = linear_model.LinearRegression()
    regressor.fit(X_train, y_train)
    return regressor
|
def hyper_paramter_tuning(model, xtrain, ytrain, **kwargs):
    """Grid-search ``model`` and return the best estimator found.

    Args:
        model: Estimator handed to ``GridSearchCV``.
        xtrain: Training features used for the search.
        ytrain: Training targets used for the search.
        **kwargs: Hyperparameter names mapped to lists of candidate values;
            the whole mapping is used as ``param_grid``.

    Returns:
        ``best_estimator_`` of the fitted search, selected with 3-fold
        cross-validation scored by negative mean squared error.
    """
    search = GridSearchCV(
        estimator=model,
        param_grid=kwargs,
        cv=3,
        n_jobs=-1,
        verbose=2,
        scoring='neg_mean_squared_error',
    )
    search.fit(xtrain, ytrain)
    return search.best_estimator_
|
|
|
|
|
def predict(model, xtest, ytest):
    """Predict on the test set, print regression metrics, return predictions.

    Args:
        model: Object exposing ``predict``; called with ``xtest``.
        xtest: Test features.
        ytest: True targets compared against the predictions.

    Returns:
        The predictions produced by ``model.predict(xtest)``.
    """
    y_pred = model.predict(xtest)

    mse = mean_squared_error(ytest, y_pred)
    rmse = mse ** 0.5
    mae = mean_absolute_error(ytest, y_pred)
    r2 = r2_score(ytest, y_pred)
    medae = median_absolute_error(ytest, y_pred)

    # MSLE is log-based, so scikit-learn raises ValueError when targets or
    # predictions are negative (an unconstrained regressor can produce such
    # predictions). Report the metric as unavailable instead of aborting the
    # whole evaluation and losing the predictions.
    try:
        msle = mean_squared_log_error(ytest, y_pred)
    except ValueError as err:
        msle = f"n/a ({err})"

    print(f"Test MSE: {mse}")
    print(f"Test RMSE: {rmse}")
    print(f"Test MAE: {mae}")
    print(f"Test R²: {r2}")
    print(f"Test MSLE: {msle}")
    print(f"Test Median Absolute Error: {medae}")

    return y_pred
|
|
|
|
|
|
|
def hyperamter_tuning_paramter_grid():
    """Return the decision-tree hyperparameter grid.

    Returns:
        A dict mapping each hyperparameter name to its list of candidate
        values (``max_depth``, ``min_samples_split``, ``min_samples_leaf``,
        ``min_weight_fraction_leaf``).
    """
    return dict(
        max_depth=[1, 2, 3, 4, 5, None],
        min_samples_split=[2, 3, 5, 6],
        min_samples_leaf=[1, 2, 4, 5],
        min_weight_fraction_leaf=[0.0, 0.01, 0.05, 0.1, 0.2],
    )
|
|
|
|
|
def prediction(req):
    """Parse a JSON request body into numeric race features.

    The parsed features are only printed; the response is a fixed
    placeholder message and no model is invoked here.

    Args:
        req: JSON text containing the keys ``lapNumber``, ``LapTimes``,
            ``PitStopTimes``, ``PrevLap``, ``AvgSpeed``, ``AirTemp_Cel``,
            ``TrackTemp_Cel``, ``Humidity`` and ``WindSpeed_km``.

    Returns:
        The Flask JSON response ``{'message': 'Hello world'}``.

    Raises:
        KeyError: If a required key is missing from the payload.
        ValueError: If a value cannot be converted to ``float``.
    """
    payload = json.loads(req)

    # (feature name, payload key) pairs, in the order they are read.
    field_map = (
        ('LapNumber', 'lapNumber'),
        ('LapTimes', 'LapTimes'),
        ('PitStopTimes', 'PitStopTimes'),
        ('PrevLap', 'PrevLap'),
        ('AvgSpeed', 'AvgSpeed'),
        ('AirTemp_Cel', 'AirTemp_Cel'),
        ('TrackTemp_Cel', 'TrackTemp_Cel'),
        ('Humidity', 'Humidity'),
        ('WindSpeed_km', 'WindSpeed_km'),
    )
    input_data = {}
    for feature, key in field_map:
        input_data[feature] = float(payload[key])

    print(input_data)
    return jsonify({'message':'Hello world'})
|
|
|
|
|
def ask_user(model, X_test=None):
    """Prompt for race conditions on stdin and predict a finishing position.

    Args:
        model: Object exposing ``predict``; it receives a one-row DataFrame
            built from the answers.
        X_test: Optional DataFrame whose columns define the feature layout.
            Columns the user was not asked about are added with value 0 and
            the row is reordered to ``X_test.columns``.

    Returns:
        The raw (unrounded) first prediction, or ``None`` when the answers
        could not be read or parsed or the prediction step failed. The
        rounded position is printed.
    """
    # (feature name, prompt) pairs, asked in this order.
    prompts = (
        ('LapNumber', "Enter the number of total laps in race: "),
        ('LapTimes', "Avg lap time from prev year (or estimate 98): "),
        ('PitStopTimes', "Enter pit stop time (0 if no pit stop): "),
        ('PrevLap', "Enter avg lap time differece (0-2) (0 if first lap): "),
        ('AvgSpeed', "Enter average speed in km/h from prev year: "),
        ('AirTemp_Cel', "estimate air temperature in Celsius on race day: "),
        ('TrackTemp_Cel', "estimate track temperature in Celsius on race day: "),
        ('Humidity', "estimate the humidity percentage (0-100) on race day: "),
        ('WindSpeed_km/h', "estimate wind speed in km/h: "),
    )

    # Only the prompt/parse stage may blame the user for non-numeric input;
    # a ValueError raised later by the model is a different problem and must
    # not be reported as a typing mistake.
    try:
        input_data = {}
        for column, prompt in prompts:
            input_data[column] = float(input(prompt))
    except ValueError as e:
        print(f"Error: Please enter valid numeric values. Details: {e}")
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

    try:
        input_df = pd.DataFrame([input_data])

        if X_test is not None:
            # Align the single row with the reference feature layout.
            for col in set(X_test.columns) - set(input_df.columns):
                input_df[col] = 0
            input_df = input_df[X_test.columns]

        predicted = model.predict(input_df)
        print(f"\nPredicted position: {int(round(predicted[0]))}")
        return predicted[0]
    except Exception as e:
        print(f"An error occurred: {e}")
        return None
|
|
|
|
|
|
|
|
|
|
|
|
|
# Experiment 1: linear regression on a 70/30 train/test split of b.csv.
x, y = read_data("b.csv")
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.3)

model_1 = linear_reg(xtrain, ytrain)

# Prints the evaluation metrics and keeps the test-set predictions.
ypred = predict(model_1, xtest, ytest)
|
|
|
|
|
|
|
# Experiment 2: decision-tree classifier, tuned by grid search, on a fresh
# 70/30 train/test split of b.csv.
x, y = read_data("b.csv")
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.3)

model_2 = decsison_tree_classifier_method(xtrain, ytrain)

parameter_grid = hyperamter_tuning_paramter_grid()
tuned_model_2 = hyper_paramter_tuning(model_2, xtrain, ytrain, **parameter_grid)

# Prints the evaluation metrics and keeps the test-set predictions.
y_pred = predict(tuned_model_2, xtest, ytest)
|
|
|
|
|
|
|
|
|
# Experiment 3: decision-tree regressor, tuned by grid search, on a fresh
# 70/30 train/test split of b.csv.
x, y = read_data("b.csv")
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.3)

model_3 = decision_tree_regressor_method()

parameter_grid = hyperamter_tuning_paramter_grid()
tuned_model_3 = hyper_paramter_tuning(model_3, xtrain, ytrain, **parameter_grid)

# Prints the evaluation metrics and keeps the test-set predictions.
y_pred = predict(tuned_model_3, xtest, ytest)
|
|
|
|
|
# Persist the tuned decision-tree regressor to model.pkl.
with open('model.pkl', 'wb') as file:
    pickle.dump(tuned_model_3, file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|