# -*- coding: utf-8 -*-
"""DriverPosPredictionFinal

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1DGTfO4HEZDof1phuficJD_J8v38JnF3C

Trains a linear regression, a decision-tree classifier and a decision-tree
regressor on ``b.csv`` to predict the ``Pos`` column, prints evaluation
metrics for each, and pickles the tuned regressor to ``model.pkl``.

NOTE: the training pipeline at the bottom of this file runs at import time,
exactly as in the original notebook export.
"""

import json
import pickle

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import xgboost as xgb
# The notebook ran the shell magic `!pip install bayesian-optimization` here.
# That is IPython-only syntax and a SyntaxError in a plain .py file, so the
# package must be installed in the environment before this module is run.
from bayes_opt import BayesianOptimization
from flask import Flask, jsonify
from sklearn import linear_model, tree
from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    mean_squared_log_error,
    median_absolute_error,
    r2_score,
)
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

app = Flask(__name__)


def read_data(file_name):
    """Load a CSV file and split it into features and target.

    Args:
        file_name: Location of the CSV, passed straight to ``pd.read_csv``.

    Returns:
        A ``(x, y)`` tuple where ``x`` is the frame without the ``RaceID``,
        ``Pos`` and ``formating`` columns and ``y`` is the ``Pos`` column.

    Raises:
        KeyError: Raised by pandas if any of those columns is absent.
    """
    df = pd.read_csv(file_name)
    x = df.drop(["RaceID", "Pos", "formating"], axis=1)
    y = df["Pos"]
    return x, y


def decision_tree_regressor_method():
    """Return an unfitted ``DecisionTreeRegressor`` with ``random_state=42``."""
    return DecisionTreeRegressor(random_state=42)


def decsison_tree_classifier_method(X_train, y_train):
    """Fit a default ``DecisionTreeClassifier`` on the data and return it."""
    dtree = DecisionTreeClassifier()
    return dtree.fit(X_train, y_train)


def linear_reg(X_train, y_train):
    """Fit an ordinary ``LinearRegression`` on the data and return it."""
    regr = linear_model.LinearRegression()
    regr.fit(X_train, y_train)
    return regr


def hyper_paramter_tuning(model, xtrain, ytrain, **kwargs):
    """Grid-search ``model`` and return the best estimator found.

    The search uses 3-fold cross-validation, all available cores
    (``n_jobs=-1``) and ``neg_mean_squared_error`` as the score.

    Original author's note: this does not work for linear regression.

    Args:
        model: Estimator handed to ``GridSearchCV``.
        xtrain: Training features.
        ytrain: Training target.
        **kwargs: The parameter grid; each keyword is a parameter name and
            each value the list of candidates to try.

    Returns:
        ``best_estimator_`` of the fitted ``GridSearchCV`` object.
    """
    grid_search_object = GridSearchCV(
        estimator=model,
        param_grid=kwargs,
        cv=3,
        n_jobs=-1,
        verbose=2,
        scoring="neg_mean_squared_error",
    )
    grid_search_object.fit(xtrain, ytrain)
    return grid_search_object.best_estimator_


def predict(model, xtest, ytest):
    """Predict on the test set, print evaluation metrics, return predictions.

    Args:
        model: Fitted estimator exposing ``predict``.
        xtest: Test features.
        ytest: True target values for ``xtest``.

    Returns:
        The array returned by ``model.predict(xtest)``.
    """
    y_pred = model.predict(xtest)

    mse = mean_squared_error(ytest, y_pred)  # smaller is better
    rmse = mse ** 0.5  # average error magnitude, in the target's units
    mae = mean_absolute_error(ytest, y_pred)
    r2 = r2_score(ytest, y_pred)
    try:
        msle = mean_squared_log_error(ytest, y_pred)
    except ValueError as err:
        # scikit-learn rejects values the log error is undefined for (e.g.
        # negative predictions from an unconstrained linear model). Report
        # that in place instead of aborting the whole metric summary.
        msle = f"undefined ({err})"
    medae = median_absolute_error(ytest, y_pred)

    print(f"Test MSE: {mse}")
    print(f"Test RMSE: {rmse}")
    print(f"Test MAE: {mae}")
    print(f"Test R²: {r2}")
    print(f"Test MSLE: {msle}")
    print(f"Test Median Absolute Error: {medae}")
    return y_pred


def hyperamter_tuning_paramter_grid():
    """Return the decision-tree parameter grid used for grid search.

    The original left ``'criterion': ['gini', 'entropy']`` disabled and
    noted that more tuning criteria could be added.
    """
    parameter_grid = {
        "max_depth": [1, 2, 3, 4, 5, None],
        "min_samples_split": [2, 3, 5, 6],
        "min_samples_leaf": [1, 2, 4, 5],
        "min_weight_fraction_leaf": [0.0, 0.01, 0.05, 0.1, 0.2],
    }
    return parameter_grid


# (feature name, key read from the request payload) in the original order.
# NOTE(review): 'lapNumber' is the only lower-camel-case request key --
# confirm against the client that sends the payload.
_REQUEST_FIELDS = (
    ("LapNumber", "lapNumber"),
    ("LapTimes", "LapTimes"),
    ("PitStopTimes", "PitStopTimes"),
    ("PrevLap", "PrevLap"),
    ("AvgSpeed", "AvgSpeed"),
    ("AirTemp_Cel", "AirTemp_Cel"),
    ("TrackTemp_Cel", "TrackTemp_Cel"),
    ("Humidity", "Humidity"),
    ("WindSpeed_km", "WindSpeed_km"),
)


# Route registration was left disabled in the original:
# @app.route('/predict', methods=['POST'])
def prediction(req):
    """Parse a JSON request body into float features and echo them.

    This is still a stub: the parsed features are printed, no model is
    invoked, and a fixed greeting is returned.

    Args:
        req: JSON text containing every request key in ``_REQUEST_FIELDS``.

    Returns:
        The Flask response built by ``jsonify({'message': 'Hello world'})``.
        NOTE(review): ``jsonify`` presumably needs an active Flask
        application context -- confirm before calling outside a request.

    Raises:
        KeyError: If a required key is missing from the payload.
        ValueError: If ``req`` is not valid JSON or a value is not numeric.
    """
    data = json.loads(req)
    input_data = {
        feature: float(data[key]) for feature, key in _REQUEST_FIELDS
    }
    print(input_data)
    return jsonify({"message": "Hello world"})


# (feature name, prompt shown to the user) in the order they are asked.
# NOTE(review): 'WindSpeed_km/h' differs from the 'WindSpeed_km' key used by
# prediction(); confirm which one matches the training CSV column.
_USER_PROMPTS = (
    # Basic race information
    ("LapNumber", "Enter the number of total laps in race: "),
    ("LapTimes", "Avg lap time from prev year (or estimate 98): "),
    ("PitStopTimes", "Enter pit stop time (0 if no pit stop): "),
    ("PrevLap", "Enter avg lap time differece (0-2) (0 if first lap): "),
    # Race conditions
    ("AvgSpeed", "Enter average speed in km/h from prev year: "),
    ("AirTemp_Cel", "estimate air temperature in Celsius on race day: "),
    ("TrackTemp_Cel", "estimate track temperature in Celsius on race day: "),
    ("Humidity", "estimate the humidity percentage (0-100) on race day: "),
    ("WindSpeed_km/h", "estimate wind speed in km/h: "),
)


def ask_user(model, X_test=None):
    """Prompt for race features on stdin and print the predicted position.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Optional frame whose columns define the layout the model
            expects. When given, the single input row is aligned to those
            columns; columns the user was not asked for are filled with 0.

    Returns:
        The first element of the model's prediction, or ``None`` when the
        input was not numeric or any other error occurred (the error is
        printed, not raised).
    """
    try:
        try:
            input_data = {
                feature: float(input(prompt))
                for feature, prompt in _USER_PROMPTS
            }
        except ValueError as e:
            # Only input parsing is covered here, so a ValueError raised
            # later by the model is not misreported as a typing mistake.
            print(f"Error: Please enter valid numeric values. Details: {e}")
            return None

        input_df = pd.DataFrame([input_data])

        if X_test is not None:
            missing_cols = [
                col for col in X_test.columns if col not in input_df.columns
            ]
            if missing_cols:
                # Make the zero-fill visible: a misspelled feature name
                # would otherwise silently turn into a constant 0.
                print(f"Warning: columns filled with 0: {missing_cols}")
            # Add missing columns as 0 and match the training column order.
            input_df = input_df.reindex(columns=X_test.columns, fill_value=0)

        predicted = model.predict(input_df)
        print(f"\nPredicted position: {int(round(predicted[0]))}")
        return predicted[0]
    except Exception as e:
        # Interactive boundary: report the problem instead of crashing.
        print(f"An error occurred: {e}")
        return None


# The dataset is loaded once; every experiment below draws its own random
# 70/30 split from it.
x, y = read_data("b.csv")

# -----linear regression------------
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.3)
model_1 = linear_reg(xtrain, ytrain)
ypred = predict(model_1, xtest, ytest)

# -----decision tree classifier-----
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.3)
model_2 = decsison_tree_classifier_method(xtrain, ytrain)
parameter_grid = hyperamter_tuning_paramter_grid()
tuned_model_2 = hyper_paramter_tuning(model_2, xtrain, ytrain, **parameter_grid)
y_pred = predict(tuned_model_2, xtest, ytest)

# ----decision tree regressor-------
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.3)
model_3 = decision_tree_regressor_method()
parameter_grid = hyperamter_tuning_paramter_grid()
tuned_model_3 = hyper_paramter_tuning(model_3, xtrain, ytrain, **parameter_grid)
y_pred = predict(tuned_model_3, xtest, ytest)

# Persist the tuned regressor.
with open("model.pkl", "wb") as file:
    pickle.dump(tuned_model_3, file)

# Ask user for input and predict position (disabled in the original):
# predicted_position = ask_user(tuned_model_3, xtest)