File size: 7,471 Bytes
bb1009c
 
 
 
 
 
 
 
 
bdeed57
 
 
bb1009c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
871b4a2
 
 
 
 
 
 
 
 
 
 
 
 
 
bb1009c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c9d32ea
bdeed57
 
 
871b4a2
 
 
 
 
 
 
 
 
 
 
 
 
 
bdeed57
bb1009c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
871b4a2
 
bb1009c
871b4a2
 
 
bb1009c
871b4a2
 
 
 
bb1009c
 
871b4a2
 
bb1009c
d333575
871b4a2
bb1009c
871b4a2
 
 
bb1009c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# -*- coding: utf-8 -*-
"""DriverPosPredictionFinal

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1DGTfO4HEZDof1phuficJD_J8v38JnF3C
"""

from flask import Flask,jsonify
import json
# Flask application object. In the code visible here no route is registered
# on it: the only ``@app.route`` decorator appears in a commented-out line.
app = Flask(__name__)


from sklearn import tree, linear_model
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_squared_log_error, median_absolute_error
import pandas as pd
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import xgboost as xgb
import pickle

def read_data(file_name):
    """Load the lap CSV and split it into a feature frame and a target series.

    ``LapTime`` is parsed as a timedelta and converted to seconds, and the
    ``Compound`` / ``Compound2`` columns are replaced by integer codes.

    Args:
        file_name: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A ``(X, y)`` tuple. ``X`` is the loaded frame without the ``Race``,
        ``Driver``, ``Position``, ``Rainfall`` and ``RainFall2`` columns;
        ``y`` is the ``Position`` column.

    Raises:
        KeyError: If one of the columns referenced above is not in the file.
    """
    df = pd.read_csv(file_name)

    # Timedelta strings such as "0 days 00:01:38.5" become plain seconds.
    df['LapTime'] = pd.to_timedelta(df['LapTime']).dt.total_seconds()

    # Integer-encode the categorical columns. ``pd.factorize(sort=True)``
    # numbers the sorted distinct values 0..n-1, the same scheme a label
    # encoder uses, without needing a name that this file never imports.
    # Each column is encoded on its own, so a code in ``Compound`` is not
    # comparable with the same code in ``Compound2``. Missing values are
    # given the sentinel code -1.
    for column in ('Compound', 'Compound2'):
        df[column] = pd.factorize(df[column], sort=True)[0]

    # Drop non-predictive columns and set target variable
    X = df.drop(['Race', 'Driver', 'Position', 'Rainfall', 'RainFall2'], axis=1)
    y = df['Position']
    return X, y

#returns the object for model being used
def decision_tree_regressor_method():
    """Build an unfitted ``DecisionTreeRegressor`` created with ``random_state=42``."""
    return DecisionTreeRegressor(random_state=42)
def decsison_tree_classifier_method(X_train, y_train):
    """Fit a default ``DecisionTreeClassifier`` on the training data and return it.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training targets passed to ``fit``.

    Returns:
        The fitted classifier.
    """
    # ``fit`` hands back the estimator itself, so the fitted tree can be
    # returned straight from the call chain.
    return DecisionTreeClassifier().fit(X_train, y_train)
def linear_reg(X_train, y_train):
    """Fit an ordinary ``LinearRegression`` on the training data and return it.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training targets passed to ``fit``.

    Returns:
        The fitted regression model.
    """
    # ``fit`` returns the estimator, so no intermediate variable is needed.
    return linear_model.LinearRegression().fit(X_train, y_train)
def hyper_paramter_tuning(model, xtrain, ytrain, **kwargs):
    """Grid-search ``model`` and return the best estimator found.

    Every keyword argument becomes one entry of the parameter grid (parameter
    name mapped to its list of candidate values). The search uses 3-fold
    cross-validation, all available cores and negative mean squared error
    as the score.

    The original author notes that this does not work for linear regression.

    Args:
        model: Estimator to tune.
        xtrain: Training features.
        ytrain: Training targets.
        **kwargs: Parameter grid entries forwarded as ``param_grid``.

    Returns:
        The ``best_estimator_`` of the fitted search.
    """
    search = GridSearchCV(
        estimator=model,
        param_grid=kwargs,
        cv=3,
        n_jobs=-1,
        verbose=2,
        scoring='neg_mean_squared_error',
    )
    # ``fit`` returns the search object, which exposes the winning estimator.
    return search.fit(xtrain, ytrain).best_estimator_


def predict(model, xtest, ytest):
    """Predict on the test set, print regression metrics and return the predictions.

    Printed metrics, in order: MSE, RMSE, MAE, R², MSLE and median absolute
    error. Smaller is better for all of them except R².

    Args:
        model: Fitted estimator exposing ``predict``.
        xtest: Test features.
        ytest: True target values for ``xtest``.

    Returns:
        The predictions produced by ``model.predict(xtest)``.
    """
    # Predict the target for the test set
    y_pred = model.predict(xtest)

    # Calculate various evaluation metrics
    mse = mean_squared_error(ytest, y_pred)
    rmse = mse ** 0.5  # average magnitude of the errors, in target units
    mae = mean_absolute_error(ytest, y_pred)
    r2 = r2_score(ytest, y_pred)
    medae = median_absolute_error(ytest, y_pred)

    # MSLE is only defined on a restricted value range; scikit-learn raises
    # ValueError when targets or predictions fall outside it (for example a
    # linear model predicting a negative position). Report that instead of
    # aborting, so the other metrics are still printed and the predictions
    # are still returned.
    try:
        msle_line = f"Test MSLE: {mean_squared_log_error(ytest, y_pred)}"
    except ValueError as exc:
        msle_line = f"Test MSLE: not available ({exc})"

    # Print the metrics
    print(f"Test MSE: {mse}")  # smaller is better - mean squared error
    print(f"Test RMSE: {rmse}")
    print(f"Test MAE: {mae}")
    print(f"Test R²: {r2}")
    print(msle_line)
    print(f"Test Median Absolute Error: {medae}")
    return y_pred



def hyperamter_tuning_paramter_grid():
    """Return the decision-tree hyper-parameter grid used for grid search.

    Returns:
        A new dict mapping each parameter name to its list of candidate
        values.
    """
    # TODO: add more hyper-parameter tuning criteria, e.g.
    # 'criterion': ['gini', 'entropy'].
    return dict(
        max_depth=[1, 2, 3, 4, 5, None],
        min_samples_split=[2, 3, 5, 6],
        min_samples_leaf=[1, 2, 4, 5],
        min_weight_fraction_leaf=[0.0, 0.01, 0.05, 0.1, 0.2],
    )

#@app.route('/predict',methods=['POST']) # api endpoint -any url(requests) with /predict will route to the method 
def prediction(req):
    """Decode a JSON request body, prompt for the race features, return a stub reply.

    Args:
        req: JSON text; it is decoded with ``json.loads``.

    Returns:
        The Flask JSON response ``{'message': 'Hello world'}``.

    Raises:
        ValueError: If ``req`` is not valid JSON or an answer typed at a
            prompt is not a number.
    """
    # NOTE(review): the decoded payload is never read below, and the feature
    # values come from interactive stdin prompts rather than from the
    # request. Looks like an unfinished endpoint; confirm the intended
    # payload schema before wiring this to a route.
    payload = json.loads(req)

    # (feature name, prompt) pairs, asked in this order.
    prompts = (
        ('LapNumber', "Enter the number of total laps in race: "),
        ('LapTime', "Avg lap time from prev year (in seconds): "),
        ('TyreLife', "Enter tyre life (in laps): "),
        ('Speedl1', "Enter Speedl1 (in km/h): "),
        ('Speedl2', "Enter Speedl2 (in km/h): "),
        ('SpeedFL', "Enter SpeedFL (in km/h): "),
        ('SpeedST', "Enter SpeedST (in km/h): "),
        ('AirTemp', "Enter air temperature in Celsius: "),
        ('TrackTemp', "Enter track temperature in Celsius: "),
        ('Humidity', "Enter humidity percentage (0-100): "),
        ('WindSpeed', "Enter wind speed in km/h: "),
        ('Compound', "Enter compound (numeric): "),
        ('Compound2', "Enter compound2 (numeric): "),
    )
    # The collected values are not used yet; the reply is a fixed message.
    input_data = {field: float(input(text)) for field, text in prompts}
    return jsonify({'message':'Hello world'})


def ask_user(model, X_test=None):
    """Prompt for race details on stdin and print the model's predicted position.

    The answers are put into a single-row DataFrame. When ``X_test`` is
    given, that row is aligned to ``X_test.columns``: columns the prompts
    did not provide are filled with 0, columns ``X_test`` does not have are
    dropped, and the column order follows ``X_test``.

    Args:
        model: Object exposing ``predict``; it receives the single-row frame.
        X_test: Optional frame whose columns define the layout the model
            expects.

    Returns:
        The first element of the model's prediction, or ``None`` if an
        answer was not numeric or any other error occurred (the error is
        printed, not raised).
    """
    # (column name, prompt) pairs, asked in this order: basic race
    # information first, then race-day conditions.
    questions = (
        ('LapNumber', "Enter the number of total laps in race: "),
        ('LapTimes', "Avg lap time from prev year (or estimate 98): "),
        ('PitStopTimes', "Enter pit stop time (0 if no pit stop): "),
        ('PrevLap', "Enter avg lap time differece (0-2) (0 if first lap): "),
        ('AvgSpeed', "Enter average speed in km/h from prev year: "),
        ('AirTemp_Cel', "estimate air temperature in Celsius on race day: "),
        ('TrackTemp_Cel', "estimate track temperature in Celsius on race day: "),
        ('Humidity', "estimate the humidity percentage (0-100) on race day: "),
        ('WindSpeed_km/h', "estimate wind speed in km/h: "),
    )

    try:
        answers = {name: float(input(text)) for name, text in questions}
        input_df = pd.DataFrame([answers])

        if X_test is not None:
            # Zero-fill every expected column the prompts did not supply,
            # then keep exactly the expected columns in the expected order.
            for col in X_test.columns:
                if col not in input_df.columns:
                    input_df[col] = 0
            input_df = input_df[X_test.columns]

        predicted = model.predict(input_df)
        first = predicted[0]
        print(f"\nPredicted position: {int(round(first))}")
        return first

    except ValueError as e:
        print(f"Error: Please enter valid numeric values. Details: {e}")
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None



# Load the dataset and hold out 30% of the rows for evaluation.
X, y = read_data("b.csv")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Train and evaluate models
# model_1 = linear_reg(X_train, y_train)
# y_pred_1 = predict(model_1, X_test, y_test)

#model_2 = decsison_tree_classifier_method(X_train, y_train)
# The grid builder is defined as ``hyperamter_tuning_paramter_grid``; the
# call must use that exact spelling or the script stops with a NameError.
parameter_grid = hyperamter_tuning_paramter_grid()
# tuned_model_2 = hyper_paramter_tuning(model_2, X_train, y_train, **parameter_grid)
# y_pred_2 = predict(tuned_model_2, X_test, y_test)

# Tune a decision-tree regressor over the grid and report its test metrics.
model_3 = decision_tree_regressor_method()
tuned_model_3 = hyper_paramter_tuning(model_3, X_train, y_train, **parameter_grid)
y_pred_3 = predict(tuned_model_3, X_test, y_test)

# Ask user for input and predict position
predicted_position = ask_user(tuned_model_3, X_test)

# Save model
with open('tuned_model.pkl', 'wb') as file:
    pickle.dump(tuned_model_3, file)