File size: 8,246 Bytes
bb1009c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# -*- coding: utf-8 -*-
"""DriverPosPredictionFinal

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1DGTfO4HEZDof1phuficJD_J8v38JnF3C
"""







from sklearn import tree, linear_model
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
!pip install bayesian-optimization
from bayes_opt import BayesianOptimization
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_squared_log_error, median_absolute_error
import pandas as pd
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import xgboost as xgb
import pickle

def read_data(file_name):
  """Load the race CSV and split it into feature columns and the target.

  Args:
    file_name: Path (or any other source ``pd.read_csv`` accepts) of the CSV.

  Returns:
    A ``(x, y)`` tuple where ``x`` is the loaded frame without the
    ``RaceID``, ``Pos`` and ``formating`` columns and ``y`` is the ``Pos``
    column.
  """
  frame = pd.read_csv(file_name)
  non_feature_columns = ['RaceID', "Pos", "formating"]
  features = frame.drop(columns=non_feature_columns)
  target = frame["Pos"]
  return features, target

# Each helper below returns the model object being used.
def decision_tree_regressor_method():
  """Build an unfitted ``DecisionTreeRegressor`` seeded with ``random_state=42``.

  Returns:
    The new, not-yet-fitted regressor.
  """
  return DecisionTreeRegressor(random_state=42)
def decsison_tree_classifier_method(X_train,y_train):
  """Create a default ``DecisionTreeClassifier`` and fit it on the training data.

  Args:
    X_train: Training feature matrix.
    y_train: Training target values.

  Returns:
    Whatever ``fit`` returns, i.e. the fitted classifier.
  """
  return DecisionTreeClassifier().fit(X_train, y_train)
def linear_reg(X_train,y_train):
  """Fit an ordinary ``LinearRegression`` model on the training data.

  Args:
    X_train: Training feature matrix.
    y_train: Training target values.

  Returns:
    The fitted ``LinearRegression`` estimator.
  """
  # scikit-learn's ``fit`` returns the estimator itself, so chaining hands
  # back the same fitted object.
  return linear_model.LinearRegression().fit(X_train, y_train)
def hyper_paramter_tuning(model, xtrain,ytrain,**kwargs): #doesnt work for linear regression
  """Grid-search ``model`` over the parameter lists passed as keyword arguments.

  The search uses 3-fold cross-validation, all available cores
  (``n_jobs=-1``), verbose progress output and ``neg_mean_squared_error``
  as the score.

  Args:
    model: Estimator to tune.
    xtrain: Training feature matrix.
    ytrain: Training target values.
    **kwargs: Parameter name -> list of candidate values; forwarded
      unchanged as ``param_grid``.

  Returns:
    The ``best_estimator_`` found by the search.
  """
  search = GridSearchCV(
      estimator=model,
      param_grid=kwargs,
      cv=3,
      n_jobs=-1,
      verbose=2,
      scoring='neg_mean_squared_error',
  )
  search.fit(xtrain, ytrain)
  return search.best_estimator_


def predict(model, xtest,ytest):
    """Predict on the test set, print evaluation metrics and return the predictions.

    Args:
        model: Fitted estimator exposing ``predict``.
        xtest: Test feature matrix.
        ytest: True target values for ``xtest``.

    Returns:
        The predictions produced by ``model.predict(xtest)``.
    """
    # Predict the target for the test set
    y_pred = model.predict(xtest)

    # Calculate various evaluation metrics
    mse = mean_squared_error(ytest, y_pred)
    rmse = mse ** 0.5
    mae = mean_absolute_error(ytest, y_pred)
    r2 = r2_score(ytest, y_pred)
    try:
        msle = mean_squared_log_error(ytest, y_pred)
    except ValueError:
        # scikit-learn rejects MSLE when targets or predictions are negative,
        # which an unconstrained regressor can produce. Report it as
        # undefined instead of aborting the whole evaluation.
        msle = float("nan")
    medae = median_absolute_error(ytest, y_pred)

    # Print the metrics
    print(f"Test MSE: {mse}")  # mean squared error - smaller is better
    print(f"Test RMSE: {rmse}")  # square root of the MSE
    print(f"Test MAE: {mae}")
    print(f"Test R²: {r2}")
    print(f"Test MSLE: {msle}")
    print(f"Test Median Absolute Error: {medae}")
    return y_pred



def hyperamter_tuning_paramter_grid():
  """Return the hyper-parameter grid used when tuning the decision trees.

  Returns:
    A new dict mapping each parameter name to its list of candidate values.
  """
  # TODO: add more hyper-parameter tuning criteria,
  # e.g. 'criterion': ['gini', 'entropy'].
  return dict(
      max_depth=[1, 2, 3, 4, 5, None],
      min_samples_split=[2, 3, 5, 6],
      min_samples_leaf=[1, 2, 4, 5],
      min_weight_fraction_leaf=[0.0, 0.01, 0.05, 0.1, 0.2],
  )




def ask_user(model, X_test=None):
    """
    Prompt for the race inputs on stdin and predict a position from them.

    The answers are assembled into a one-row DataFrame. When ``X_test`` is
    given, any of its columns the user was not asked about are added with
    the value 0 and the row is re-ordered to ``X_test.columns``.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Optional frame whose columns define the expected layout.

    Returns:
        The first element of ``model.predict(...)``, or ``None`` when an
        answer is not numeric or any other error occurs (a message is
        printed in both cases).
    """
    # (feature name, prompt text) pairs, asked in exactly this order.
    questions = (
        ('LapNumber', "Enter the number of total laps in race: "),
        ('LapTimes', "Avg lap time from prev year (or estimate 98): "),
        ('PitStopTimes', "Enter pit stop time (0 if no pit stop): "),
        ('PrevLap', "Enter avg lap time differece (0-2) (0 if first lap): "),
        ('AvgSpeed', "Enter average speed in km/h from prev year: "),
        ('AirTemp_Cel', "estimate air temperature in Celsius on race day: "),
        ('TrackTemp_Cel', "estimate track temperature in Celsius on race day: "),
        ('Humidity', "estimate the humidity percentage (0-100) on race day: "),
        ('WindSpeed_km/h', "estimate wind speed in km/h: "),
    )

    try:
        answers = {feature: float(input(prompt)) for feature, prompt in questions}
        row = pd.DataFrame([answers])

        if X_test is not None:
            # Zero-fill columns the user was not asked about, then adopt the
            # reference column order.
            for column in X_test.columns:
                if column not in row.columns:
                    row[column] = 0
            row = row[X_test.columns]

        prediction = model.predict(row)
        print(f"\nPredicted position: {int(round(prediction[0]))}")
        return prediction[0]

    except ValueError as e:
        print(f"Error: Please enter valid numeric values. Details: {e}")
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None





#-----linear regression------------
# Load the data, hold out 30% of the rows for testing, fit a linear model
# and print its metrics. train_test_split is called without random_state,
# so the split is not pinned to a seed.
x , y= read_data("b.csv")
xtrain, xtest , ytrain, ytest = train_test_split(x,y,test_size = 0.3)

model_1 = linear_reg(xtrain,ytrain)

ypred = predict(model_1,xtest,ytest)
#visvualize(ytest,ypred)

#-----decision tree classifier-----
# Reload and re-split the data, fit a classifier, then grid-search it.
# hyper_paramter_tuning scores candidates with 'neg_mean_squared_error'.
x , y= read_data("b.csv")
xtrain, xtest , ytrain, ytest = train_test_split(x,y,test_size = 0.3)

model_2 = decsison_tree_classifier_method(xtrain,ytrain)

parameter_grid= hyperamter_tuning_paramter_grid()
tuned_model_2 = hyper_paramter_tuning(model_2,xtrain,ytrain,**parameter_grid)
y_pred = predict(tuned_model_2,xtest,ytest)

# NOTE(review): this section assigns `y_pred`, so `ypred` in the disabled call
# below would still hold the linear-regression predictions.
#visvualize(ytest,ypred)

#----decision tree regressor-------
# Reload and re-split the data, then grid-search an unfitted regressor.
x , y= read_data("b.csv")
xtrain, xtest , ytrain, ytest = train_test_split(x,y,test_size = 0.3)

model_3 = decision_tree_regressor_method()

parameter_grid= hyperamter_tuning_paramter_grid()
tuned_model_3 = hyper_paramter_tuning(model_3,xtrain,ytrain,**parameter_grid)
y_pred= predict(tuned_model_3,xtest,ytest)


# Ask user for input and predict position
#predicted_position = ask_user(tuned_model_3, x_columns)




# xtest is passed only so ask_user can align its columns with the model's.
predicted_position = ask_user(tuned_model_3, xtest)

# NOTE(review): the disabled call below is malformed: `.open` is invoked on the
# model and 'mb' is not a valid file mode. A working form would be
# `pickle.dump(tuned_model_3, open('model.pkl', 'wb'))`. No active statement in
# this section writes model.pkl.
#pickle.dump(tuned_model_3.open('model.pkl','mb'))

!pip install streamlit
import streamlit as st
import pickle
import numpy as np

# Load the trained model used by predictPos. The file is opened in a `with`
# block so the handle is closed as soon as unpickling finishes.
# NOTE(security): unpickling can execute arbitrary code; only load a
# model.pkl that comes from a trusted source.
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

def predictPos(TLaps,Laptime,Pit,Prev,AvgSpeed,AirTemp,TrackTemp,Humid,Wind):
  """Predict a position from the nine race inputs using the loaded ``model``.

  The arguments are packed, in signature order, into a single-row 2-D array
  and converted to ``float64`` before being passed to ``model.predict``.

  Returns:
    The first element of the model's prediction.
  """
  row = [TLaps, Laptime, Pit, Prev, AvgSpeed, AirTemp, TrackTemp, Humid, Wind]
  features = np.array([row]).astype(np.float64)
  return model.predict(features)[0]

def main():
  """Render the Streamlit form and show the predicted driver position.

  Draws the title and banner, collects the nine race inputs as text fields
  and, when the "Predict" button is pressed, passes them to ``predictPos``.
  A ``ValueError`` from the prediction (for example when a field still holds
  the "Type Here" placeholder and cannot be converted to a number) is shown
  as an error message instead of propagating.
  """
  st.title("title")
  html_temp="""
  <div style = "background-color:#025244; padding :10px">
  <h2 style = "color:white; text-align:center;"> Driver Position Prediction </h2>
  </div>
  """
  st.markdown(html_temp, unsafe_allow_html=True)

  # Basic race information
  TLaps = st.text_input("Enter the number of total laps in race: ","Type Here")
  Laptime = st.text_input("Avg lap time from prev year (or estimate 98): ","Type Here")
  Pit = st.text_input("Enter pit stop time (0 if no pit stop): ","Type Here")
  Prev= st.text_input("Enter avg lap time differece (0-2) (0 if first lap): ","Type Here")

  # Race conditions
  AvgSpeed = st.text_input("Enter average speed in km/h from prev year: ","Type Here")
  AirTemp= st.text_input("estimate air temperature in Celsius on race day: ","Type Here")
  TrackTemp = st.text_input("estimate track temperature in Celsius on race day: ","Type Here")
  Humid = st.text_input("estimate the humidity percentage (0-100) on race day: ","Type Here")
  Wind = st.text_input("estimate wind speed in km/h: ","Type Here")

  if st.button("Predict"):
    try:
      output=predictPos(TLaps,Laptime,Pit,Prev,AvgSpeed,AirTemp,TrackTemp,Humid,Wind)
    except ValueError as e:
      # Same wording as the console helper ask_user uses for bad input.
      st.error(f"Error: Please enter valid numeric values. Details: {e}")
    else:
      st.success('The postion of driver is {}'.format(output))

# Entry point: build the page when this file is run as the main script.
if __name__ == "__main__":
    main()