transexpress_ml_api

Sleeping

App Files Files Community

Arafath10 committed on Apr 30, 2024

Commit

64c5058

verified ·

1 Parent(s): 5ca8728

Update main.py

Browse files

Files changed (1) hide show

main.py +38 -93

main.py CHANGED Viewed

@@ -26,66 +26,11 @@ app.add_middleware(
-def train_the_model(data,page):
-    if str(page) == "2":
-        new_data = data
-        encoders = load('transexpress_encoders.joblib')
-        xgb_model = load('transexpress_xgb_model.joblib')
-        # Selecting and filling missing data
-        selected_columns = ['customer_name', 'customer_address', 'customer_phone_no',
-                            'weight', 'cod', 'pickup_address', 'client_number', 'destination_city',
-                            'status_name']
-        new_data_filled = new_data[selected_columns].fillna('Missing')
-        for col, encoder in encoders.items():
-            if col in new_data_filled.columns:
-                unseen_categories = set(new_data_filled[col]) - set(encoder.classes_)
-                if unseen_categories:
-                    for category in unseen_categories:
-                        encoder.classes_ = np.append(encoder.classes_, category)
-                    new_data_filled[col] = encoder.transform(new_data_filled[col])
-                else:
-                    new_data_filled[col] = encoder.transform(new_data_filled[col])
-        X_new = new_data_filled.drop('status_name', axis=1)
-        y_new = new_data_filled['status_name']
-        X_train, X_test, y_train, y_test = train_test_split(X_new, y_new, test_size=0.2, random_state=42)
-        # Setup the hyperparameter grid to search
-        param_grid = {
-            'max_depth': [3, 4, 5],
-            'learning_rate': [0.01, 0.1, 0.4],
-            'n_estimators': [100, 200, 300],
-            'subsample': [0.8, 0.9, 1],
-            'colsample_bytree': [0.3, 0.7]
-        }
-        # Initialize the classifier
-        #xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
-        # Setup GridSearchCV
-        grid_search = GridSearchCV(xgb_model, param_grid, cv=40, n_jobs=-1, scoring='accuracy')
-        # Fit the grid search to the data
-        grid_search.fit(X_train, y_train)
-        dump(grid_search, 'transexpress_xgb_model.joblib')
-        # Making predictions and evaluating the model
-        y_pred = grid_search.predict(X_test)
-        accuracy = accuracy_score(y_test, y_pred)
-        classification_rep = classification_report(y_test, y_pred)
-        # Returning the results
-        return accuracy, classification_rep, "Model finetuned with new data."
-    if str(page) == "1":
-        data = data
         # Select columns
         selected_columns = ['customer_name', 'customer_address', 'customer_phone_no',
                             'weight','cod','pickup_address','client_number','destination_city',
@@ -104,35 +49,27 @@ def train_the_model(data,page):
         y = data_filled['status_name']
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-        # Setup the hyperparameter grid to search
-        param_grid = {
-            'max_depth': [3, 4, 5],
-            'learning_rate': [0.01, 0.1, 0.4],
-            'n_estimators': [100, 200, 300],
-            'subsample': [0.8, 0.9, 1],
-            'colsample_bytree': [0.3, 0.7]
         }
-        # Initialize the classifier
-        xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
-        # Setup GridSearchCV
-        grid_search = GridSearchCV(xgb, param_grid, cv=40, n_jobs=-1, scoring='accuracy')
-        # Fit the grid search to the data
-        grid_search.fit(X_train, y_train)
-        # Get the best parameters
-        best_params = grid_search.best_params_
-        print("Best parameters:", best_params)
-        # Train the model with best parameters
-        best_xgb = XGBClassifier(**best_params, use_label_encoder=False, eval_metric='logloss')
-        best_xgb.fit(X_train, y_train)
         # Predict on the test set
-        y_pred = best_xgb.predict(X_test)
-        y_pred_proba = best_xgb.predict_proba(X_test)
         # Evaluate the model
         accuracy = accuracy_score(y_test, y_pred)
@@ -140,23 +77,17 @@ def train_the_model(data,page):
         # Save the model
         model_filename = 'transexpress_xgb_model.joblib'
-        dump(best_xgb, model_filename)
         # Save the encoders
         encoders_filename = 'transexpress_encoders.joblib'
         dump(encoders, encoders_filename)
-        return accuracy,classification_rep,"base Model trained"
 @app.get("/trigger_the_data_fecher")
 async def your_continuous_function(page: str,paginate: str):
-    if str(page) == "2":
-        df = pd.read_csv("transexpress_v10.csv")
-        print("file readed")
-        accuracy,classification_rep,message = train_the_model(df,page)
-        return {"message":message,"page_number":page,"data_count":data_count,"accuracy":accuracy,"classification_rep":classification_rep}
     print("data fetcher running.....")
@@ -188,9 +119,22 @@ async def your_continuous_function(page: str,paginate: str):
     print("data collected from page : "+page)
     #return "done"
-    #data.to_csv("new.csv")
-    accuracy,classification_rep,message = train_the_model(df,page)
     return {"message":message,"page_number":page,"data_count":data_count,"accuracy":accuracy,"classification_rep":classification_rep}
@@ -214,10 +158,11 @@ async def model_updated_time():
 # Endpoint for making predictions
 @app.post("/predict")
 def predict(
     customer_name: str,
     customer_address: str,
     customer_phone: str,
-    weight: int,
     cod: int,
     pickup_address: str,
     client_number:str,

+def train_the_model():
+        data = pd.read_csv("trainer_data.csv")
+        print(data["customer_name"].count())
         # Select columns
         selected_columns = ['customer_name', 'customer_address', 'customer_phone_no',
                             'weight','cod','pickup_address','client_number','destination_city',
         y = data_filled['status_name']
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+        # Parameters to use for the model
+        params = {
+            'colsample_bytree': 0.3,
+            'learning_rate': 0.6,
+            'max_depth': 8,
+            'n_estimators': 100,
+            'subsample': 0.9,
+            'use_label_encoder': False,
+            'eval_metric': 'logloss'
         }
+        # Initialize the classifier with the specified parameters
+        xgb = XGBClassifier(**params)
+        # Train the model
+        xgb.fit(X_train, y_train)
         # Predict on the test set
+        y_pred = xgb.predict(X_test)
+        y_pred_proba = xgb.predict_proba(X_test)
         # Evaluate the model
         accuracy = accuracy_score(y_test, y_pred)
         # Save the model
         model_filename = 'transexpress_xgb_model.joblib'
+        dump(xgb, model_filename)
         # Save the encoders
         encoders_filename = 'transexpress_encoders.joblib'
         dump(encoders, encoders_filename)
+        return accuracy,classification_rep,"Model trained with new data"
 @app.get("/trigger_the_data_fecher")
 async def your_continuous_function(page: str,paginate: str):
     print("data fetcher running.....")
     print("data collected from page : "+page)
     #return "done"
+    try:
+        file_path = 'trainer_data.csv'  # Replace with your file path
+        source_csv = pd.read_csv(file_path)
+        new_data = df
+        combined_df_final = pd.concat([source_csv,new_data], ignore_index=True)
+        combined_df_final.to_csv("trainer_data.csv")
+        print("data added")
+    except:
+        df.to_csv("trainer_data.csv")
+        print("data created")
+    accuracy,classification_rep,message = train_the_model()
     return {"message":message,"page_number":page,"data_count":data_count,"accuracy":accuracy,"classification_rep":classification_rep}
 # Endpoint for making predictions
 @app.post("/predict")
 def predict(
+    date : str
     customer_name: str,
     customer_address: str,
     customer_phone: str,
+    weight: float,
     cod: int,
     pickup_address: str,
     client_number:str,