from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
import requests
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from xgboost import XGBClassifier
from joblib import dump, load

app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Columns used as features/target throughout training and inference.
SELECTED_COLUMNS = ['customer_name', 'customer_address', 'customer_phone',
                    'customer_email', 'cod', 'weight', 'origin_city.name',
                    'destination_city.name', 'status.name']


def evaluate_model(model, X_test, y_test):
    """Print and return accuracy for a fitted model on a held-out set."""
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
    print("Classification Report:\n", classification_report(y_test, y_pred))
    return accuracy


def train_the_model(data, page):
    if page == 2:
        # Incremental update: reuse the saved encoders and model.
        encoders = load('encoders.joblib')
        xgb_model = load('xgb_model.joblib')

        new_data_filled = data[SELECTED_COLUMNS].fillna('Missing')

        # Extend each encoder with any categories it has not seen before,
        # then transform the column.
        for col, encoder in encoders.items():
            if col in new_data_filled.columns:
                unseen_categories = set(new_data_filled[col]) - set(encoder.classes_)
                if unseen_categories:
                    encoder.classes_ = np.append(encoder.classes_,
                                                 list(unseen_categories))
                new_data_filled[col] = encoder.transform(new_data_filled[col])

        X_new = new_data_filled.drop('status.name', axis=1)
        y_new = new_data_filled['status.name']

        # Hold out part of the new data so the updated model can be
        # evaluated on examples it was not refit on.
        X_train, X_test, y_train, y_test = train_test_split(
            X_new, y_new, test_size=0.2, random_state=42)

        # Note: calling fit() retrains the model from scratch on the new data.
        # To continue training from the saved booster instead, pass
        # xgb_model=xgb_model.get_booster() to fit().
        xgb_model.fit(X_train, y_train)
        dump(xgb_model, 'xgb_model.joblib')
        print("Model updated with new data.")

        updated_model_accuracy = evaluate_model(xgb_model, X_test, y_test)
        print("Updated model accuracy:", updated_model_accuracy)
    else:
        # Handle missing values.
        data_filled = data[SELECTED_COLUMNS].fillna('Missing')

        # Encode categorical variables.
        encoders = {col: LabelEncoder() for col in SELECTED_COLUMNS
                    if data_filled[col].dtype == 'object'}
        for col, encoder in encoders.items():
            data_filled[col] = encoder.fit_transform(data_filled[col])

        # Split the dataset.
        X = data_filled.drop('status.name', axis=1)
        y = data_filled['status.name']
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42)

        # Hyperparameter grid to search.
        param_grid = {
            'max_depth': [3, 4, 5],
            'learning_rate': [0.01, 0.1, 0.4],
            'n_estimators': [100, 200, 300],
            'subsample': [0.8, 0.9, 1],
            'colsample_bytree': [0.3, 0.7],
        }

        # Initialize the classifier.
        xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')

        # Grid search over the parameter grid.
        grid_search = GridSearchCV(xgb, param_grid, cv=2, n_jobs=-1,
                                   scoring='accuracy')
        grid_search.fit(X_train, y_train)

        best_params = grid_search.best_params_
        print("Best parameters:", best_params)

        # Retrain with the best parameters and evaluate on the test set.
        best_xgb = XGBClassifier(**best_params, use_label_encoder=False,
                                 eval_metric='logloss')
        best_xgb.fit(X_train, y_train)

        evaluate_model(best_xgb, X_test, y_test)

        # Persist the model and the encoders for later incremental updates.
        model_filename = 'xgb_model.joblib'
        dump(best_xgb, model_filename)
        encoders_filename = 'encoders.joblib'
        dump(encoders, encoders_filename)
        print(f"Model saved as {model_filename}")
        print(f"Encoders saved as {encoders_filename}")
        print("New base model trained.")
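
# Quick local smoke test for train_the_model, sketched under the assumption
# that synthetic rows matching SELECTED_COLUMNS are acceptable; every value
# below is a fabricated placeholder, not real Curfox data. Uncomment to run
# (the grid search over 162 candidates takes a while):
#
#   n = 200
#   rng = np.random.default_rng(0)
#   sample = pd.DataFrame({
#       'customer_name': rng.choice(['Amal', 'Bimal', 'Chamod'], n),
#       'customer_address': rng.choice(['addr1', 'addr2', 'addr3'], n),
#       'customer_phone': rng.choice(['0711111111', '0722222222'], n),
#       'customer_email': rng.choice(['a@x.com', 'b@x.com'], n),
#       'cod': rng.uniform(0, 5000, n),
#       'weight': rng.uniform(0.1, 10.0, n),
#       'origin_city.name': rng.choice(['Colombo', 'Kandy'], n),
#       'destination_city.name': rng.choice(['Galle', 'Jaffna'], n),
#       'status.name': rng.choice(['DELIVERED', 'RETURN TO CLIENT'], n),
#   })
#   train_the_model(sample, page=1)   # any page != 2 trains a fresh base model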
@app.get("/trigger_the_data_fetcher")
async def your_continuous_function(page: int, paginate: int, Tenant: str):
    print("data fetcher running.....")

    # Fetch one page of orders from the order-list endpoint.
    url = ("https://dev3.api.curfox.parallaxtec.com/api/ml/order-list"
           f"?sort=id&paginate={paginate}&page={page}")
    headers = {
        'Accept': 'application/json',
        'X-Tenant': Tenant,  # e.g. 'royalexpress'
    }
    response = requests.get(url, headers=headers)
    json_response = response.json()

    # Extract 'data' and flatten the nested JSON into a DataFrame.
    data = json_response['data']
    data_count = len(data)
    df = pd.json_normalize(data)

    # Keep only orders whose status is terminal.
    data = df[df['status.name'].isin(['RETURN TO CLIENT', 'DELIVERED'])]
    print("data collected from page: " + str(page))
    # data.to_csv("new.csv")

    train_the_model(data, page)

    return f"model trained with page number: {page} data count: {data_count}"


@app.get("/test_api")
async def test_api():
    encoders = load('encoders.joblib')
    xgb_model = load('xgb_model.joblib')
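
# Minimal sketch for running the service locally (assumes uvicorn is
# installed; the actual deployment entry point may differ):
if __name__ == "__main__":
    import uvicorn

    # Example request once the server is up (tenant value is a placeholder):
    #   GET /trigger_the_data_fetcher?page=1&paginate=100&Tenant=royalexpress
    uvicorn.run(app, host="0.0.0.0", port=8000)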