Arafath10 committed on
Commit
167a0c6
·
verified ·
1 Parent(s): 1380647

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +112 -6
main.py CHANGED
@@ -5,6 +5,12 @@ import requests
5
  import pandas as pd
6
  import json
7
  import httpx
 
 
 
 
 
 
8
  app = FastAPI()
9
 
10
  app.add_middleware(
@@ -15,8 +21,6 @@ app.add_middleware(
15
  allow_headers=["*"],
16
  )
17
 
18
- global page
19
- page = 1
20
  # Declare the continuous function as an async function.
21
  #async def your_continuous_function():
22
 
@@ -27,12 +31,114 @@ page = 1
27
  #async def startup_event():
28
  # Start the continuous function as a background task.
29
  #asyncio.create_task(your_continuous_function())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
- @app.get("/trigger_the_data_fecher_every_60s")
32
- async def your_continuous_function():
33
- global page
34
 
35
- page+=1
36
  return "model trained with new page : "+str(page)+" data"
37
 
38
  @app.get("/test_api")
 
5
  import pandas as pd
6
  import json
7
  import httpx
8
+ import pandas as pd
9
+ from sklearn.model_selection import train_test_split, GridSearchCV
10
+ from sklearn.preprocessing import LabelEncoder
11
+ from xgboost import XGBClassifier
12
+ from sklearn.metrics import accuracy_score, classification_report
13
+
14
  app = FastAPI()
15
 
16
  app.add_middleware(
 
21
  allow_headers=["*"],
22
  )
23
 
 
 
24
  # Declare the continuous function as an async function.
25
  #async def your_continuous_function():
26
 
 
31
  #async def startup_event():
32
  # Start the continuous function as a background task.
33
  #asyncio.create_task(your_continuous_function())
34
+
35
+ from joblib import dump
36
+
37
def train_the_model(data):
    """Train an XGBoost classifier on order data and save it to disk.

    The model predicts the ``'status.name'`` column from the remaining
    selected columns. Hyperparameters are chosen with a 10-fold grid search
    on an 80% training split; the remaining 20% is used to print an accuracy
    score and a classification report.

    Args:
        data: DataFrame containing every column listed in
            ``selected_columns`` below.

    Returns:
        None. As side effects the fitted model is written to
        ``xgb_model.joblib`` and the fitted label encoders to
        ``encoders.joblib`` (both relative to the current working
        directory), and progress is printed to stdout.

    Raises:
        KeyError: If any selected column is missing from ``data``.
    """
    # Feature columns plus the prediction target ('status.name').
    selected_columns = ['customer_name', 'customer_address', 'customer_phone',
                        'customer_email', 'cod', 'weight',
                        'origin_city.name', 'destination_city.name', 'status.name']
    target_column = 'status.name'

    # Handling missing values. fillna returns a new frame, so the caller's
    # DataFrame is never modified by the encoding step below.
    data_filled = data[selected_columns].fillna('Missing')

    # Encoding categorical variables: one LabelEncoder per object column,
    # kept so the same mapping can be reapplied at prediction time.
    encoders = {}
    for col in selected_columns:
        if data_filled[col].dtype == 'object':
            encoder = LabelEncoder()
            # Cast to str first: after fillna('Missing') a column can hold a
            # mix of numbers and strings, which LabelEncoder refuses to sort.
            # Pure-string columns are unaffected by the cast.
            data_filled[col] = encoder.fit_transform(data_filled[col].astype(str))
            encoders[col] = encoder

    # Splitting the dataset
    X = data_filled.drop(target_column, axis=1)
    y = data_filled[target_column]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    # Setup the hyperparameter grid to search
    param_grid = {
        'max_depth': [3, 4, 5],
        'learning_rate': [0.01, 0.1, 0.4],
        'n_estimators': [100, 200, 300],
        'subsample': [0.8, 0.9, 1],
        'colsample_bytree': [0.3, 0.7],
    }

    # Initialize the classifier and run the grid search.
    xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
    grid_search = GridSearchCV(xgb, param_grid, cv=10, n_jobs=-1, scoring='accuracy')
    grid_search.fit(X_train, y_train)

    best_params = grid_search.best_params_
    print("Best parameters:", best_params)

    # GridSearchCV (refit=True by default) has already retrained the best
    # parameter combination on the full training split, so reuse that
    # estimator instead of fitting an identical model a second time.
    best_xgb = grid_search.best_estimator_

    # Evaluate on the held-out test set.
    y_pred = best_xgb.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    classification_rep = classification_report(y_test, y_pred)

    # Print the results
    print("Accuracy:", accuracy)
    print("Classification Report:\n", classification_rep)

    # Save the model
    model_filename = 'xgb_model.joblib'
    dump(best_xgb, model_filename)

    # Save the encoders
    encoders_filename = 'encoders.joblib'
    dump(encoders, encoders_filename)

    print(f"Model saved as {model_filename}")
    print(f"Encoders saved as {encoders_filename}")
107
+
108
@app.get("/trigger_the_data_fecher_every_30min")
async def your_continuous_function(page: int):
    """Fetch one page of orders from the remote API and retrain the model.

    Downloads up to 500 orders for the requested page, keeps only the rows
    whose ``'status.name'`` is ``'RETURN TO CLIENT'`` or ``'DELIVERED'``,
    and passes them to ``train_the_model``.

    Args:
        page: Page number of the remote order list to fetch.

    Returns:
        A status string: either confirmation that the model was trained on
        this page, or a notice that the page held no usable rows.

    Raises:
        requests.HTTPError: If the remote API answers with an error status.
        requests.Timeout: If the remote API does not answer in time.
    """
    # NOTE(review): the HTTP call and the training below are blocking calls
    # inside an async handler, so the event loop is stalled until they
    # finish. Left as-is because it also keeps concurrent triggers from
    # training (and writing the model files) at the same time -- confirm
    # before moving this work to a thread or background task.
    print("data fetcher running.....")

    url = "https://dev3.api.curfox.parallaxtec.com/api/ml/order-list"
    # Same query string as before (sort=id&paginate=500&page=<page>), built
    # by requests instead of manual string concatenation.
    params = {'sort': 'id', 'paginate': 500, 'page': page}
    headers = {
        'Accept': 'application/json',
        'X-Tenant': 'royalexpress'
    }

    # A timeout keeps a hung upstream from blocking this worker forever;
    # raise_for_status surfaces HTTP errors here rather than as a confusing
    # JSON/KeyError further down.
    response = requests.get(url, headers=headers, params=params, timeout=60)
    response.raise_for_status()

    # Extracting 'data' for conversion into a flat table
    json_response = response.json()
    df = pd.json_normalize(json_response['data'])

    # An empty page normalizes to a frame with no columns at all, so check
    # for the status column before filtering on it.
    if 'status.name' not in df.columns:
        return "no data found on page : "+str(page)+", model not trained"

    data = df[df['status.name'].isin(['RETURN TO CLIENT', 'DELIVERED'])]
    print("data collected from page : "+str(page))
    #data.to_csv("new.csv")

    # Nothing to learn from if no order on this page reached a final status.
    if data.empty:
        return "no data found on page : "+str(page)+", model not trained"

    train_the_model(data)

    return "model trained with new page : "+str(page)+" data"
143
 
144
  @app.get("/test_api")