Arafath10 committed on
Commit
f7b1e15
1 Parent(s): 6d0b34a

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +83 -20
main.py CHANGED
@@ -17,6 +17,7 @@ import numpy as np
17
 
18
  app = FastAPI()
19
 
 
20
  app.add_middleware(
21
  CORSMiddleware,
22
  allow_origins=["*"],
@@ -26,14 +27,15 @@ app.add_middleware(
26
  )
27
 
28
 
29
- @app.get("/train_the_model")
30
  async def train_the_model(Tenant: str):
31
  # Load the dataset
32
- data = pd.read_csv(f"model/{Tenant}trainer_data.csv")
33
  print(data["customer_name"].count())
34
 
35
  # Analyze class distribution
36
  class_distribution = data['status.name'].value_counts()
 
37
  print("Class Distribution before balancing:\n", class_distribution)
38
 
39
  # Get the size of the largest class to match other classes' sizes
@@ -53,16 +55,19 @@ async def train_the_model(Tenant: str):
53
 
54
  data = oversampled_data
55
 
56
-
57
  # Select columns 'customer_email'
58
  selected_columns = ['customer_name', 'customer_address', 'customer_phone',
59
  'cod', 'weight', 'origin_city.name',
60
- 'destination_city.name','created_at','status.name']
61
 
62
  # Handling missing values
63
  #data_filled = data[selected_columns].fillna('Missing')
64
  data_filled = data[selected_columns].dropna()
65
-
 
 
 
 
66
  # Encoding categorical variables
67
  encoders = {col: LabelEncoder() for col in selected_columns if data_filled[col].dtype == 'object'}
68
  for col, encoder in encoders.items():
@@ -117,9 +122,58 @@ async def train_the_model(Tenant: str):
117
  encoders_filename = f'model/{Tenant}_curfox_encoders.joblib'
118
  dump(encoders, encoders_filename)
119
 
120
- return accuracy,classification_rep,"Model trained with new data for :",model_filename
 
 
 
 
 
 
 
 
121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  @app.get("/trigger_the_data_fecher")
124
  async def your_continuous_function(page: int,paginate: int,Tenant: str):
125
  print("data fetcher running.....")
@@ -153,17 +207,18 @@ async def your_continuous_function(page: int,paginate: int,Tenant: str):
153
  #data.to_csv("new.csv")
154
 
155
  try:
156
- file_path = f'model/{Tenant}trainer_data.csv' # Replace with your file path
157
  source_csv = pd.read_csv(file_path)
158
  new_data = df
159
  combined_df_final = pd.concat([source_csv,new_data], ignore_index=True)
160
 
161
- combined_df_final.to_csv(f"model/{Tenant}trainer_data.csv")
162
  print("data added")
163
  except:
164
 
165
- df.to_csv(f"model/{Tenant}trainer_data.csv")
166
  print("data created")
 
167
  return {"message":"done","page_number":page,"data_count":data_count,'X-Tenant': Tenant}
168
 
169
 
@@ -171,12 +226,19 @@ async def your_continuous_function(page: int,paginate: int,Tenant: str):
171
 
172
  @app.get("/get_latest_model_updated_time")
173
  async def model_updated_time(Tenant: str):
 
 
 
 
174
  try:
175
  m_time_encoder = os.path.getmtime(f'model/{Tenant}_curfox_encoders.joblib')
176
  m_time_model = os.path.getmtime(f'model/{Tenant}_curfox_xgb_model.joblib')
177
- return {"Tenant":Tenant,
 
178
  "base model created time ":datetime.datetime.fromtimestamp(m_time_encoder),
179
- "last model updated time":datetime.datetime.fromtimestamp(m_time_model)}
 
 
180
  except:
181
  return {"no model found so first trained the model using data fecther"}
182
 
@@ -185,20 +247,21 @@ async def model_updated_time(Tenant: str):
185
 
186
 
187
  # Endpoint for making predictions
 
188
  @app.post("/predict")
189
  def predict(
190
  Tenant: str,
191
  customer_name: str,
192
  customer_address: str,
193
  customer_phone: str,
194
- customer_email: str,
195
  cod:str,
196
  weight: str,
197
- pickup_address: str,
198
  origin_city_name: str,
199
  destination_city_name: str,
200
- origin_country: str,
201
- created_at: str
 
 
202
  ):
203
 
204
  try:
@@ -219,13 +282,13 @@ def predict(
219
  return [encoder.transform([x])[0] if x in classes else -1 for x in column]
220
 
221
 
222
- # Convert input data to DataFrame
223
  input_data = {
224
  'customer_name': customer_name,
225
  'customer_address': customer_address,
226
- 'customer_phone': int(customer_phone), #'customer_email': customer_email,
227
- 'cod': float(cod),
228
- 'weight': float(weight),
229
  'origin_city.name':origin_city_name,
230
  'destination_city.name':destination_city_name,
231
  'created_at':created_at
@@ -249,4 +312,4 @@ def predict(
249
  if predicted_status == "RETURN TO CLIENT":
250
  probability = 100 - probability
251
 
252
- return {"Probability": round(probability,2),"Tenant":Tenant}
 
17
 
18
  app = FastAPI()
19
 
20
+
21
  app.add_middleware(
22
  CORSMiddleware,
23
  allow_origins=["*"],
 
27
  )
28
 
29
 
30
+ @app.get("/train_the_model_new_v2")
31
  async def train_the_model(Tenant: str):
32
  # Load the dataset
33
+ data = pd.read_csv(f"model/{Tenant}trainer_data_v1.csv")
34
  print(data["customer_name"].count())
35
 
36
  # Analyze class distribution
37
  class_distribution = data['status.name'].value_counts()
38
+ bf = str(class_distribution)
39
  print("Class Distribution before balancing:\n", class_distribution)
40
 
41
  # Get the size of the largest class to match other classes' sizes
 
55
 
56
  data = oversampled_data
57
 
 
58
  # Select columns 'customer_email'
59
  selected_columns = ['customer_name', 'customer_address', 'customer_phone',
60
  'cod', 'weight', 'origin_city.name',
61
+ 'destination_city.name','status.name','created_at']
62
 
63
  # Handling missing values
64
  #data_filled = data[selected_columns].fillna('Missing')
65
  data_filled = data[selected_columns].dropna()
66
+ data_filled['customer_phone'] = data_filled['customer_phone'].astype(str)
67
+ data_filled['created_at'] = data_filled['created_at'].astype(str)
68
+ #data_filled = data_filled.drop(columns=['created_at'])
69
+
70
+ af = str(oversampled_data['status.name'].value_counts())
71
  # Encoding categorical variables
72
  encoders = {col: LabelEncoder() for col in selected_columns if data_filled[col].dtype == 'object'}
73
  for col, encoder in encoders.items():
 
122
  encoders_filename = f'model/{Tenant}_curfox_encoders.joblib'
123
  dump(encoders, encoders_filename)
124
 
125
+ return accuracy,classification_rep,"Model trained with new data for :",model_filename,str(af),str(bf)
126
+
127
+
128
@app.get("/trigger_the_data_fecher_for_me")
async def continuous_function(page: int, paginate: int, Tenant: str):
    """Fetch one page of orders for a tenant and append it to the v1 training CSV.

    The page is requested from the Curfox ``order-list`` endpoint, flattened
    with ``pd.json_normalize``, reduced to rows whose ``status.name`` is either
    ``RETURN TO CLIENT`` or ``DELIVERED``, and then appended to
    ``model/{Tenant}trainer_data_v1.csv`` (the file is created on first use).

    Args:
        page: Page number forwarded to the API as ``page``.
        paginate: Page size forwarded to the API as ``paginate``.
        Tenant: Tenant identifier, sent as the ``X-Tenant`` header and used as
            the prefix of the CSV file name.

    Returns:
        A dict with ``message`` ("data added", "data created" or "no data"),
        ``page_number``, ``data_count`` (number of records in the API response
        *before* status filtering) and ``X-Tenant``.

    Raises:
        requests.HTTPError: If the API answers with a non-2xx status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    print("data fetcher running.....")

    url = "https://v1.api.curfox.com/api/ml/order-list"
    # Let requests build and escape the query string instead of concatenating it.
    params = {"sort": "id", "paginate": paginate, "page": page}
    headers = {
        'Accept': 'application/json',
        'X-Tenant': Tenant,  # e.g. 'royalexpress'
    }

    # A timeout keeps a stalled upstream from hanging this handler forever;
    # raise_for_status surfaces API errors instead of a confusing KeyError below.
    response = requests.get(url, params=params, headers=headers, timeout=60)
    response.raise_for_status()

    # Extracting 'data' for conversion
    data = response.json()['data']
    data_count = len(data)

    df = pd.json_normalize(data)

    # An empty page normalizes to a frame without any columns; there is nothing
    # to filter or store, so report that instead of failing on the column lookup.
    if 'status.name' not in df.columns:
        print("data collected from page : " + str(page))
        return {"message": "no data", "page_number": page, "data_count": data_count, 'X-Tenant': Tenant}

    df = df[df['status.name'].isin(['RETURN TO CLIENT', 'DELIVERED'])]
    print("data collected from page : " + str(page))

    # NOTE(review): Tenant comes straight from the query string and is
    # interpolated into a file path; consider validating it against a list of
    # known tenants to rule out path traversal.
    file_path = f'model/{Tenant}trainer_data_v1.csv'

    try:
        source_csv = pd.read_csv(file_path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # Only a missing/empty file means "first run". Any other failure must
        # propagate, otherwise the accumulated training data would be silently
        # overwritten by this single page.
        df.to_csv(file_path, index=False)
        print("data created")
        message = "data created"
    else:
        combined_df_final = pd.concat([source_csv, df], ignore_index=True)
        # index=False: writing the index would add a new "Unnamed: 0" column
        # on every read/append/write cycle.
        combined_df_final.to_csv(file_path, index=False)
        print("data added")
        message = "data added"

    return {"message": message, "page_number": page, "data_count": data_count, 'X-Tenant': Tenant}
176
+
177
  @app.get("/trigger_the_data_fecher")
178
  async def your_continuous_function(page: int,paginate: int,Tenant: str):
179
  print("data fetcher running.....")
 
207
  #data.to_csv("new.csv")
208
 
209
  try:
210
+ file_path = f'model/{Tenant}trainer_data_.csv' # Replace with your file path
211
  source_csv = pd.read_csv(file_path)
212
  new_data = df
213
  combined_df_final = pd.concat([source_csv,new_data], ignore_index=True)
214
 
215
+ combined_df_final.to_csv(f"model/{Tenant}trainer_data_.csv")
216
  print("data added")
217
  except:
218
 
219
+ df.to_csv(f"model/{Tenant}trainer_data_.csv")
220
  print("data created")
221
+
222
  return {"message":"done","page_number":page,"data_count":data_count,'X-Tenant': Tenant}
223
 
224
 
 
226
 
227
  @app.get("/get_latest_model_updated_time")
228
  async def model_updated_time(Tenant: str):
229
+ import multiprocessing
230
+
231
+ # Get the number of available CPU cores
232
+ available_cores = multiprocessing.cpu_count()
233
  try:
234
  m_time_encoder = os.path.getmtime(f'model/{Tenant}_curfox_encoders.joblib')
235
  m_time_model = os.path.getmtime(f'model/{Tenant}_curfox_xgb_model.joblib')
236
+ return {
237
+ "Tenant":Tenant,
238
  "base model created time ":datetime.datetime.fromtimestamp(m_time_encoder),
239
+ "last model updated time":datetime.datetime.fromtimestamp(m_time_model),
240
+ "Number of available CPU cores": available_cores
241
+ }
242
  except:
243
  return {"no model found so first trained the model using data fecther"}
244
 
 
247
 
248
 
249
  # Endpoint for making predictions
250
+
251
  @app.post("/predict")
252
  def predict(
253
  Tenant: str,
254
  customer_name: str,
255
  customer_address: str,
256
  customer_phone: str,
 
257
  cod:str,
258
  weight: str,
 
259
  origin_city_name: str,
260
  destination_city_name: str,
261
+ created_at: str,
262
+ customer_email: str,
263
+ pickup_address: str,
264
+ origin_country: str
265
  ):
266
 
267
  try:
 
282
  return [encoder.transform([x])[0] if x in classes else -1 for x in column]
283
 
284
 
285
+
286
  input_data = {
287
  'customer_name': customer_name,
288
  'customer_address': customer_address,
289
+ 'customer_phone': customer_phone, #'customer_email': customer_email,
290
+ 'cod': int(cod),
291
+ 'weight': int(weight),
292
  'origin_city.name':origin_city_name,
293
  'destination_city.name':destination_city_name,
294
  'created_at':created_at
 
312
  if predicted_status == "RETURN TO CLIENT":
313
  probability = 100 - probability
314
 
315
+ return {"predicted_status":predicted_status,Probability": round(probability,2),"Tenant_new":Tenant}