OnsAouedi committed on
Commit
5734b6e
·
verified ·
1 Parent(s): 1ef1816

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -87
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # Final version...
2
  import torch
3
  import torch.nn as nn
4
  import gradio as gr
@@ -28,23 +27,25 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
28
  def add_time_decimal_feature(df):
29
  """
30
  Add 'time_decimal' feature by combining 'hour' and 'minutes'.
31
-
32
  :param df: DataFrame with 'hour' and 'minutes' columns.
33
  :return: DataFrame with 'time_decimal' and without 'hour' and 'minutes'.
34
  """
35
- if 'hour' in df.columns and 'minutes' in df.columns:
 
 
 
36
  logging.info("Adding 'time_decimal' feature...")
37
  df['time_decimal'] = df['hour'] + df['minutes'] / 60.0
38
  df = df.drop(columns=['hour', 'minutes']) # Drop 'hour' and 'minutes' after creation
39
  logging.info("'time_decimal' feature added.")
40
  else:
41
- logging.warning("'hour' and/or 'minutes' columns not found. Skipping 'time_decimal' feature addition.")
 
42
  return df
43
 
44
  def haversine(lon1, lat1, lon2, lat2):
45
  """
46
  Calculate the great-circle distance between two points on the Earth.
47
-
48
  :param lon1: Longitude of point 1 (in decimal degrees)
49
  :param lat1: Latitude of point 1 (in decimal degrees)
50
  :param lon2: Longitude of point 2 (in decimal degrees)
@@ -65,7 +66,6 @@ def haversine(lon1, lat1, lon2, lat2):
65
  def calculate_bearing(lon1, lat1, lon2, lat2):
66
  """
67
  Calculate the bearing between two points.
68
-
69
  :param lon1: Longitude of point 1 (in decimal degrees)
70
  :param lat1: Latitude of point 1 (in decimal degrees)
71
  :param lon2: Longitude of point 2 (in decimal degrees)
@@ -90,7 +90,6 @@ def calculate_bearing(lon1, lat1, lon2, lat2):
90
  def angular_divergence(bearing1, bearing2):
91
  """
92
  Calculate the smallest angle difference between two bearings.
93
-
94
  :param bearing1: First bearing in degrees
95
  :param bearing2: Second bearing in degrees
96
  :return: Angular divergence in degrees
@@ -101,7 +100,6 @@ def angular_divergence(bearing1, bearing2):
101
  def denormalize(scaled_lat, scaled_lon, scaler, lat_idx, lon_idx):
102
  """
103
  Denormalize latitude and longitude using the scaler's parameters.
104
-
105
  :param scaled_lat: Scaled latitude values (numpy array).
106
  :param scaled_lon: Scaled longitude values (numpy array).
107
  :param scaler: The scaler object used for normalization.
@@ -118,7 +116,7 @@ def denormalize(scaled_lat, scaled_lon, scaler, lat_idx, lon_idx):
118
  denorm_lon = scaled_lon * (lon_max - lon_min) + lon_min
119
  return denorm_lat, denorm_lon
120
 
121
- def create_dataset_grouped_by_mmsi(df_scaled, seq_len, forecast_horizon, features_to_scale):
122
  """
123
  Create input and output sequences grouped by original MMSI.
124
  Returns scaled last known positions.
@@ -137,14 +135,14 @@ def create_dataset_grouped_by_mmsi(df_scaled, seq_len, forecast_horizon, feature
137
  # Future positions to predict (scaled)
138
  future_positions = group[['latitude_degrees', 'longitude_degrees']].iloc[i + seq_len:i + seq_len + forecast_horizon].to_numpy()
139
 
140
- # Future hour feature
141
- future_hour = group[['time_decimal']].iloc[i + seq_len].values[0]
142
- future_hour_feature = np.full((seq_len, 1), future_hour)
143
 
144
- # Combine sequence with future_hour_feature
145
- sequence_with_future_hour = np.hstack((sequence, future_hour_feature))
146
 
147
- Xs.append(sequence_with_future_hour)
148
  ys.append(future_positions)
149
  mmsis.append(mmsi)
150
 
@@ -163,7 +161,6 @@ class LSTMModelTeacher(nn.Module):
163
  def __init__(self, in_dim, hidden_dim, forecast_horizon, n_layers=7, dropout=0.2):
164
  """
165
  Teacher LSTM Model.
166
-
167
  :param in_dim: Number of input features.
168
  :param hidden_dim: Number of hidden units.
169
  :param forecast_horizon: Number of future steps to predict.
@@ -187,7 +184,6 @@ class LSTMModelStudent(nn.Module):
187
  def __init__(self, in_dim, hidden_dim, forecast_horizon, n_layers=3, dropout=0.2):
188
  """
189
  Student LSTM Model.
190
-
191
  :param in_dim: Number of input features.
192
  :param hidden_dim: Number of hidden units.
193
  :param forecast_horizon: Number of future steps to predict.
@@ -213,48 +209,58 @@ class LSTMModelStudent(nn.Module):
213
 
214
  def load_models(model_paths):
215
  """
216
- Load teacher and student models, including submodels for North, Mid, and South areas.
217
-
218
  :param model_paths: Dictionary containing paths to the models.
219
  :return: Dictionary of loaded models.
220
  """
221
  models = {}
222
  logging.info("Loading Teacher model...")
 
 
223
  # Load Teacher Model (Global)
224
- teacher = LSTMModelTeacher(in_dim=15, hidden_dim=200, forecast_horizon=1, n_layers=7, dropout=0.2) # 15 features including 'future_hour_feature'
225
  teacher.load_state_dict(torch.load(model_paths['teacher'], map_location=torch.device('cpu')))
226
  teacher.eval()
227
  models['Teacher'] = teacher
228
  logging.info("Teacher model loaded successfully.")
229
 
230
  logging.info("Loading Student North model...")
231
- # Load Student Models (Sub-areas)
232
- student_north = LSTMModelStudent(in_dim=15, hidden_dim=200, forecast_horizon=1, n_layers=3, dropout=0.2)
233
  student_north.load_state_dict(torch.load(model_paths['student_north'], map_location=torch.device('cpu')))
234
  student_north.eval()
235
  models['Student_North'] = student_north
236
  logging.info("Student North model loaded successfully.")
237
 
238
  logging.info("Loading Student Mid model...")
239
- student_mid = LSTMModelStudent(in_dim=15, hidden_dim=200, forecast_horizon=1, n_layers=3, dropout=0.2)
240
  student_mid.load_state_dict(torch.load(model_paths['student_mid'], map_location=torch.device('cpu')))
241
  student_mid.eval()
242
  models['Student_Mid'] = student_mid
243
  logging.info("Student Mid model loaded successfully.")
244
 
245
  logging.info("Loading Student South model...")
246
- student_south = LSTMModelStudent(in_dim=15, hidden_dim=200, forecast_horizon=1, n_layers=3, dropout=0.2)
247
  student_south.load_state_dict(torch.load(model_paths['student_south'], map_location=torch.device('cpu')))
248
  student_south.eval()
249
  models['Student_South'] = student_south
250
  logging.info("Student South model loaded successfully.")
251
 
 
 
 
 
 
 
 
 
 
 
252
  return models
253
 
254
  def load_scalers(scaler_paths):
255
  """
256
  Load scalers for each model.
257
-
258
  :param scaler_paths: Dictionary containing paths to the scaler files.
259
  :return: Dictionary of loaded scalers.
260
  """
@@ -275,7 +281,6 @@ def load_scalers(scaler_paths):
275
  def determine_subarea(df):
276
  """
277
  Determine the sub-area (North, Mid, South) based on latitude and longitude ranges.
278
-
279
  :param df: DataFrame containing 'latitude_degrees' and 'longitude_degrees'.
280
  :return: String indicating the sub-area.
281
  """
@@ -307,24 +312,27 @@ def determine_subarea(df):
307
 
308
  return predominant_subarea
309
 
310
- def select_model(models, subarea):
311
  """
312
- Select the appropriate model based on the sub-area.
313
-
314
  :param models: Dictionary of loaded models.
315
  :param subarea: String indicating the sub-area.
 
316
  :return: Tuple of (selected_model, selected_model_name).
317
  """
318
- if subarea in ['North', 'Mid', 'South']:
319
- selected_model = models.get(f'Student_{subarea}')
320
- selected_model_name = f'Student_{subarea}'
321
- logging.info(f"Selected model: {selected_model_name}")
322
- return selected_model, selected_model_name
 
 
323
  else:
324
- selected_model = models.get('Teacher')
325
- selected_model_name = 'Teacher'
326
- logging.info(f"Selected model: {selected_model_name}")
327
- return selected_model, selected_model_name
 
328
 
329
  # ============================
330
  # Evaluation Metrics Calculation
@@ -333,7 +341,6 @@ def select_model(models, subarea):
333
  def calculate_classic_metrics(y_true, y_pred):
334
  """
335
  Calculate MAE, MSE, and RMSE directly on latitude/longitude pairs.
336
-
337
  :param y_true: Ground truth positions (numpy array of shape (num_samples, 2)).
338
  :param y_pred: Predicted positions (numpy array of shape (num_samples, 2)).
339
  :return: Dictionary containing the classic metrics.
@@ -360,7 +367,6 @@ def calculate_classic_metrics(y_true, y_pred):
360
  def calculate_distance_metrics(y_true, y_pred):
361
  """
362
  Calculate metrics based on distance (in kilometers).
363
-
364
  :param y_true: Ground truth positions (numpy array of shape (num_samples, 2)).
365
  :param y_pred: Predicted positions (numpy array of shape (num_samples, 2)).
366
  :return: Dictionary containing the distance-based metrics.
@@ -416,35 +422,13 @@ def classical_prediction(file_path, model_choice, min_mmsi, max_mmsi, models, lo
416
  if df.empty:
417
  error_message = "No data available after applying MMSI filters."
418
  logging.error(error_message)
419
- return {"error": error_message}, None, None
420
-
421
- # Check if 'time_decimal' exists
422
- if 'time_decimal' not in df.columns:
423
- df = add_time_decimal_feature(df)
424
- else:
425
- logging.info("'time_decimal' feature already exists. Skipping creation.")
426
-
427
- expected_columns = [
428
- "mmsi", "sog_kt", "latitude_degrees", "longitude_degrees", "cog_degrees",
429
- "dimension_a_m", "dimension_b_m", "dimension_c_m", "dimension_d_m",
430
- "ship_type", "day", "month", "year", "time_decimal"
431
- ]
432
- if list(df.columns) != expected_columns:
433
- error_message = (
434
- f"Input data does not have the correct columns.\n"
435
- f"Expected columns: {expected_columns}\n"
436
- f"Got columns: {list(df.columns)}"
437
- )
438
- logging.error(error_message)
439
- return {"error": error_message}, None, None
440
-
441
- logging.info("Input CSV has the correct columns.")
442
 
443
  # Select the appropriate model and scaler
444
  if model_choice == "Auto-Select":
445
  temp_df = df.copy()
446
  subarea = determine_subarea(temp_df)
447
- selected_model, selected_model_name = select_model(models, subarea)
448
  scaler = loaded_scalers[selected_model_name]
449
  else:
450
  if model_choice in models:
@@ -454,17 +438,50 @@ def classical_prediction(file_path, model_choice, min_mmsi, max_mmsi, models, lo
454
  else:
455
  error_message = f"Selected model '{model_choice}' is not available."
456
  logging.error(error_message)
457
- return {"error": error_message}, None, None
458
 
459
  logging.info(f"Using scaler for model: {selected_model_name}")
460
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
461
  # Normalize the data
462
  logging.info("Normalizing the data...")
463
- features_to_scale = [
464
- "mmsi", "sog_kt", "latitude_degrees", "longitude_degrees", "cog_degrees",
465
- "dimension_a_m", "dimension_b_m", "dimension_c_m", "dimension_d_m",
466
- "ship_type", "day", "month", "year", "time_decimal"
467
- ]
468
  X_new = df[features_to_scale]
469
  X_scaled = scaler.transform(X_new)
470
  df_scaled = pd.DataFrame(X_scaled, columns=features_to_scale, index=df.index)
@@ -473,12 +490,14 @@ def classical_prediction(file_path, model_choice, min_mmsi, max_mmsi, models, lo
473
  # Create sequences and get last known positions (scaled)
474
  seq_len = 24
475
  forecast_horizon = 1
476
- X, y, mmsi_seq, last_known_positions_scaled = create_dataset_grouped_by_mmsi(df_scaled, seq_len, forecast_horizon, features_to_scale)
 
 
477
 
478
  if X.size == 0:
479
  error_message = "Not enough data to create sequences."
480
  logging.error(error_message)
481
- return {"error": error_message}, None, None
482
 
483
  logging.info(f"Created {X.shape[0]} sequences.")
484
 
@@ -572,9 +591,13 @@ def abnormal_behavior_detection(prediction_file_path, alpha=0.5, threshold=10.0)
572
 
573
  # Check if necessary columns exist
574
  expected_columns = [
575
- "mmsi", "sog_kt", "latitude_degrees", "longitude_degrees", "cog_degrees",
576
- "dimension_a_m", "dimension_b_m", "dimension_c_m", "dimension_d_m",
577
- "ship_type", "day", "month", "year", "time_decimal"
 
 
 
 
578
  ]
579
 
580
  if not all(col in df.columns for col in expected_columns):
@@ -584,7 +607,7 @@ def abnormal_behavior_detection(prediction_file_path, alpha=0.5, threshold=10.0)
584
  f"Got columns: {list(df.columns)}"
585
  )
586
  logging.error(error_message)
587
- return {"error": error_message}
588
 
589
  # Extract necessary data
590
  mmsi_seq = df['MMSI'].values
@@ -660,15 +683,14 @@ def abnormal_behavior_detection(prediction_file_path, alpha=0.5, threshold=10.0)
660
  # ============================
661
 
662
  def main():
663
- # ============================
664
- # Define Model and Scaler Paths
665
- # ============================
666
 
667
  model_paths = {
668
- 'teacher': 'LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256/horizon_data_LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_seq_24/run_1/best_model.pth',
669
  'student_north': 'LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_North/horizon1_data_LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_North_seq_24/run_1/best_model.pth',
670
  'student_mid': 'LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_Mid/horizon1_data_LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_Mid_seq_24/run_1/best_model.pth',
671
- 'student_south': 'LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_South/horizon1_data_LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_South_seq_24/run_1/best_model.pth'
 
672
  }
673
 
674
  scaler_paths = {
@@ -676,11 +698,10 @@ def main():
676
  'Student_North': 'scaler_train_North_up.joblib',
677
  'Student_Mid': 'scaler_train_Mid_up.joblib',
678
  'Student_South': 'scaler_train_South_up.joblib'
 
679
  }
680
 
681
- # ============================
682
- # Load Models and Scalers
683
- # ============================
684
 
685
  logging.info("Loading models and scalers...")
686
  models = load_models(model_paths)
@@ -693,7 +714,7 @@ def main():
693
  inputs=[
694
  gr.File(label="Upload CSV File", type='filepath'),
695
  gr.Dropdown(
696
- choices=["Auto-Select", "Teacher", "Student_North", "Student_Mid", "Student_South"],
697
  value="Auto-Select",
698
  label="Choose Model"
699
  ),
@@ -749,5 +770,4 @@ def main():
749
 
750
  # Run the app
751
  if __name__ == "__main__":
752
- main()
753
-
 
 
1
  import torch
2
  import torch.nn as nn
3
  import gradio as gr
 
27
  def add_time_decimal_feature(df):
28
  """
29
  Add 'time_decimal' feature by combining 'hour' and 'minutes'.
 
30
  :param df: DataFrame with 'hour' and 'minutes' columns.
31
  :return: DataFrame with 'time_decimal' and without 'hour' and 'minutes'.
32
  """
33
+ if 'time_decimal' in df.columns:
34
+ logging.info("'time_decimal' feature already exists. Skipping creation.")
35
+ return df
36
+ elif 'hour' in df.columns and 'minutes' in df.columns:
37
  logging.info("Adding 'time_decimal' feature...")
38
  df['time_decimal'] = df['hour'] + df['minutes'] / 60.0
39
  df = df.drop(columns=['hour', 'minutes']) # Drop 'hour' and 'minutes' after creation
40
  logging.info("'time_decimal' feature added.")
41
  else:
42
+ logging.warning("Neither 'time_decimal' nor 'hour' and 'minutes' columns found. Cannot create 'time_decimal' feature.")
43
+ raise ValueError("Input data must contain 'time_decimal' or both 'hour' and 'minutes' columns.")
44
  return df
45
 
46
  def haversine(lon1, lat1, lon2, lat2):
47
  """
48
  Calculate the great-circle distance between two points on the Earth.
 
49
  :param lon1: Longitude of point 1 (in decimal degrees)
50
  :param lat1: Latitude of point 1 (in decimal degrees)
51
  :param lon2: Longitude of point 2 (in decimal degrees)
 
66
  def calculate_bearing(lon1, lat1, lon2, lat2):
67
  """
68
  Calculate the bearing between two points.
 
69
  :param lon1: Longitude of point 1 (in decimal degrees)
70
  :param lat1: Latitude of point 1 (in decimal degrees)
71
  :param lon2: Longitude of point 2 (in decimal degrees)
 
90
  def angular_divergence(bearing1, bearing2):
91
  """
92
  Calculate the smallest angle difference between two bearings.
 
93
  :param bearing1: First bearing in degrees
94
  :param bearing2: Second bearing in degrees
95
  :return: Angular divergence in degrees
 
100
  def denormalize(scaled_lat, scaled_lon, scaler, lat_idx, lon_idx):
101
  """
102
  Denormalize latitude and longitude using the scaler's parameters.
 
103
  :param scaled_lat: Scaled latitude values (numpy array).
104
  :param scaled_lon: Scaled longitude values (numpy array).
105
  :param scaler: The scaler object used for normalization.
 
116
  denorm_lon = scaled_lon * (lon_max - lon_min) + lon_min
117
  return denorm_lat, denorm_lon
118
 
119
+ def create_dataset_grouped_by_mmsi(df_scaled, seq_len, forecast_horizon, features_to_scale, future_features):
120
  """
121
  Create input and output sequences grouped by original MMSI.
122
  Returns scaled last known positions.
 
135
  # Future positions to predict (scaled)
136
  future_positions = group[['latitude_degrees', 'longitude_degrees']].iloc[i + seq_len:i + seq_len + forecast_horizon].to_numpy()
137
 
138
+ # Future features
139
+ future_feature_values = group[future_features].iloc[i + seq_len].values
140
+ future_feature_array = np.tile(future_feature_values, (seq_len, 1))
141
 
142
+ # Combine sequence with future features
143
+ sequence_with_future_features = np.hstack((sequence, future_feature_array))
144
 
145
+ Xs.append(sequence_with_future_features)
146
  ys.append(future_positions)
147
  mmsis.append(mmsi)
148
 
 
161
  def __init__(self, in_dim, hidden_dim, forecast_horizon, n_layers=7, dropout=0.2):
162
  """
163
  Teacher LSTM Model.
 
164
  :param in_dim: Number of input features.
165
  :param hidden_dim: Number of hidden units.
166
  :param forecast_horizon: Number of future steps to predict.
 
184
  def __init__(self, in_dim, hidden_dim, forecast_horizon, n_layers=3, dropout=0.2):
185
  """
186
  Student LSTM Model.
 
187
  :param in_dim: Number of input features.
188
  :param hidden_dim: Number of hidden units.
189
  :param forecast_horizon: Number of future steps to predict.
 
209
 
210
  def load_models(model_paths):
211
  """
212
+ Load teacher, student, and cargo vessel models, including submodels for North, Mid, and South areas.
 
213
  :param model_paths: Dictionary containing paths to the models.
214
  :return: Dictionary of loaded models.
215
  """
216
  models = {}
217
  logging.info("Loading Teacher model...")
218
+ # Teacher model input dimension
219
+ teacher_in_dim = 15 # 14 scaled input features + 1 future feature ('time_decimal')
220
  # Load Teacher Model (Global)
221
+ teacher = LSTMModelTeacher(in_dim=teacher_in_dim, hidden_dim=200, forecast_horizon=1, n_layers=7, dropout=0.2)
222
  teacher.load_state_dict(torch.load(model_paths['teacher'], map_location=torch.device('cpu')))
223
  teacher.eval()
224
  models['Teacher'] = teacher
225
  logging.info("Teacher model loaded successfully.")
226
 
227
  logging.info("Loading Student North model...")
228
+ # Student North model input dimension is the same as teacher
229
+ student_north = LSTMModelStudent(in_dim=teacher_in_dim, hidden_dim=200, forecast_horizon=1, n_layers=3, dropout=0.2)
230
  student_north.load_state_dict(torch.load(model_paths['student_north'], map_location=torch.device('cpu')))
231
  student_north.eval()
232
  models['Student_North'] = student_north
233
  logging.info("Student North model loaded successfully.")
234
 
235
  logging.info("Loading Student Mid model...")
236
+ student_mid = LSTMModelStudent(in_dim=teacher_in_dim, hidden_dim=200, forecast_horizon=1, n_layers=3, dropout=0.2)
237
  student_mid.load_state_dict(torch.load(model_paths['student_mid'], map_location=torch.device('cpu')))
238
  student_mid.eval()
239
  models['Student_Mid'] = student_mid
240
  logging.info("Student Mid model loaded successfully.")
241
 
242
  logging.info("Loading Student South model...")
243
+ student_south = LSTMModelStudent(in_dim=teacher_in_dim, hidden_dim=200, forecast_horizon=1, n_layers=3, dropout=0.2)
244
  student_south.load_state_dict(torch.load(model_paths['student_south'], map_location=torch.device('cpu')))
245
  student_south.eval()
246
  models['Student_South'] = student_south
247
  logging.info("Student South model loaded successfully.")
248
 
249
+ # Load Cargo Vessel model
250
+ logging.info("Loading Cargo Vessel model...")
251
+ # Cargo Vessel model input dimension
252
+ cargo_in_dim = 13 + 3 # 13 features (without 'year') + 3 future features ('day', 'month', 'time_decimal')
253
+ cargo_model = LSTMModelTeacher(in_dim=cargo_in_dim, hidden_dim=200, forecast_horizon=1, n_layers=10, dropout=0.2)
254
+ cargo_model.load_state_dict(torch.load(model_paths['cargo_vessel'], map_location=torch.device('cpu')))
255
+ cargo_model.eval()
256
+ models['Cargo_Vessel'] = cargo_model
257
+ logging.info("Cargo Vessel model loaded successfully.")
258
+
259
  return models
260
 
261
  def load_scalers(scaler_paths):
262
  """
263
  Load scalers for each model.
 
264
  :param scaler_paths: Dictionary containing paths to the scaler files.
265
  :return: Dictionary of loaded scalers.
266
  """
 
281
  def determine_subarea(df):
282
  """
283
  Determine the sub-area (North, Mid, South) based on latitude and longitude ranges.
 
284
  :param df: DataFrame containing 'latitude_degrees' and 'longitude_degrees'.
285
  :return: String indicating the sub-area.
286
  """
 
312
 
313
  return predominant_subarea
314
 
315
+ def select_model(models, subarea, model_choice):
316
  """
317
+ Select the appropriate model based on the sub-area and model choice.
 
318
  :param models: Dictionary of loaded models.
319
  :param subarea: String indicating the sub-area.
320
+ :param model_choice: String indicating the selected model.
321
  :return: Tuple of (selected_model, selected_model_name).
322
  """
323
+ if model_choice == "Auto-Select":
324
+ if subarea in ['North', 'Mid', 'South']:
325
+ selected_model = models.get(f'Student_{subarea}')
326
+ selected_model_name = f'Student_{subarea}'
327
+ else:
328
+ selected_model = models.get('Teacher')
329
+ selected_model_name = 'Teacher'
330
  else:
331
+ selected_model = models.get(model_choice)
332
+ selected_model_name = model_choice
333
+
334
+ logging.info(f"Selected model: {selected_model_name}")
335
+ return selected_model, selected_model_name
336
 
337
  # ============================
338
  # Evaluation Metrics Calculation
 
341
  def calculate_classic_metrics(y_true, y_pred):
342
  """
343
  Calculate MAE, MSE, and RMSE directly on latitude/longitude pairs.
 
344
  :param y_true: Ground truth positions (numpy array of shape (num_samples, 2)).
345
  :param y_pred: Predicted positions (numpy array of shape (num_samples, 2)).
346
  :return: Dictionary containing the classic metrics.
 
367
  def calculate_distance_metrics(y_true, y_pred):
368
  """
369
  Calculate metrics based on distance (in kilometers).
 
370
  :param y_true: Ground truth positions (numpy array of shape (num_samples, 2)).
371
  :param y_pred: Predicted positions (numpy array of shape (num_samples, 2)).
372
  :return: Dictionary containing the distance-based metrics.
 
422
  if df.empty:
423
  error_message = "No data available after applying MMSI filters."
424
  logging.error(error_message)
425
+ return {"error": error_message}, None, None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
426
 
427
  # Select the appropriate model and scaler
428
  if model_choice == "Auto-Select":
429
  temp_df = df.copy()
430
  subarea = determine_subarea(temp_df)
431
+ selected_model, selected_model_name = select_model(models, subarea, model_choice)
432
  scaler = loaded_scalers[selected_model_name]
433
  else:
434
  if model_choice in models:
 
438
  else:
439
  error_message = f"Selected model '{model_choice}' is not available."
440
  logging.error(error_message)
441
+ return {"error": error_message}, None, None, None
442
 
443
  logging.info(f"Using scaler for model: {selected_model_name}")
444
 
445
+ # Adjust features_to_scale based on the selected model
446
+ if selected_model_name == 'Cargo_Vessel':
447
+ features_to_scale = [
448
+ "mmsi", "sog_kt", "latitude_degrees", "longitude_degrees", "cog_degrees",
449
+ "dimension_a_m", "dimension_b_m", "dimension_c_m", "dimension_d_m",
450
+ "ship_type", "day", "month", "time_decimal"
451
+ ]
452
+ future_features = ['day', 'month', 'time_decimal']
453
+ else:
454
+ features_to_scale = [
455
+ "mmsi", "sog_kt", "latitude_degrees", "longitude_degrees", "cog_degrees",
456
+ "dimension_a_m", "dimension_b_m", "dimension_c_m", "dimension_d_m",
457
+ "ship_type", "day", "month", "year", "time_decimal"
458
+ ]
459
+ future_features = ['time_decimal']
460
+
461
+ # Check if the necessary columns exist
462
+ expected_columns = features_to_scale
463
+ if not all(col in df.columns for col in expected_columns):
464
+ error_message = (
465
+ f"Input data does not have the correct columns.\n"
466
+ f"Expected columns for {selected_model_name}: {expected_columns}\n"
467
+ f"Got columns: {list(df.columns)}"
468
+ )
469
+ logging.error(error_message)
470
+ return {"error": error_message}, None, None, None
471
+
472
+ logging.info("Input CSV has the correct columns.")
473
+
474
+ # Check and add 'time_decimal' if necessary
475
+ if selected_model_name != 'Cargo_Vessel':
476
+ df = add_time_decimal_feature(df)
477
+ else:
478
+ if 'time_decimal' not in df.columns:
479
+ error_message = "Cargo model requires 'time_decimal' column."
480
+ logging.error(error_message)
481
+ return {"error": error_message}, None, None, None
482
+
483
  # Normalize the data
484
  logging.info("Normalizing the data...")
 
 
 
 
 
485
  X_new = df[features_to_scale]
486
  X_scaled = scaler.transform(X_new)
487
  df_scaled = pd.DataFrame(X_scaled, columns=features_to_scale, index=df.index)
 
490
  # Create sequences and get last known positions (scaled)
491
  seq_len = 24
492
  forecast_horizon = 1
493
+ X, y, mmsi_seq, last_known_positions_scaled = create_dataset_grouped_by_mmsi(
494
+ df_scaled, seq_len, forecast_horizon, features_to_scale, future_features
495
+ )
496
 
497
  if X.size == 0:
498
  error_message = "Not enough data to create sequences."
499
  logging.error(error_message)
500
+ return {"error": error_message}, None, None, None
501
 
502
  logging.info(f"Created {X.shape[0]} sequences.")
503
 
 
591
 
592
  # Check if necessary columns exist
593
  expected_columns = [
594
+ 'MMSI',
595
+ 'Last Known Latitude',
596
+ 'Last Known Longitude',
597
+ 'Predicted Latitude',
598
+ 'Predicted Longitude',
599
+ 'Real Latitude',
600
+ 'Real Longitude'
601
  ]
602
 
603
  if not all(col in df.columns for col in expected_columns):
 
607
  f"Got columns: {list(df.columns)}"
608
  )
609
  logging.error(error_message)
610
+ return None, error_message
611
 
612
  # Extract necessary data
613
  mmsi_seq = df['MMSI'].values
 
683
  # ============================
684
 
685
  def main():
686
+
 
 
687
 
688
  model_paths = {
689
+ 'teacher': 'LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256/horizon_data_LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_seq_24/run_1/best_model.pth',
690
  'student_north': 'LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_North/horizon1_data_LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_North_seq_24/run_1/best_model.pth',
691
  'student_mid': 'LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_Mid/horizon1_data_LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_Mid_seq_24/run_1/best_model.pth',
692
+ 'student_south': 'LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_South/horizon1_data_LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_South_seq_24/run_1/best_model.pth',
693
+ 'cargo_vessel': 'Cago_final/LSTMModel_cargo_horizon1_with_month_day_time_input_batch256/horizon_data_LSTMModel_cargo_horizon1_with_month_day_time_input_batch256_seq_24/run_1/best_model.pth'
694
  }
695
 
696
  scaler_paths = {
 
698
  'Student_North': 'scaler_train_North_up.joblib',
699
  'Student_Mid': 'scaler_train_Mid_up.joblib',
700
  'Student_South': 'scaler_train_South_up.joblib',
701
+ 'Cargo_Vessel': 'scaler_features_cargo_up_final.joblib' # Scaler for the Cargo_Vessel model
702
  }
703
 
704
+ # Load Models and Scalers
 
 
705
 
706
  logging.info("Loading models and scalers...")
707
  models = load_models(model_paths)
 
714
  inputs=[
715
  gr.File(label="Upload CSV File", type='filepath'),
716
  gr.Dropdown(
717
+ choices=["Auto-Select", "Teacher", "Student_North", "Student_Mid", "Student_South", "Cargo_Vessel"],
718
  value="Auto-Select",
719
  label="Choose Model"
720
  ),
 
770
 
771
  # Run the app
772
  if __name__ == "__main__":
773
+ main()