Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
# Final version...
|
2 |
import torch
|
3 |
import torch.nn as nn
|
4 |
import gradio as gr
|
@@ -28,23 +27,25 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
|
|
28 |
def add_time_decimal_feature(df):
|
29 |
"""
|
30 |
Add 'time_decimal' feature by combining 'hour' and 'minutes'.
|
31 |
-
|
32 |
:param df: DataFrame with 'hour' and 'minutes' columns.
|
33 |
:return: DataFrame with 'time_decimal' and without 'hour' and 'minutes'.
|
34 |
"""
|
35 |
-
if '
|
|
|
|
|
|
|
36 |
logging.info("Adding 'time_decimal' feature...")
|
37 |
df['time_decimal'] = df['hour'] + df['minutes'] / 60.0
|
38 |
df = df.drop(columns=['hour', 'minutes']) # Drop 'hour' and 'minutes' after creation
|
39 |
logging.info("'time_decimal' feature added.")
|
40 |
else:
|
41 |
-
logging.warning("'hour' and
|
|
|
42 |
return df
|
43 |
|
44 |
def haversine(lon1, lat1, lon2, lat2):
|
45 |
"""
|
46 |
Calculate the great-circle distance between two points on the Earth.
|
47 |
-
|
48 |
:param lon1: Longitude of point 1 (in decimal degrees)
|
49 |
:param lat1: Latitude of point 1 (in decimal degrees)
|
50 |
:param lon2: Longitude of point 2 (in decimal degrees)
|
@@ -65,7 +66,6 @@ def haversine(lon1, lat1, lon2, lat2):
|
|
65 |
def calculate_bearing(lon1, lat1, lon2, lat2):
|
66 |
"""
|
67 |
Calculate the bearing between two points.
|
68 |
-
|
69 |
:param lon1: Longitude of point 1 (in decimal degrees)
|
70 |
:param lat1: Latitude of point 1 (in decimal degrees)
|
71 |
:param lon2: Longitude of point 2 (in decimal degrees)
|
@@ -90,7 +90,6 @@ def calculate_bearing(lon1, lat1, lon2, lat2):
|
|
90 |
def angular_divergence(bearing1, bearing2):
|
91 |
"""
|
92 |
Calculate the smallest angle difference between two bearings.
|
93 |
-
|
94 |
:param bearing1: First bearing in degrees
|
95 |
:param bearing2: Second bearing in degrees
|
96 |
:return: Angular divergence in degrees
|
@@ -101,7 +100,6 @@ def angular_divergence(bearing1, bearing2):
|
|
101 |
def denormalize(scaled_lat, scaled_lon, scaler, lat_idx, lon_idx):
|
102 |
"""
|
103 |
Denormalize latitude and longitude using the scaler's parameters.
|
104 |
-
|
105 |
:param scaled_lat: Scaled latitude values (numpy array).
|
106 |
:param scaled_lon: Scaled longitude values (numpy array).
|
107 |
:param scaler: The scaler object used for normalization.
|
@@ -118,7 +116,7 @@ def denormalize(scaled_lat, scaled_lon, scaler, lat_idx, lon_idx):
|
|
118 |
denorm_lon = scaled_lon * (lon_max - lon_min) + lon_min
|
119 |
return denorm_lat, denorm_lon
|
120 |
|
121 |
-
def create_dataset_grouped_by_mmsi(df_scaled, seq_len, forecast_horizon, features_to_scale):
|
122 |
"""
|
123 |
Create input and output sequences grouped by original MMSI.
|
124 |
Returns scaled last known positions.
|
@@ -137,14 +135,14 @@ def create_dataset_grouped_by_mmsi(df_scaled, seq_len, forecast_horizon, feature
|
|
137 |
# Future positions to predict (scaled)
|
138 |
future_positions = group[['latitude_degrees', 'longitude_degrees']].iloc[i + seq_len:i + seq_len + forecast_horizon].to_numpy()
|
139 |
|
140 |
-
# Future
|
141 |
-
|
142 |
-
|
143 |
|
144 |
-
# Combine sequence with
|
145 |
-
|
146 |
|
147 |
-
Xs.append(
|
148 |
ys.append(future_positions)
|
149 |
mmsis.append(mmsi)
|
150 |
|
@@ -163,7 +161,6 @@ class LSTMModelTeacher(nn.Module):
|
|
163 |
def __init__(self, in_dim, hidden_dim, forecast_horizon, n_layers=7, dropout=0.2):
|
164 |
"""
|
165 |
Teacher LSTM Model.
|
166 |
-
|
167 |
:param in_dim: Number of input features.
|
168 |
:param hidden_dim: Number of hidden units.
|
169 |
:param forecast_horizon: Number of future steps to predict.
|
@@ -187,7 +184,6 @@ class LSTMModelStudent(nn.Module):
|
|
187 |
def __init__(self, in_dim, hidden_dim, forecast_horizon, n_layers=3, dropout=0.2):
|
188 |
"""
|
189 |
Student LSTM Model.
|
190 |
-
|
191 |
:param in_dim: Number of input features.
|
192 |
:param hidden_dim: Number of hidden units.
|
193 |
:param forecast_horizon: Number of future steps to predict.
|
@@ -213,48 +209,58 @@ class LSTMModelStudent(nn.Module):
|
|
213 |
|
214 |
def load_models(model_paths):
|
215 |
"""
|
216 |
-
Load teacher and
|
217 |
-
|
218 |
:param model_paths: Dictionary containing paths to the models.
|
219 |
:return: Dictionary of loaded models.
|
220 |
"""
|
221 |
models = {}
|
222 |
logging.info("Loading Teacher model...")
|
|
|
|
|
223 |
# Load Teacher Model (Global)
|
224 |
-
teacher = LSTMModelTeacher(in_dim=
|
225 |
teacher.load_state_dict(torch.load(model_paths['teacher'], map_location=torch.device('cpu')))
|
226 |
teacher.eval()
|
227 |
models['Teacher'] = teacher
|
228 |
logging.info("Teacher model loaded successfully.")
|
229 |
|
230 |
logging.info("Loading Student North model...")
|
231 |
-
#
|
232 |
-
student_north = LSTMModelStudent(in_dim=
|
233 |
student_north.load_state_dict(torch.load(model_paths['student_north'], map_location=torch.device('cpu')))
|
234 |
student_north.eval()
|
235 |
models['Student_North'] = student_north
|
236 |
logging.info("Student North model loaded successfully.")
|
237 |
|
238 |
logging.info("Loading Student Mid model...")
|
239 |
-
student_mid = LSTMModelStudent(in_dim=
|
240 |
student_mid.load_state_dict(torch.load(model_paths['student_mid'], map_location=torch.device('cpu')))
|
241 |
student_mid.eval()
|
242 |
models['Student_Mid'] = student_mid
|
243 |
logging.info("Student Mid model loaded successfully.")
|
244 |
|
245 |
logging.info("Loading Student South model...")
|
246 |
-
student_south = LSTMModelStudent(in_dim=
|
247 |
student_south.load_state_dict(torch.load(model_paths['student_south'], map_location=torch.device('cpu')))
|
248 |
student_south.eval()
|
249 |
models['Student_South'] = student_south
|
250 |
logging.info("Student South model loaded successfully.")
|
251 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
252 |
return models
|
253 |
|
254 |
def load_scalers(scaler_paths):
|
255 |
"""
|
256 |
Load scalers for each model.
|
257 |
-
|
258 |
:param scaler_paths: Dictionary containing paths to the scaler files.
|
259 |
:return: Dictionary of loaded scalers.
|
260 |
"""
|
@@ -275,7 +281,6 @@ def load_scalers(scaler_paths):
|
|
275 |
def determine_subarea(df):
|
276 |
"""
|
277 |
Determine the sub-area (North, Mid, South) based on latitude and longitude ranges.
|
278 |
-
|
279 |
:param df: DataFrame containing 'latitude_degrees' and 'longitude_degrees'.
|
280 |
:return: String indicating the sub-area.
|
281 |
"""
|
@@ -307,24 +312,27 @@ def determine_subarea(df):
|
|
307 |
|
308 |
return predominant_subarea
|
309 |
|
310 |
-
def select_model(models, subarea):
|
311 |
"""
|
312 |
-
Select the appropriate model based on the sub-area.
|
313 |
-
|
314 |
:param models: Dictionary of loaded models.
|
315 |
:param subarea: String indicating the sub-area.
|
|
|
316 |
:return: Tuple of (selected_model, selected_model_name).
|
317 |
"""
|
318 |
-
if
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
|
|
|
|
323 |
else:
|
324 |
-
selected_model = models.get(
|
325 |
-
selected_model_name =
|
326 |
-
|
327 |
-
|
|
|
328 |
|
329 |
# ============================
|
330 |
# Evaluation Metrics Calculation
|
@@ -333,7 +341,6 @@ def select_model(models, subarea):
|
|
333 |
def calculate_classic_metrics(y_true, y_pred):
|
334 |
"""
|
335 |
Calculate MAE, MSE, and RMSE directly on latitude/longitude pairs.
|
336 |
-
|
337 |
:param y_true: Ground truth positions (numpy array of shape (num_samples, 2)).
|
338 |
:param y_pred: Predicted positions (numpy array of shape (num_samples, 2)).
|
339 |
:return: Dictionary containing the classic metrics.
|
@@ -360,7 +367,6 @@ def calculate_classic_metrics(y_true, y_pred):
|
|
360 |
def calculate_distance_metrics(y_true, y_pred):
|
361 |
"""
|
362 |
Calculate metrics based on distance (in kilometers).
|
363 |
-
|
364 |
:param y_true: Ground truth positions (numpy array of shape (num_samples, 2)).
|
365 |
:param y_pred: Predicted positions (numpy array of shape (num_samples, 2)).
|
366 |
:return: Dictionary containing the distance-based metrics.
|
@@ -416,35 +422,13 @@ def classical_prediction(file_path, model_choice, min_mmsi, max_mmsi, models, lo
|
|
416 |
if df.empty:
|
417 |
error_message = "No data available after applying MMSI filters."
|
418 |
logging.error(error_message)
|
419 |
-
return {"error": error_message}, None, None
|
420 |
-
|
421 |
-
# Check if 'time_decimal' exists
|
422 |
-
if 'time_decimal' not in df.columns:
|
423 |
-
df = add_time_decimal_feature(df)
|
424 |
-
else:
|
425 |
-
logging.info("'time_decimal' feature already exists. Skipping creation.")
|
426 |
-
|
427 |
-
expected_columns = [
|
428 |
-
"mmsi", "sog_kt", "latitude_degrees", "longitude_degrees", "cog_degrees",
|
429 |
-
"dimension_a_m", "dimension_b_m", "dimension_c_m", "dimension_d_m",
|
430 |
-
"ship_type", "day", "month", "year", "time_decimal"
|
431 |
-
]
|
432 |
-
if list(df.columns) != expected_columns:
|
433 |
-
error_message = (
|
434 |
-
f"Input data does not have the correct columns.\n"
|
435 |
-
f"Expected columns: {expected_columns}\n"
|
436 |
-
f"Got columns: {list(df.columns)}"
|
437 |
-
)
|
438 |
-
logging.error(error_message)
|
439 |
-
return {"error": error_message}, None, None
|
440 |
-
|
441 |
-
logging.info("Input CSV has the correct columns.")
|
442 |
|
443 |
# Select the appropriate model and scaler
|
444 |
if model_choice == "Auto-Select":
|
445 |
temp_df = df.copy()
|
446 |
subarea = determine_subarea(temp_df)
|
447 |
-
selected_model, selected_model_name = select_model(models, subarea)
|
448 |
scaler = loaded_scalers[selected_model_name]
|
449 |
else:
|
450 |
if model_choice in models:
|
@@ -454,17 +438,50 @@ def classical_prediction(file_path, model_choice, min_mmsi, max_mmsi, models, lo
|
|
454 |
else:
|
455 |
error_message = f"Selected model '{model_choice}' is not available."
|
456 |
logging.error(error_message)
|
457 |
-
return {"error": error_message}, None, None
|
458 |
|
459 |
logging.info(f"Using scaler for model: {selected_model_name}")
|
460 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
461 |
# Normalize the data
|
462 |
logging.info("Normalizing the data...")
|
463 |
-
features_to_scale = [
|
464 |
-
"mmsi", "sog_kt", "latitude_degrees", "longitude_degrees", "cog_degrees",
|
465 |
-
"dimension_a_m", "dimension_b_m", "dimension_c_m", "dimension_d_m",
|
466 |
-
"ship_type", "day", "month", "year", "time_decimal"
|
467 |
-
]
|
468 |
X_new = df[features_to_scale]
|
469 |
X_scaled = scaler.transform(X_new)
|
470 |
df_scaled = pd.DataFrame(X_scaled, columns=features_to_scale, index=df.index)
|
@@ -473,12 +490,14 @@ def classical_prediction(file_path, model_choice, min_mmsi, max_mmsi, models, lo
|
|
473 |
# Create sequences and get last known positions (scaled)
|
474 |
seq_len = 24
|
475 |
forecast_horizon = 1
|
476 |
-
X, y, mmsi_seq, last_known_positions_scaled = create_dataset_grouped_by_mmsi(
|
|
|
|
|
477 |
|
478 |
if X.size == 0:
|
479 |
error_message = "Not enough data to create sequences."
|
480 |
logging.error(error_message)
|
481 |
-
return {"error": error_message}, None, None
|
482 |
|
483 |
logging.info(f"Created {X.shape[0]} sequences.")
|
484 |
|
@@ -572,9 +591,13 @@ def abnormal_behavior_detection(prediction_file_path, alpha=0.5, threshold=10.0)
|
|
572 |
|
573 |
# Check if necessary columns exist
|
574 |
expected_columns = [
|
575 |
-
|
576 |
-
|
577 |
-
|
|
|
|
|
|
|
|
|
578 |
]
|
579 |
|
580 |
if not all(col in df.columns for col in expected_columns):
|
@@ -584,7 +607,7 @@ def abnormal_behavior_detection(prediction_file_path, alpha=0.5, threshold=10.0)
|
|
584 |
f"Got columns: {list(df.columns)}"
|
585 |
)
|
586 |
logging.error(error_message)
|
587 |
-
return
|
588 |
|
589 |
# Extract necessary data
|
590 |
mmsi_seq = df['MMSI'].values
|
@@ -660,15 +683,14 @@ def abnormal_behavior_detection(prediction_file_path, alpha=0.5, threshold=10.0)
|
|
660 |
# ============================
|
661 |
|
662 |
def main():
|
663 |
-
|
664 |
-
# Define Model and Scaler Paths
|
665 |
-
# ============================
|
666 |
|
667 |
model_paths = {
|
668 |
-
|
669 |
'student_north': 'LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_North/horizon1_data_LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_North_seq_24/run_1/best_model.pth',
|
670 |
'student_mid': 'LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_Mid/horizon1_data_LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_Mid_seq_24/run_1/best_model.pth',
|
671 |
-
'student_south': 'LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_South/horizon1_data_LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_South_seq_24/run_1/best_model.pth'
|
|
|
672 |
}
|
673 |
|
674 |
scaler_paths = {
|
@@ -676,11 +698,10 @@ def main():
|
|
676 |
'Student_North': 'scaler_train_North_up.joblib',
|
677 |
'Student_Mid': 'scaler_train_Mid_up.joblib',
|
678 |
'Student_South': 'scaler_train_South_up.joblib'
|
|
|
679 |
}
|
680 |
|
681 |
-
|
682 |
-
# Load Models and Scalers
|
683 |
-
# ============================
|
684 |
|
685 |
logging.info("Loading models and scalers...")
|
686 |
models = load_models(model_paths)
|
@@ -693,7 +714,7 @@ def main():
|
|
693 |
inputs=[
|
694 |
gr.File(label="Upload CSV File", type='filepath'),
|
695 |
gr.Dropdown(
|
696 |
-
choices=["Auto-Select", "Teacher", "Student_North", "Student_Mid", "Student_South"],
|
697 |
value="Auto-Select",
|
698 |
label="Choose Model"
|
699 |
),
|
@@ -749,5 +770,4 @@ def main():
|
|
749 |
|
750 |
# Run the app
|
751 |
if __name__ == "__main__":
|
752 |
-
main()
|
753 |
-
|
|
|
|
|
1 |
import torch
|
2 |
import torch.nn as nn
|
3 |
import gradio as gr
|
|
|
27 |
def add_time_decimal_feature(df):
    """
    Ensure the frame carries a 'time_decimal' column (hour of day as a decimal).

    If 'time_decimal' is already present, the input frame is returned as-is
    (any 'hour'/'minutes' columns are left in place). Otherwise the column is
    computed as ``hour + minutes / 60`` and 'hour' and 'minutes' are dropped.
    The caller's DataFrame is never modified; a new frame is returned when the
    feature has to be created.

    :param df: DataFrame with 'time_decimal', or with both 'hour' and 'minutes' columns.
    :return: DataFrame containing 'time_decimal'.
    :raises ValueError: If neither 'time_decimal' nor both 'hour' and 'minutes' are present.
    """
    if 'time_decimal' in df.columns:
        logging.info("'time_decimal' feature already exists. Skipping creation.")
        return df

    if 'hour' not in df.columns or 'minutes' not in df.columns:
        logging.warning("Neither 'time_decimal' nor 'hour' and 'minutes' columns found. Cannot create 'time_decimal' feature.")
        raise ValueError("Input data must contain 'time_decimal' or both 'hour' and 'minutes' columns.")

    logging.info("Adding 'time_decimal' feature...")
    # assign() returns a new frame, so the caller's DataFrame keeps its original
    # columns instead of silently gaining 'time_decimal' as a side effect.
    result = df.assign(
        time_decimal=df['hour'] + df['minutes'] / 60.0
    ).drop(columns=['hour', 'minutes'])
    logging.info("'time_decimal' feature added.")
    return result
|
45 |
|
46 |
def haversine(lon1, lat1, lon2, lat2):
|
47 |
"""
|
48 |
Calculate the great-circle distance between two points on the Earth.
|
|
|
49 |
:param lon1: Longitude of point 1 (in decimal degrees)
|
50 |
:param lat1: Latitude of point 1 (in decimal degrees)
|
51 |
:param lon2: Longitude of point 2 (in decimal degrees)
|
|
|
66 |
def calculate_bearing(lon1, lat1, lon2, lat2):
|
67 |
"""
|
68 |
Calculate the bearing between two points.
|
|
|
69 |
:param lon1: Longitude of point 1 (in decimal degrees)
|
70 |
:param lat1: Latitude of point 1 (in decimal degrees)
|
71 |
:param lon2: Longitude of point 2 (in decimal degrees)
|
|
|
90 |
def angular_divergence(bearing1, bearing2):
|
91 |
"""
|
92 |
Calculate the smallest angle difference between two bearings.
|
|
|
93 |
:param bearing1: First bearing in degrees
|
94 |
:param bearing2: Second bearing in degrees
|
95 |
:return: Angular divergence in degrees
|
|
|
100 |
def denormalize(scaled_lat, scaled_lon, scaler, lat_idx, lon_idx):
|
101 |
"""
|
102 |
Denormalize latitude and longitude using the scaler's parameters.
|
|
|
103 |
:param scaled_lat: Scaled latitude values (numpy array).
|
104 |
:param scaled_lon: Scaled longitude values (numpy array).
|
105 |
:param scaler: The scaler object used for normalization.
|
|
|
116 |
denorm_lon = scaled_lon * (lon_max - lon_min) + lon_min
|
117 |
return denorm_lat, denorm_lon
|
118 |
|
119 |
+
def create_dataset_grouped_by_mmsi(df_scaled, seq_len, forecast_horizon, features_to_scale, future_features):
|
120 |
"""
|
121 |
Create input and output sequences grouped by original MMSI.
|
122 |
Returns scaled last known positions.
|
|
|
135 |
# Future positions to predict (scaled)
|
136 |
future_positions = group[['latitude_degrees', 'longitude_degrees']].iloc[i + seq_len:i + seq_len + forecast_horizon].to_numpy()
|
137 |
|
138 |
+
# Future features
|
139 |
+
future_feature_values = group[future_features].iloc[i + seq_len].values
|
140 |
+
future_feature_array = np.tile(future_feature_values, (seq_len, 1))
|
141 |
|
142 |
+
# Combine sequence with future features
|
143 |
+
sequence_with_future_features = np.hstack((sequence, future_feature_array))
|
144 |
|
145 |
+
Xs.append(sequence_with_future_features)
|
146 |
ys.append(future_positions)
|
147 |
mmsis.append(mmsi)
|
148 |
|
|
|
161 |
def __init__(self, in_dim, hidden_dim, forecast_horizon, n_layers=7, dropout=0.2):
|
162 |
"""
|
163 |
Teacher LSTM Model.
|
|
|
164 |
:param in_dim: Number of input features.
|
165 |
:param hidden_dim: Number of hidden units.
|
166 |
:param forecast_horizon: Number of future steps to predict.
|
|
|
184 |
def __init__(self, in_dim, hidden_dim, forecast_horizon, n_layers=3, dropout=0.2):
|
185 |
"""
|
186 |
Student LSTM Model.
|
|
|
187 |
:param in_dim: Number of input features.
|
188 |
:param hidden_dim: Number of hidden units.
|
189 |
:param forecast_horizon: Number of future steps to predict.
|
|
|
209 |
|
210 |
def load_models(model_paths):
    """
    Load the teacher, the three regional student models and the cargo vessel model.

    Each model is instantiated, its weights are loaded from the checkpoint on
    CPU, and it is switched to evaluation mode before being registered.

    :param model_paths: Dictionary containing paths to the models, keyed by
        'teacher', 'student_north', 'student_mid', 'student_south' and 'cargo_vessel'.
    :return: Dictionary of loaded models keyed by 'Teacher', 'Student_North',
        'Student_Mid', 'Student_South' and 'Cargo_Vessel'.
    :raises KeyError: If one of the expected keys is missing from ``model_paths``.
    """
    # Teacher and student models share the same input width:
    # features including 'future_hour_feature' (time_decimal).
    teacher_in_dim = 15
    # 13 features (without 'year') + 3 future features ('day', 'month', 'time_decimal').
    cargo_in_dim = 13 + 3

    # (registry key, model_paths key, log label, model class, in_dim, n_layers)
    model_specs = (
        ('Teacher', 'teacher', 'Teacher', LSTMModelTeacher, teacher_in_dim, 7),
        ('Student_North', 'student_north', 'Student North', LSTMModelStudent, teacher_in_dim, 3),
        ('Student_Mid', 'student_mid', 'Student Mid', LSTMModelStudent, teacher_in_dim, 3),
        ('Student_South', 'student_south', 'Student South', LSTMModelStudent, teacher_in_dim, 3),
        ('Cargo_Vessel', 'cargo_vessel', 'Cargo Vessel', LSTMModelTeacher, cargo_in_dim, 10),
    )

    cpu = torch.device('cpu')
    models = {}
    for registry_key, path_key, label, model_cls, in_dim, n_layers in model_specs:
        logging.info("Loading %s model...", label)
        model = model_cls(
            in_dim=in_dim,
            hidden_dim=200,
            forecast_horizon=1,
            n_layers=n_layers,
            dropout=0.2,
        )
        # NOTE(security): torch.load unpickles the checkpoint; only point these
        # paths at trusted files.
        state_dict = torch.load(model_paths[path_key], map_location=cpu)
        model.load_state_dict(state_dict)
        model.eval()  # inference only
        models[registry_key] = model
        logging.info("%s model loaded successfully.", label)

    return models
|
260 |
|
261 |
def load_scalers(scaler_paths):
|
262 |
"""
|
263 |
Load scalers for each model.
|
|
|
264 |
:param scaler_paths: Dictionary containing paths to the scaler files.
|
265 |
:return: Dictionary of loaded scalers.
|
266 |
"""
|
|
|
281 |
def determine_subarea(df):
|
282 |
"""
|
283 |
Determine the sub-area (North, Mid, South) based on latitude and longitude ranges.
|
|
|
284 |
:param df: DataFrame containing 'latitude_degrees' and 'longitude_degrees'.
|
285 |
:return: String indicating the sub-area.
|
286 |
"""
|
|
|
312 |
|
313 |
return predominant_subarea
|
314 |
|
315 |
+
def select_model(models, subarea, model_choice):
    """
    Select the appropriate model based on the sub-area and model choice.

    With "Auto-Select", the regional student model ('Student_<subarea>') is
    chosen for the 'North', 'Mid' and 'South' sub-areas and the 'Teacher'
    model for anything else. Any other ``model_choice`` is used as the model
    name directly.

    :param models: Dictionary of loaded models.
    :param subarea: String indicating the sub-area.
    :param model_choice: String indicating the selected model.
    :return: Tuple of (selected_model, selected_model_name). ``selected_model``
        is None when the name is not present in ``models``.
    """
    if model_choice != "Auto-Select":
        selected_model_name = model_choice
    elif subarea in ('North', 'Mid', 'South'):
        selected_model_name = f'Student_{subarea}'
    else:
        selected_model_name = 'Teacher'

    selected_model = models.get(selected_model_name)
    if selected_model is None:
        # Keep the (None, name) contract, but make the missing model visible
        # instead of letting it surface later as an opaque NoneType error.
        logging.warning("Model '%s' is not present in the loaded models.", selected_model_name)

    logging.info(f"Selected model: {selected_model_name}")
    return selected_model, selected_model_name
|
336 |
|
337 |
# ============================
|
338 |
# Evaluation Metrics Calculation
|
|
|
341 |
def calculate_classic_metrics(y_true, y_pred):
|
342 |
"""
|
343 |
Calculate MAE, MSE, and RMSE directly on latitude/longitude pairs.
|
|
|
344 |
:param y_true: Ground truth positions (numpy array of shape (num_samples, 2)).
|
345 |
:param y_pred: Predicted positions (numpy array of shape (num_samples, 2)).
|
346 |
:return: Dictionary containing the classic metrics.
|
|
|
367 |
def calculate_distance_metrics(y_true, y_pred):
|
368 |
"""
|
369 |
Calculate metrics based on distance (in kilometers).
|
|
|
370 |
:param y_true: Ground truth positions (numpy array of shape (num_samples, 2)).
|
371 |
:param y_pred: Predicted positions (numpy array of shape (num_samples, 2)).
|
372 |
:return: Dictionary containing the distance-based metrics.
|
|
|
422 |
if df.empty:
|
423 |
error_message = "No data available after applying MMSI filters."
|
424 |
logging.error(error_message)
|
425 |
+
return {"error": error_message}, None, None, None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
426 |
|
427 |
# Select the appropriate model and scaler
|
428 |
if model_choice == "Auto-Select":
|
429 |
temp_df = df.copy()
|
430 |
subarea = determine_subarea(temp_df)
|
431 |
+
selected_model, selected_model_name = select_model(models, subarea, model_choice)
|
432 |
scaler = loaded_scalers[selected_model_name]
|
433 |
else:
|
434 |
if model_choice in models:
|
|
|
438 |
else:
|
439 |
error_message = f"Selected model '{model_choice}' is not available."
|
440 |
logging.error(error_message)
|
441 |
+
return {"error": error_message}, None, None, None
|
442 |
|
443 |
logging.info(f"Using scaler for model: {selected_model_name}")
|
444 |
|
445 |
+
# Adjust features_to_scale based on the selected model
|
446 |
+
if selected_model_name == 'Cargo_Vessel':
|
447 |
+
features_to_scale = [
|
448 |
+
"mmsi", "sog_kt", "latitude_degrees", "longitude_degrees", "cog_degrees",
|
449 |
+
"dimension_a_m", "dimension_b_m", "dimension_c_m", "dimension_d_m",
|
450 |
+
"ship_type", "day", "month", "time_decimal"
|
451 |
+
]
|
452 |
+
future_features = ['day', 'month', 'time_decimal']
|
453 |
+
else:
|
454 |
+
features_to_scale = [
|
455 |
+
"mmsi", "sog_kt", "latitude_degrees", "longitude_degrees", "cog_degrees",
|
456 |
+
"dimension_a_m", "dimension_b_m", "dimension_c_m", "dimension_d_m",
|
457 |
+
"ship_type", "day", "month", "year", "time_decimal"
|
458 |
+
]
|
459 |
+
future_features = ['time_decimal']
|
460 |
+
|
461 |
+
# Check if the necessary columns exist
|
462 |
+
expected_columns = features_to_scale
|
463 |
+
if not all(col in df.columns for col in expected_columns):
|
464 |
+
error_message = (
|
465 |
+
f"Input data does not have the correct columns.\n"
|
466 |
+
f"Expected columns for {selected_model_name}: {expected_columns}\n"
|
467 |
+
f"Got columns: {list(df.columns)}"
|
468 |
+
)
|
469 |
+
logging.error(error_message)
|
470 |
+
return {"error": error_message}, None, None, None
|
471 |
+
|
472 |
+
logging.info("Input CSV has the correct columns.")
|
473 |
+
|
474 |
+
# Check and add 'time_decimal' if necessary
|
475 |
+
if selected_model_name != 'Cargo_Vessel':
|
476 |
+
df = add_time_decimal_feature(df)
|
477 |
+
else:
|
478 |
+
if 'time_decimal' not in df.columns:
|
479 |
+
error_message = "Cargo model requires 'time_decimal' column."
|
480 |
+
logging.error(error_message)
|
481 |
+
return {"error": error_message}, None, None, None
|
482 |
+
|
483 |
# Normalize the data
|
484 |
logging.info("Normalizing the data...")
|
|
|
|
|
|
|
|
|
|
|
485 |
X_new = df[features_to_scale]
|
486 |
X_scaled = scaler.transform(X_new)
|
487 |
df_scaled = pd.DataFrame(X_scaled, columns=features_to_scale, index=df.index)
|
|
|
490 |
# Create sequences and get last known positions (scaled)
|
491 |
seq_len = 24
|
492 |
forecast_horizon = 1
|
493 |
+
X, y, mmsi_seq, last_known_positions_scaled = create_dataset_grouped_by_mmsi(
|
494 |
+
df_scaled, seq_len, forecast_horizon, features_to_scale, future_features
|
495 |
+
)
|
496 |
|
497 |
if X.size == 0:
|
498 |
error_message = "Not enough data to create sequences."
|
499 |
logging.error(error_message)
|
500 |
+
return {"error": error_message}, None, None, None
|
501 |
|
502 |
logging.info(f"Created {X.shape[0]} sequences.")
|
503 |
|
|
|
591 |
|
592 |
# Check if necessary columns exist
|
593 |
expected_columns = [
|
594 |
+
'MMSI',
|
595 |
+
'Last Known Latitude',
|
596 |
+
'Last Known Longitude',
|
597 |
+
'Predicted Latitude',
|
598 |
+
'Predicted Longitude',
|
599 |
+
'Real Latitude',
|
600 |
+
'Real Longitude'
|
601 |
]
|
602 |
|
603 |
if not all(col in df.columns for col in expected_columns):
|
|
|
607 |
f"Got columns: {list(df.columns)}"
|
608 |
)
|
609 |
logging.error(error_message)
|
610 |
+
return None, error_message
|
611 |
|
612 |
# Extract necessary data
|
613 |
mmsi_seq = df['MMSI'].values
|
|
|
683 |
# ============================
|
684 |
|
685 |
def main():
|
686 |
+
|
|
|
|
|
687 |
|
688 |
model_paths = {
|
689 |
+
'teacher': 'LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256/horizon_data_LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_seq_24/run_1/best_model.pth',
|
690 |
'student_north': 'LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_North/horizon1_data_LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_North_seq_24/run_1/best_model.pth',
|
691 |
'student_mid': 'LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_Mid/horizon1_data_LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_Mid_seq_24/run_1/best_model.pth',
|
692 |
+
'student_south': 'LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_South/horizon1_data_LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_South_seq_24/run_1/best_model.pth',
|
693 |
+
'cargo_vessel': 'Cago_final/LSTMModel_cargo_horizon1_with_month_day_time_input_batch256/horizon_data_LSTMModel_cargo_horizon1_with_month_day_time_input_batch256_seq_24/run_1/best_model.pth'
|
694 |
}
|
695 |
|
696 |
scaler_paths = {
|
|
|
698 |
'Student_North': 'scaler_train_North_up.joblib',
|
699 |
'Student_Mid': 'scaler_train_Mid_up.joblib',
|
700 |
'Student_South': 'scaler_train_South_up.joblib'
|
701 |
+
'Cargo_Vessel': 'scaler_features_cargo_up_final.joblib' # Add this line
|
702 |
}
|
703 |
|
704 |
+
-
|
|
|
|
|
705 |
|
706 |
logging.info("Loading models and scalers...")
|
707 |
models = load_models(model_paths)
|
|
|
714 |
inputs=[
|
715 |
gr.File(label="Upload CSV File", type='filepath'),
|
716 |
gr.Dropdown(
|
717 |
+
choices=["Auto-Select", "Teacher", "Student_North", "Student_Mid", "Student_South", "Cargo_Vessel"],
|
718 |
value="Auto-Select",
|
719 |
label="Choose Model"
|
720 |
),
|
|
|
770 |
|
771 |
# Run the app
|
772 |
if __name__ == "__main__":
|
773 |
+
main()
|
|