OnsAouedi committed on
Commit a02dc18 · verified · 1 Parent(s): 81ea98b

Delete Maginet_inference

Maginet_inference/.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
Maginet_inference/Dockerfile DELETED
@@ -1,30 +0,0 @@
1
- FROM python:3.11-slim
2
-
3
- WORKDIR /app
4
-
5
- # Install system dependencies
6
- RUN apt-get update && apt-get install -y \
7
- build-essential \
8
- && rm -rf /var/lib/apt/lists/*
9
-
10
- # Copy requirements first for better caching
11
- COPY requirements.txt .
12
-
13
- # Install Python dependencies
14
- RUN pip install --no-cache-dir -r requirements.txt
15
-
16
- # Copy application files
17
- COPY . .
18
-
19
- # Create necessary directories
20
- RUN mkdir -p uploads results
21
-
22
- # Expose port
23
- EXPOSE 7860
24
-
25
- # Set environment variables
26
- ENV PYTHONPATH=/app
27
- ENV FLASK_APP=app.py
28
-
29
- # Run the application
30
- CMD ["python", "app.py"]
 
Maginet_inference/README.md DELETED
@@ -1,57 +0,0 @@
1
- ---
2
- title: Vessel Trajectory Inference
3
- emoji: 🚢
4
- colorFrom: blue
5
- colorTo: purple
6
- sdk: docker
7
- pinned: false
8
- license: mit
9
- ---
10
-
11
- # 🚢 Vessel Trajectory Inference
12
-
13
- Zero-shot vessel trajectory prediction using an LSTM with attention and residual connections.
14
-
15
- ## Features
16
-
17
- - **Real-time Inference**: Upload CSV data and get instant predictions
18
- - **Interactive UI**: Modern web interface with progress tracking
19
- - **Error Analysis**: Comprehensive error metrics and distribution visualization
20
- - **Download Results**: Export predictions as CSV with detailed analysis
21
-
22
- ## Model Details
23
-
24
- - **Architecture**: LSTMWithAttentionWithResid (7 layers, 250 hidden units)
25
- - **Training Data**: Atlantic Ocean vessel trajectories
26
- - **Sequence Length**: 12 time steps
27
- - **Forecast Horizon**: 1 time step ahead
28
- - **Input Features**: latitude/longitude velocities, positions, time features
29
-
30
- ## Required Data Format
31
-
32
- Your CSV file should contain these columns:
33
- - `segment` - vessel trajectory segment ID
34
- - `latitude_velocity_km` - latitude velocity in km/h
35
- - `longitude_velocity_km` - longitude velocity in km/h
36
- - `latitude_degrees` - latitude position
37
- - `longitude_degrees` - longitude position
38
- - `time_difference_hours` - time between observations
39
- - `time_scalar` or `datetime` - temporal reference
40
-
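
A minimal validation sketch for this format (assuming pandas and a hypothetical input file `vessel_data.csv`; the separator detection mirrors what the app does internally):

```python
import pandas as pd

REQUIRED = [
    "segment", "latitude_velocity_km", "longitude_velocity_km",
    "latitude_degrees", "longitude_degrees", "time_difference_hours",
]

# The app auto-detects ';' vs ',' from the first line of the file.
with open("vessel_data.csv") as f:
    sep = ";" if ";" in f.readline() else ","

df = pd.read_csv("vessel_data.csv", sep=sep, on_bad_lines="skip")
missing = [c for c in REQUIRED if c not in df.columns]
if missing:
    raise ValueError(f"Missing required columns: {missing}")
if "time_scalar" not in df.columns and "datetime" not in df.columns:
    raise ValueError("Provide either a time_scalar or a datetime column")
```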
41
- ## Usage
42
-
43
- 1. Upload your preprocessed vessel trajectory CSV
44
- 2. (Optional) Upload custom model (.pth) or normalization (.json) files
45
- 3. Click "Start Inference" to begin prediction
46
- 4. View real-time progress and error statistics
47
- 5. Download results CSV with predictions and analysis
48
-
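
The same flow can be scripted against the app's HTTP endpoints (`/upload`, `/progress`, `/download_results`); a rough client sketch, assuming the `requests` package and a server running on the default port 7860:

```python
import requests

BASE = "http://localhost:7860"  # default port used by the app

# 'csv_file' is required; 'model_file' and 'normalization_file' are optional uploads.
with open("vessel_data.csv", "rb") as f:  # hypothetical input file
    print(requests.post(f"{BASE}/upload", files={"csv_file": f}).json())

# Poll progress until the step reaches 'Complete', then download the results CSV.
print(requests.get(f"{BASE}/progress").json())
with open("vessel_inference_results.csv", "wb") as out:
    out.write(requests.get(f"{BASE}/download_results").content)
```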
49
- ## Performance
50
-
51
- - Mean prediction error: ~2-5 km (varies by dataset)
52
- - Processing speed: ~1000 predictions/second on CPU
53
- - Supports files up to 500MB
54
-
55
- ## Citation
56
-
57
- If you use this model, please cite the original research on vessel trajectory prediction using LSTM networks with attention mechanisms.
 
Maginet_inference/app.py DELETED
@@ -1,819 +0,0 @@
1
- #!/usr/bin/env python3
2
-
3
- import os
4
- import json
5
- import time
6
- import numpy as np
7
- import pandas as pd
8
- import torch
9
- import torch.nn as nn
10
- import torch.nn.functional as F
11
- from torch.utils.data import TensorDataset, DataLoader
12
- from tqdm import tqdm
13
- from flask import Flask, render_template, request, jsonify, send_file
14
- from flask_socketio import SocketIO, emit
15
- import tempfile
16
- import threading
17
- from pathlib import Path
18
- from werkzeug.utils import secure_filename
19
-
20
- app = Flask(__name__)
21
- app.config['SECRET_KEY'] = 'your-secret-key-here'
22
- app.config['UPLOAD_FOLDER'] = 'uploads'
23
- app.config['MAX_CONTENT_LENGTH'] = 500 * 1024 * 1024 # 500MB max file size
24
- socketio = SocketIO(app, cors_allowed_origins="*")
25
-
26
- # Ensure upload directory exists
27
- os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
28
-
29
- # Global variables for progress tracking
30
- current_progress = {'step': 'idle', 'progress': 0, 'details': ''}
31
-
32
- ########################################
33
- # MODEL DEFINITION #
34
- ########################################
35
-
36
- class LSTMWithAttentionWithResid(nn.Module):
37
- def __init__(self, in_dim, hidden_dim, forecast_horizon, n_layers=10, dropout=0.2):
38
- super(LSTMWithAttentionWithResid, self).__init__()
39
- self.hidden_dim = hidden_dim
40
- self.forecast_horizon = forecast_horizon
41
-
42
- # Embedding layer
43
- self.embedding = nn.Linear(in_dim, hidden_dim)
44
-
45
- # LSTM layers
46
- self.lstm = nn.LSTM(
47
- hidden_dim, hidden_dim, num_layers=n_layers, dropout=dropout, batch_first=True
48
- )
49
-
50
- # Layer normalization after residual connection
51
- self.layer_norm = nn.LayerNorm(hidden_dim)
52
-
53
- # Attention mechanism
54
- self.attention = nn.Linear(hidden_dim, hidden_dim)
55
- self.context_vector = nn.Linear(hidden_dim, 1, bias=False) # Linear layer for scoring
56
-
57
- # Fully connected layer to map attention context to output
58
- self.fc = nn.Linear(hidden_dim, forecast_horizon * 2)
59
-
60
- def forward(self, x):
61
- # x: [batch_size, seq_len, in_dim]
62
-
63
- # Pass through embedding layer
64
- x_embed = self.embedding(x) # [batch_size, seq_len, hidden_dim]
65
-
66
- # Pass through LSTM
67
- lstm_output, (hidden, cell) = self.lstm(x_embed) # [batch_size, seq_len, hidden_dim]
68
-
69
- # Add residual connection (out-of-place)
70
- lstm_output = lstm_output + x_embed # [batch_size, seq_len, hidden_dim]
71
-
72
- # Apply layer normalization
73
- lstm_output = self.layer_norm(lstm_output) # [batch_size, seq_len, hidden_dim]
74
-
75
- # Compute attention scores
76
- attention_weights = torch.tanh(self.attention(lstm_output)) # [batch_size, seq_len, hidden_dim]
77
- attention_scores = self.context_vector(attention_weights).squeeze(-1) # [batch_size, seq_len]
78
-
79
- # Apply softmax to normalize scores
80
- attention_weights = F.softmax(attention_scores, dim=1) # [batch_size, seq_len]
81
-
82
- # Compute the context vector as a weighted sum of LSTM outputs
83
- context_vector = torch.bmm(
84
- attention_weights.unsqueeze(1), lstm_output
85
- ) # [batch_size, 1, hidden_dim]
86
- context_vector = context_vector.squeeze(1) # [batch_size, hidden_dim]
87
-
88
- # Pass context vector through fully connected layer for forecasting
89
- output = self.fc(context_vector) # [batch_size, forecast_horizon * 2]
90
-
91
- # Reshape output to match the expected shape
92
- output = output.view(-1, self.forecast_horizon, 2) # [batch_size, forecast_horizon, 2]
93
-
94
- return output
95
-
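# ---------------------------------------------------------------------------
# Illustrative sketch (added commentary, not part of the original upload):
# a quick shape check for the model above, using the Atlantic checkpoint
# configuration referenced later in this file (7 input features after the
# segment column is dropped, hidden_dim=250, n_layers=7, forecast_horizon=1,
# sequence length 12).
# ---------------------------------------------------------------------------
def _example_model_shape_check():
    model = LSTMWithAttentionWithResid(in_dim=7, hidden_dim=250,
                                       forecast_horizon=1, n_layers=7, dropout=0.2)
    dummy = torch.randn(4, 12, 7)      # [batch, seq_len, in_dim]
    out = model(dummy)                 # [batch, forecast_horizon, 2]
    assert out.shape == (4, 1, 2)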
96
- ########################################
97
- # UTILITY FUNCTIONS #
98
- ########################################
99
-
100
- def update_progress(step, progress, details=""):
101
- """Update global progress state"""
102
- global current_progress
103
- current_progress = {
104
- 'step': step,
105
- 'progress': progress,
106
- 'details': details
107
- }
108
- socketio.emit('progress_update', current_progress)
109
-
110
- def create_sequences_grouped_by_segment_lat_long_veloc(df_scaled, seq_len=12, forecast_horizon=1, features_to_scale=None):
111
- """
112
- For each segment, creates overlapping sequences of length seq_len.
113
- Returns:
114
- - Xs: input sequences,
115
- - ys: target outputs (future latitude and longitude velocities),
116
- - segments: corresponding segment IDs,
117
- - last_positions: last known positions from each sequence.
118
- """
119
- update_progress('Creating sequences', 10, f'Processing {len(df_scaled)} data points...')
120
-
121
- Xs, ys, segments, last_positions = [], [], [], []
122
-
123
- if features_to_scale is None:
124
- # CRITICAL: Match YOUR EXACT inference logic (segment first, then removed)
125
- features_to_scale = [
126
- "segment", # Index 0 - will be removed before model
127
- "latitude_velocity_km", # Index 1 -> 0 after segment removal
128
- "longitude_velocity_km", # Index 2 -> 1 after segment removal
129
- "latitude_degrees", # Index 3 -> 2 after segment removal
130
- "longitude_degrees", # Index 4 -> 3 after segment removal
131
- "time_difference_hours", # Index 5 -> 4 after segment removal
132
- "time_scalar" # Index 6 -> 5 after segment removal
133
- ]
134
-
135
- # Verify all required features exist
136
- missing_features = [f for f in features_to_scale if f not in df_scaled.columns]
137
- if missing_features:
138
- raise ValueError(f"Missing required features: {missing_features}")
139
-
140
- grouped = df_scaled.groupby('segment')
141
- total_segments = len(grouped)
142
-
143
- for i, (segment_id, group) in enumerate(grouped):
144
- group = group.reset_index(drop=True)
145
- L = len(group)
146
-
147
- # Progress update
148
- if i % max(1, total_segments // 20) == 0:
149
- progress = 10 + (i / total_segments) * 30 # 10-40% range
150
- update_progress('Creating sequences', progress,
151
- f'Processing segment {i+1}/{total_segments}')
152
-
153
- if L >= seq_len + forecast_horizon:
154
- for j in range(L - seq_len - forecast_horizon + 1):
155
- # Get sequence features
156
- seq = group.iloc[j:(j+seq_len)][features_to_scale].to_numpy()
157
-
158
- # Get future time scalar for the forecast horizon
159
- future_time = group['time_scalar'].iloc[j + seq_len + forecast_horizon - 1]
160
- future_time_feature = np.full((seq_len, 1), future_time)
161
-
162
- # Augment sequence with future time
163
- seq_aug = np.hstack((seq, future_time_feature))
164
- Xs.append(seq_aug)
165
-
166
- # Target: future velocity
167
- target = group[['latitude_velocity_km', 'longitude_velocity_km']].iloc[j + seq_len + forecast_horizon - 1].to_numpy()
168
- ys.append(target)
169
-
170
- segments.append(segment_id)
171
-
172
- # Last known position
173
- last_pos = group[['latitude_degrees', 'longitude_degrees']].iloc[j + seq_len - 1].to_numpy()
174
- last_positions.append(last_pos)
175
-
176
- return (np.array(Xs, dtype=np.float32),
177
- np.array(ys, dtype=np.float32),
178
- np.array(segments),
179
- np.array(last_positions, dtype=np.float32))
180
-
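# ---------------------------------------------------------------------------
# Illustrative sketch (added commentary, not part of the original upload):
# expected output shapes from the sequence builder above. A single segment
# with 20 rows, seq_len=12 and forecast_horizon=1 yields 20-12-1+1 = 8
# windows, each of shape (12, 8): the 7 listed features plus the appended
# future-time column.
# ---------------------------------------------------------------------------
def _example_sequence_shapes():
    demo = pd.DataFrame({
        "segment": [1] * 20,
        "latitude_velocity_km": np.random.rand(20),
        "longitude_velocity_km": np.random.rand(20),
        "latitude_degrees": np.random.rand(20),
        "longitude_degrees": np.random.rand(20),
        "time_difference_hours": np.random.rand(20),
        "time_scalar": np.linspace(0.0, 1.0, 20),
    })
    Xs, ys, segments, last_positions = create_sequences_grouped_by_segment_lat_long_veloc(demo)
    assert Xs.shape == (8, 12, 8) and ys.shape == (8, 2)
    assert segments.shape == (8,) and last_positions.shape == (8, 2)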
181
- def load_normalization_params(json_path):
182
- """Load normalization parameters from JSON file"""
183
- with open(json_path, "r") as f:
184
- normalization_params = json.load(f)
185
- return normalization_params["feature_mins"], normalization_params["feature_maxs"]
186
-
187
- def minmax_denormalize(scaled_series, feature_min, feature_max):
188
- """Denormalize data using min-max scaling"""
189
- return scaled_series * (feature_max - feature_min) + feature_min
190
-
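# ---------------------------------------------------------------------------
# Illustrative sketch (added commentary, not part of the original upload):
# round trip of the min-max scaling that the pipeline applies on the way in
# and inverts here. With feature_min=2.0 and feature_max=60.0, a raw value
# of 31.0 scales to 0.5 and denormalizes back to 31.0.
# ---------------------------------------------------------------------------
def _example_minmax_roundtrip():
    feature_min, feature_max = 2.0, 60.0
    raw = 31.0
    scaled = (raw - feature_min) / (feature_max - feature_min)   # 0.5
    assert abs(minmax_denormalize(scaled, feature_min, feature_max) - raw) < 1e-9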
191
- ########################################
192
- # INFERENCE PIPELINE #
193
- ########################################
194
-
195
- def run_inference_pipeline(csv_file_path, model_path, normalization_path):
196
- """Complete inference pipeline following Final_inference_maginet.py logic"""
197
-
198
- try:
199
- # Step 1: Load and validate data
200
- update_progress('Loading data', 5, 'Reading CSV file...')
201
-
202
- # Enhanced CSV parsing with error handling
203
- try:
204
- # Determine separator by reading first few lines
205
- with open(csv_file_path, 'r') as f:
206
- first_line = f.readline()
207
- separator = ';' if ';' in first_line else ','
208
-
209
- # Try reading with detected separator
210
- df = pd.read_csv(csv_file_path, sep=separator, on_bad_lines='skip')
211
- update_progress('Loading data', 8, f'Loaded {len(df)} rows with separator "{separator}"')
212
-
213
- # Debug: Print actual column names
214
- print(f"🔍 CSV COLUMNS FOUND: {list(df.columns)}")
215
- update_progress('Loading data', 8.5, f'Columns: {list(df.columns)}')
216
-
217
- except Exception as e:
218
- print(f"❌ CSV PARSING ERROR: {e}")
219
- # Try alternative parsing methods
220
- try:
221
- df = pd.read_csv(csv_file_path, sep=',', on_bad_lines='skip')
222
- update_progress('Loading data', 8, f'Loaded {len(df)} rows with comma separator (fallback)')
223
- print(f"🔍 CSV COLUMNS FOUND (fallback): {list(df.columns)}")
224
- except Exception as e2:
225
- try:
226
- df = pd.read_csv(csv_file_path, sep=';', on_bad_lines='skip')
227
- update_progress('Loading data', 8, f'Loaded {len(df)} rows with semicolon separator (fallback)')
228
- print(f"🔍 CSV COLUMNS FOUND (fallback): {list(df.columns)}")
229
- except Exception as e3:
230
- raise ValueError(f"Could not parse CSV file. Tried multiple separators. Errors: {e}, {e2}, {e3}")
231
-
232
- # CRITICAL: Create time_scalar (was missing from inference dataset!)
233
- if 'time_scalar' not in df.columns:
234
- if 'datetime' in df.columns:
235
- # Convert datetime to time_scalar (preferred method)
236
- df['datetime'] = pd.to_datetime(df['datetime'], errors='coerce')
237
- reference_date = pd.Timestamp('2023-01-01')
238
- df['time_scalar'] = ((df['datetime'] - reference_date) / pd.Timedelta(days=1)).round(8)
239
- update_progress('Loading data', 9, 'Created time_scalar from datetime column')
240
- elif 'time_decimal' in df.columns:
241
- # Use time_decimal directly as time_scalar (alternative method)
242
- df['time_scalar'] = df['time_decimal'].copy()
243
- update_progress('Loading data', 9, 'Created time_scalar from time_decimal column')
244
- elif all(col in df.columns for col in ['day', 'month', 'time_decimal']):
245
- # Create datetime from components and then time_scalar
246
- df['year'] = df.get('year', 2024) # Default year if not present
247
- df['datetime'] = pd.to_datetime(df[['year', 'month', 'day']], errors='coerce')
248
- df['datetime'] += pd.to_timedelta(df['time_decimal'], unit='h')
249
- reference_date = pd.Timestamp('2023-01-01')
250
- df['time_scalar'] = ((df['datetime'] - reference_date) / pd.Timedelta(days=1)).round(8)
251
- update_progress('Loading data', 9, 'Created time_scalar from day/month/time_decimal')
252
- else:
253
- # Create a simple sequential time_scalar based on row order
254
- df['time_scalar'] = df.index / len(df)
255
- update_progress('Loading data', 9, 'Created sequential time_scalar')
256
-
257
- # Validate required columns with detailed error reporting
258
- required_columns = [
259
- 'segment', 'latitude_velocity_km', 'longitude_velocity_km',
260
- 'latitude_degrees', 'longitude_degrees', 'time_difference_hours', 'time_scalar'
261
- ]
262
-
263
- print(f"🔍 REQUIRED COLUMNS: {required_columns}")
264
- print(f"🔍 ACTUAL COLUMNS: {list(df.columns)}")
265
-
266
- missing_columns = [col for col in required_columns if col not in df.columns]
267
- if missing_columns:
268
- available_cols = list(df.columns)
269
- error_msg = f"""
270
- ❌ COLUMN VALIDATION ERROR:
271
- Missing required columns: {missing_columns}
272
- Available columns: {available_cols}
273
-
274
- Column mapping suggestions:
275
- - Check for extra spaces or different naming
276
- - Verify CSV file format and encoding
277
- - Ensure time_scalar column exists or can be created
278
- """
279
- print(error_msg)
280
- raise ValueError(f"Missing required columns: {missing_columns}. Available: {available_cols}")
281
-
282
- # CRITICAL: Apply the SAME data filtering as training/notebook
283
- update_progress('Filtering data', 10, 'Applying quality filters...')
284
- original_count = len(df)
285
-
286
- # 1. Calculate speed column if missing (CRITICAL!)
287
- if 'speed_km_h' not in df.columns:
288
- df['speed_km_h'] = np.sqrt(df['latitude_velocity_km']**2 + df['longitude_velocity_km']**2)
289
- update_progress('Filtering data', 10.5, 'Calculated speed_km_h column')
290
-
291
- # 2. Speed filtering - EXACTLY like training
292
- df = df[(df['speed_km_h'] >= 2) & (df['speed_km_h'] <= 60)].copy()
293
- update_progress('Filtering data', 11, f'Speed filter: {original_count} -> {len(df)} rows')
294
-
295
- # 3. Velocity filtering - CRITICAL for performance!
296
- velocity_mask = (
297
- (np.abs(df['latitude_velocity_km']) <= 100) &
298
- (np.abs(df['longitude_velocity_km']) <= 100) &
299
- (df['time_difference_hours'] > 0) &
300
- (df['time_difference_hours'] <= 24) # Max 24 hours between points
301
- )
302
- df = df[velocity_mask].copy()
303
- update_progress('Filtering data', 12, f'Velocity filter: -> {len(df)} rows')
304
-
305
- # 4. Segment length filtering - Remove segments with < 20 points
306
- segment_counts = df['segment'].value_counts()
307
- segments_to_remove = segment_counts[segment_counts < 20].index
308
- before_segment_filter = len(df)
309
- df = df[~df['segment'].isin(segments_to_remove)].copy()
310
- update_progress('Filtering data', 13, f'Segment filter: {before_segment_filter} -> {len(df)} rows')
311
-
312
- # 5. Remove NaN and infinite values
313
- df = df.dropna().copy()
314
- numeric_cols = ['latitude_velocity_km', 'longitude_velocity_km', 'time_difference_hours']
315
- for col in numeric_cols:
316
- if col in df.columns:
317
- df = df[~np.isinf(df[col])].copy()
318
-
319
- # DEBUGGING: Add detailed filtering statistics
320
- filtered_count = len(df)
321
- filter_percent = ((original_count - filtered_count) / original_count) * 100
322
- update_progress('Filtering data', 14, f'Final filtered data: {filtered_count} rows ({original_count - filtered_count} removed = {filter_percent:.1f}%)')
323
-
324
- # Debug info for analysis
325
- print(f"🔍 FILTERING SUMMARY:")
326
- print(f" Original: {original_count:,} rows")
327
- print(f" Final: {filtered_count:,} rows")
328
- print(f" Removed: {original_count - filtered_count:,} ({filter_percent:.1f}%)")
329
-
330
- if len(df) == 0:
331
- raise ValueError("No data remaining after quality filtering. Check your input data quality.")
332
-
333
- # Step 2: Load normalization parameters
334
- update_progress('Loading normalization', 12, 'Loading normalization parameters...')
335
- feature_mins, feature_maxs = load_normalization_params(normalization_path)
336
-
337
- # Step 2.5: CRITICAL - Normalize the test data (missing step causing 3373km error!)
338
- update_progress('Normalizing data', 15, 'Applying normalization to test data...')
339
- features_to_normalize = ['latitude_velocity_km', 'longitude_velocity_km',
340
- 'latitude_degrees', 'longitude_degrees',
341
- 'time_difference_hours', 'time_scalar']
342
-
343
- for feature in features_to_normalize:
344
- if feature in df.columns and feature in feature_mins:
345
- min_val = feature_mins[feature]
346
- max_val = feature_maxs[feature]
347
- rng = max_val - min_val if max_val != min_val else 1
348
- df[feature] = (df[feature] - min_val) / rng
349
- update_progress('Normalizing data', 18, f'Normalized {feature}')
350
-
351
- # Step 3: Create sequences
352
- SEQ_LENGTH = 12
353
- FORECAST_HORIZON = 1
354
-
355
- X_test, y_test, test_segments, last_known_positions_scaled = create_sequences_grouped_by_segment_lat_long_veloc(
356
- df, seq_len=SEQ_LENGTH, forecast_horizon=FORECAST_HORIZON
357
- )
358
-
359
- update_progress('Preparing model', 45, f'Created {len(X_test)} sequences')
360
-
361
- if len(X_test) == 0:
362
- raise ValueError("No valid sequences could be created. Check your data and sequence length requirements.")
363
-
364
- # Step 4: Prepare data for model
365
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
366
- X_test_tensor = torch.from_numpy(X_test).float().to(device)
367
- y_test_tensor = torch.from_numpy(y_test).float().to(device)
368
- test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
369
- test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
370
-
371
- # Step 5: Load model
372
- update_progress('Loading model', 50, 'Loading trained model...')
373
-
374
- # CRITICAL: Model expects 6 features (segment removed) + 1 future_time = 7 total
375
- in_dim = X_test.shape[2] - 1 # Remove segment column dimension
376
- # CRITICAL: Match the exact model architecture from Atlantic model weights
377
- hidden_dim = 250 # From best_model.pth
378
- n_layers = 7 # From best_model.pth (CRITICAL: not 10!)
379
- dropout = 0.2
380
-
381
- model = LSTMWithAttentionWithResid(
382
- in_dim, hidden_dim, FORECAST_HORIZON,
383
- n_layers=n_layers, dropout=dropout
384
- ).to(device)
385
-
386
- model.load_state_dict(torch.load(model_path, map_location=device))
387
- model.eval()
388
-
389
- # Step 6: Run inference
390
- update_progress('Running inference', 60, 'Making predictions...')
391
-
392
- # CRITICAL: Extract features batch-by-batch like your notebook
393
- all_preds = []
394
- segments_extracted = []
395
- time_scalars_extracted = []
396
- time_diff_hours_extracted = []
397
-
398
- with torch.no_grad():
399
- for i, batch in enumerate(test_loader):
400
- x_batch, _ = batch
401
-
402
- # CRITICAL: Extract features exactly like your notebook
403
- segment_batch = x_batch[:, 0, 0].cpu().numpy() # Take segment from first time step
404
- time_scalar_batch = x_batch[:, -1, 6].cpu().numpy() # LAST timestep, index 6 = time_scalar
405
- time_diff_hours_batch = x_batch[:, 0, 5].cpu().numpy() # First timestep, index 5
406
-
407
- segments_extracted.extend(segment_batch)
408
- time_scalars_extracted.extend(time_scalar_batch)
409
- time_diff_hours_extracted.extend(time_diff_hours_batch)
410
-
411
- # Remove segment column before model input
412
- x_batch_no_segment = x_batch[:, :, 1:] # Remove segment (index 0) but keep all other features
413
- preds = model(x_batch_no_segment)
414
- all_preds.append(preds.cpu().numpy())
415
-
416
- # Progress update
417
- progress = 60 + (i / len(test_loader)) * 20 # 60-80% range
418
- update_progress('Running inference', progress,
419
- f'Processing batch {i+1}/{len(test_loader)}')
420
-
421
- all_preds = np.concatenate(all_preds, axis=0)
422
-
423
- # Step 7: Process results
424
- update_progress('Processing results', 80, 'Processing predictions...')
425
-
426
- # CRITICAL: Reshape predictions exactly like your notebook
427
- yhat = torch.from_numpy(all_preds)
428
- yhat = yhat.view(-1, 2) # Reshape to [batch_size, 2] - EXACTLY like your notebook
429
-
430
- # Extract predictions exactly like your notebook
431
- predicted_lat_vel = yhat[:, 0].numpy() # Predicted lat velocity
432
- predicted_lon_vel = yhat[:, 1].numpy() # Predicted lon velocity
433
-
434
- # Extract actual values exactly like your notebook
435
- y_real = y_test_tensor.cpu()
436
- actual_lat_vel = y_real[:, 0].numpy() # Actual lat velocity
437
- actual_lon_vel = y_real[:, 1].numpy() # Actual lon velocity
438
-
439
- # CRITICAL: Use extracted features from batches (matching your notebook exactly)
440
- # Ensure all arrays have consistent length
441
- num_samples = len(predicted_lat_vel)
442
- segments_extracted = segments_extracted[:num_samples]
443
- time_scalars_extracted = time_scalars_extracted[:num_samples]
444
- time_diff_hours_extracted = time_diff_hours_extracted[:num_samples]
445
- last_known_positions_scaled = last_known_positions_scaled[:num_samples]
446
-
447
- # Create results dataframe exactly like your notebook
448
- results_df = pd.DataFrame({
449
- 'segment': segments_extracted, # From batch extraction
450
- 'time_difference_hours': time_diff_hours_extracted, # From batch extraction (first timestep)
451
- 'Time Scalar': time_scalars_extracted, # From batch extraction (LAST timestep)
452
- 'Last Known Latitude': [pos[0] for pos in last_known_positions_scaled],
453
- 'Last Known Longitude': [pos[1] for pos in last_known_positions_scaled],
454
- 'predicted_lat_km': predicted_lat_vel,
455
- 'predicted_lon_km': predicted_lon_vel,
456
- 'actual_lat_km': actual_lat_vel,
457
- 'actual_lon_km': actual_lon_vel
458
- })
459
-
460
- # Step 8: Denormalize results
461
- update_progress('Denormalizing results', 85, 'Converting to real units...')
462
-
463
- # Column to feature mapping (COMPLETE mapping for all denormalizable columns)
464
- column_to_feature = {
465
- "predicted_lat_km": "latitude_velocity_km",
466
- "predicted_lon_km": "longitude_velocity_km",
467
- "actual_lat_km": "latitude_velocity_km",
468
- "actual_lon_km": "longitude_velocity_km",
469
- "Last Known Latitude": "latitude_degrees",
470
- "Last Known Longitude": "longitude_degrees",
471
- "time_difference_hours": "time_difference_hours",
472
- "Time Scalar": "time_scalar"
473
- }
474
-
475
- # Denormalize relevant columns
476
- for col, feat in column_to_feature.items():
477
- if col in results_df.columns and feat in feature_mins:
478
- fmin = feature_mins[feat]
479
- fmax = feature_maxs[feat]
480
- results_df[col + "_unscaled"] = minmax_denormalize(results_df[col], fmin, fmax)
481
- update_progress('Denormalizing results', 85, f'Denormalized {col}')
482
-
483
- # Ensure all required _unscaled columns exist
484
- required_unscaled_cols = [
485
- 'predicted_lat_km_unscaled', 'predicted_lon_km_unscaled',
486
- 'actual_lat_km_unscaled', 'actual_lon_km_unscaled',
487
- 'Last Known Latitude_unscaled', 'Last Known Longitude_unscaled',
488
- 'time_difference_hours_unscaled'
489
- ]
490
-
491
- for col in required_unscaled_cols:
492
- if col not in results_df.columns:
493
- base_col = col.replace('_unscaled', '')
494
- if base_col in results_df.columns:
495
- # If base column exists but wasn't denormalized, copy it
496
- results_df[col] = results_df[base_col]
497
- update_progress('Denormalizing results', 87, f'Created missing {col}')
498
- else:
499
- results_df[col] = 0.0
500
- update_progress('Denormalizing results', 87, f'Defaulted missing {col} to 0')
501
-
502
- # ---------------------------
503
- # NEW: Clip predicted velocities to realistic physical bounds to avoid huge errors
504
- # ---------------------------
505
- VELOCITY_RANGE_KM_H = (-100, 100) # Same limits used during input filtering
506
- results_df["predicted_lat_km_unscaled"] = results_df["predicted_lat_km_unscaled"].clip(*VELOCITY_RANGE_KM_H)
507
- results_df["predicted_lon_km_unscaled"] = results_df["predicted_lon_km_unscaled"].clip(*VELOCITY_RANGE_KM_H)
508
- update_progress('Denormalizing results', 88, 'Clipped predicted velocities to realistic range')
509
-
510
- # Step 9: Calculate final positions and errors (EXACT column structure matching your notebook)
511
- update_progress('Calculating errors', 90, 'Computing prediction errors...')
512
-
513
- # Compute displacement components (in km)
514
- results_df["pred_final_lat_km_component"] = (
515
- results_df["predicted_lat_km_unscaled"] * results_df["time_difference_hours_unscaled"]
516
- )
517
- results_df["pred_final_lon_km_component"] = (
518
- results_df["predicted_lon_km_unscaled"] * results_df["time_difference_hours_unscaled"]
519
- )
520
- results_df["actual_final_lat_km_component"] = (
521
- results_df["actual_lat_km_unscaled"] * results_df["time_difference_hours_unscaled"]
522
- )
523
- results_df["actual_final_lon_km_component"] = (
524
- results_df["actual_lon_km_unscaled"] * results_df["time_difference_hours_unscaled"]
525
- )
526
-
527
- # Calculate total displacement magnitudes (MISSING COLUMNS!)
528
- results_df["pred_final_km"] = np.sqrt(
529
- results_df["pred_final_lat_km_component"]**2 + results_df["pred_final_lon_km_component"]**2
530
- )
531
- results_df["actual_final_km"] = np.sqrt(
532
- results_df["actual_final_lat_km_component"]**2 + results_df["actual_final_lon_km_component"]**2
533
- )
534
-
535
- # Calculate Euclidean distance error (in km)
536
- results_df["error_km"] = np.sqrt(
537
- (results_df["pred_final_lat_km_component"] - results_df["actual_final_lat_km_component"])**2 +
538
- (results_df["pred_final_lon_km_component"] - results_df["actual_final_lon_km_component"])**2
539
- )
540
-
541
- # Compute final positions in degrees
542
- km_per_deg_lat = 111 # approximate conversion for latitude
543
- results_df["pred_final_lat_deg"] = results_df["Last Known Latitude_unscaled"] + (
544
- results_df["predicted_lat_km_unscaled"] * results_df["time_difference_hours_unscaled"]
545
- ) / km_per_deg_lat
546
- results_df["actual_final_lat_deg"] = results_df["Last Known Latitude_unscaled"] + (
547
- results_df["actual_lat_km_unscaled"] * results_df["time_difference_hours_unscaled"]
548
- ) / km_per_deg_lat
549
-
550
- # Account for longitude scaling by latitude
551
- results_df["Last_Known_Lat_rad"] = np.deg2rad(results_df["Last Known Latitude_unscaled"])
552
- results_df["pred_final_lon_deg"] = results_df["Last Known Longitude_unscaled"] + (
553
- results_df["predicted_lon_km_unscaled"] * results_df["time_difference_hours_unscaled"]
554
- ) / (km_per_deg_lat * np.cos(results_df["Last_Known_Lat_rad"]))
555
- results_df["actual_final_lon_deg"] = results_df["Last Known Longitude_unscaled"] + (
556
- results_df["actual_lon_km_unscaled"] * results_df["time_difference_hours_unscaled"]
557
- ) / (km_per_deg_lat * np.cos(results_df["Last_Known_Lat_rad"]))
558
-
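# -----------------------------------------------------------------------
# Added worked example (illustrative, not part of the original script):
# at a last known latitude of 45°N, a denormalized northward velocity of
# 10 km/h over a 0.5 h gap gives a 5 km displacement, i.e. roughly
# 5 / 111 ≈ 0.045° of latitude; the same 5 km eastward corresponds to
# 5 / (111 * cos(45°)) ≈ 0.064° of longitude, which is why the longitude
# terms above are divided by km_per_deg_lat * cos(latitude).
# -----------------------------------------------------------------------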
559
- # Step 10: Reorder columns to match your EXACT specification
560
- update_progress('Finalizing results', 93, 'Reordering columns to match notebook format...')
561
-
562
- # EXACT column order as specified by user
563
- column_order = [
564
- 'segment', 'time_difference_hours', 'Time Scalar', 'Last Known Latitude', 'Last Known Longitude',
565
- 'predicted_lat_km', 'predicted_lon_km', 'actual_lat_km', 'actual_lon_km',
566
- 'predicted_lat_km_unscaled', 'predicted_lon_km_unscaled', 'actual_lat_km_unscaled', 'actual_lon_km_unscaled',
567
- 'Last Known Latitude_unscaled', 'Last Known Longitude_unscaled', 'time_difference_hours_unscaled',
568
- 'pred_final_km', 'actual_final_km',
569
- 'pred_final_lat_km_component', 'pred_final_lon_km_component',
570
- 'actual_final_lat_km_component', 'actual_final_lon_km_component',
571
- 'error_km', 'pred_final_lat_deg', 'actual_final_lat_deg', 'Last_Known_Lat_rad',
572
- 'pred_final_lon_deg', 'actual_final_lon_deg'
573
- ]
574
-
575
- # Validate all required columns exist - add missing ones with defaults if needed
576
- missing_columns = [col for col in column_order if col not in results_df.columns]
577
- if missing_columns:
578
- update_progress('Finalizing results', 94, f'Adding missing columns: {missing_columns}')
579
- for col in missing_columns:
580
- # Add default values for any missing columns
581
- if '_unscaled' in col:
582
- # For unscaled columns, try to find the original scaled column
583
- base_col = col.replace('_unscaled', '')
584
- if base_col in results_df.columns and base_col in column_to_feature:
585
- # Use the same denormalization process
586
- feat = column_to_feature[base_col]
587
- if feat in feature_mins:
588
- fmin = feature_mins[feat]
589
- fmax = feature_maxs[feat]
590
- results_df[col] = minmax_denormalize(results_df[base_col], fmin, fmax)
591
- else:
592
- results_df[col] = results_df[base_col] # No denormalization available
593
- else:
594
- results_df[col] = 0.0 # Default to 0
595
- else:
596
- results_df[col] = 0.0 # Default to 0 for any other missing columns
597
-
598
- # Reorder columns to match exact specification
599
- results_df = results_df[column_order]
600
-
601
- # Step 11: Save results
602
- update_progress('Saving results', 95, 'Saving inference results...')
603
-
604
- # Create results directory
605
- results_dir = Path('results/inference_atlantic')
606
- results_dir.mkdir(parents=True, exist_ok=True)
607
-
608
- # Save to results directory
609
- timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
610
- results_file = results_dir / f'inference_results_{timestamp}.csv'
611
- results_df.to_csv(results_file, index=False)
612
-
613
- # Also save to temporary file for compatibility
614
- output_file = tempfile.NamedTemporaryFile(
615
- mode='w', suffix='_inference_results.csv', delete=False
616
- )
617
- results_df.to_csv(output_file.name, index=False)
618
-
619
- # CRITICAL: Calculate SAME regression metrics as your notebook
620
- # Convert predictions and actuals to tensors for metric calculation
621
- yhat_tensor = torch.from_numpy(np.column_stack([predicted_lat_vel, predicted_lon_vel])).float()
622
- y_real_tensor = torch.from_numpy(np.column_stack([actual_lat_vel, actual_lon_vel])).float()
623
-
624
- # Calculate regression metrics exactly like your notebook
625
- def calc_metrics_like_notebook(preds, labels):
626
- """Calculate metrics exactly like your notebook's calc_metrics function"""
627
- EPS = 1e-8
628
- mse = torch.mean((preds - labels) ** 2)
629
- mae = torch.mean(torch.abs(preds - labels))
630
- rmse = torch.sqrt(mse)
631
- mape = torch.mean(torch.abs((preds - labels) / (labels + EPS))) * 100 # Convert to percentage
632
- rse = torch.sum((preds - labels) ** 2) / torch.sum((labels + EPS) ** 2)
633
- return rse.item(), mae.item(), mse.item(), mape.item(), rmse.item()
634
-
635
- # Calculate regression metrics on velocity predictions
636
- rse, mae, mse, mape, rmse = calc_metrics_like_notebook(yhat_tensor, y_real_tensor)
637
-
638
- # Calculate summary statistics
639
- error_stats = {
640
- # Distance-based metrics (web app specific)
641
- 'mean_error_km': float(results_df["error_km"].mean()),
642
- 'median_error_km': float(results_df["error_km"].median()),
643
- 'std_error_km': float(results_df["error_km"].std()),
644
- 'min_error_km': float(results_df["error_km"].min()),
645
- 'max_error_km': float(results_df["error_km"].max()),
646
-
647
- # Regression metrics (matching your notebook)
648
- 'rse': rse,
649
- 'mae': mae,
650
- 'mse': mse,
651
- 'mape': mape,
652
- 'rmse': rmse,
653
-
654
- # General stats
655
- 'total_predictions': len(results_df),
656
- 'total_segments': len(results_df['segment'].unique()),
657
- 'columns_generated': list(results_df.columns),
658
- 'total_columns': len(results_df.columns)
659
- }
660
-
661
- # NEW: Create histogram of error distribution (30 bins by default)
662
- hist_counts, bin_edges = np.histogram(results_df["error_km"], bins=30)
663
- histogram_data = {
664
- 'bins': bin_edges.tolist(),
665
- 'counts': hist_counts.tolist()
666
- }
667
-
668
- update_progress('Complete', 100,
669
- f'✅ Inference complete! Distance: {error_stats["mean_error_km"]:.2f} km | MAE: {error_stats["mae"]:.2f} | MAPE: {error_stats["mape"]:.2f}%')
670
-
671
- # Emit inference_complete with full statistics and histogram for the frontend chart
672
- try:
673
- socketio.emit('inference_complete', {
674
- 'success': True,
675
- 'stats': error_stats,
676
- 'histogram': histogram_data
677
- })
678
- except Exception:
679
- pass # In case we are in CLI context without SocketIO
680
-
681
- return {
682
- 'success': True,
683
- 'results_file': output_file.name,
684
- 'stats': error_stats,
685
- 'histogram': histogram_data,
686
- 'message': f'Successfully processed {len(results_df)} predictions'
687
- }
688
-
689
- except Exception as e:
690
- error_msg = f"Error during inference: {str(e)}"
691
- update_progress('Error', 0, error_msg)
692
- return {
693
- 'success': False,
694
- 'error': error_msg
695
- }
696
-
697
- ########################################
698
- # WEB ROUTES #
699
- ########################################
700
-
701
- @app.route('/')
702
- def index():
703
- return render_template('vessel_inference.html')
704
-
705
- @app.route('/upload', methods=['POST'])
706
- def upload_file():
707
- try:
708
- # Check if files were uploaded
709
- if 'csv_file' not in request.files:
710
- return jsonify({'success': False, 'error': 'No CSV file uploaded'})
711
-
712
- csv_file = request.files['csv_file']
713
- if csv_file.filename == '':
714
- return jsonify({'success': False, 'error': 'No CSV file selected'})
715
-
716
- # Default model and normalization files
717
- model_path = 'best_model.pth'
718
- normalization_path = 'normalization_params_1_atlanttic_regular_intervals_with_lat_lon_velocity_and_time_difference_filter_outlier_segment_min_20_points.json'
719
-
720
- # Check for optional uploads
721
- if 'model_file' in request.files and request.files['model_file'].filename != '':
722
- model_file = request.files['model_file']
723
- model_filename = secure_filename(model_file.filename)
724
- model_path = os.path.join(app.config['UPLOAD_FOLDER'], model_filename)
725
- model_file.save(model_path)
726
-
727
- if 'normalization_file' in request.files and request.files['normalization_file'].filename != '':
728
- norm_file = request.files['normalization_file']
729
- norm_filename = secure_filename(norm_file.filename)
730
- normalization_path = os.path.join(app.config['UPLOAD_FOLDER'], norm_filename)
731
- norm_file.save(normalization_path)
732
-
733
- # Check if required files exist
734
- if not os.path.exists(model_path):
735
- return jsonify({'success': False, 'error': f'Model file not found: {model_path}'})
736
-
737
- if not os.path.exists(normalization_path):
738
- return jsonify({'success': False, 'error': f'Normalization file not found: {normalization_path}'})
739
-
740
- # Save CSV file
741
- csv_filename = secure_filename(csv_file.filename)
742
- csv_path = os.path.join(app.config['UPLOAD_FOLDER'], csv_filename)
743
- csv_file.save(csv_path)
744
-
745
- # Start inference in background thread
746
- def run_inference_background():
747
- return run_inference_pipeline(csv_path, model_path, normalization_path)
748
-
749
- thread = threading.Thread(target=run_inference_background)
750
- thread.start()
751
-
752
- return jsonify({'success': True, 'message': 'Files uploaded successfully. Inference started.'})
753
-
754
- except Exception as e:
755
- return jsonify({'success': False, 'error': str(e)})
756
-
757
- @app.route('/progress')
758
- def get_progress():
759
- return jsonify(current_progress)
760
-
761
- @app.route('/download_results')
762
- def download_results():
763
- # Find the most recent results file
764
- upload_dir = app.config['UPLOAD_FOLDER']
765
- temp_dir = tempfile.gettempdir()
766
-
767
- # Look for results files in both directories
768
- for directory in [upload_dir, temp_dir]:
769
- if os.path.exists(directory):
770
- files = [f for f in os.listdir(directory) if f.endswith('_inference_results.csv')]
771
- if files:
772
- latest_file = max(files, key=lambda x: os.path.getctime(os.path.join(directory, x)))
773
- return send_file(
774
- os.path.join(directory, latest_file),
775
- as_attachment=True,
776
- download_name='vessel_inference_results.csv'
777
- )
778
-
779
- return jsonify({'error': 'No results file found'}), 404
780
-
781
- ########################################
782
- # SOCKETIO EVENTS #
783
- ########################################
784
-
785
- @socketio.on('connect')
786
- def handle_connect():
787
- emit('progress_update', current_progress)
788
-
789
- @socketio.on('start_inference')
790
- def handle_start_inference(data):
791
- """Handle inference request via WebSocket"""
792
- try:
793
- csv_path = data.get('csv_path')
794
- model_path = data.get('model_path', 'best_model.pth')
795
- norm_path = data.get('normalization_path', 'normalization_params_1_atlanttic_regular_intervals_with_lat_lon_velocity_and_time_difference_filter_outlier_segment_min_20_points.json')
796
-
797
- def run_inference_background():
798
- result = run_inference_pipeline(csv_path, model_path, norm_path)
799
- emit('inference_complete', result)
800
-
801
- thread = threading.Thread(target=run_inference_background)
802
- thread.start()
803
-
804
- except Exception as e:
805
- emit('inference_complete', {'success': False, 'error': str(e)})
806
-
807
- if __name__ == '__main__':
808
- print("🚢 Vessel Trajectory Inference Web App")
809
- print("📊 Using Final_inference_maginet.py logic")
810
-
811
- # Get port from environment variable (Hugging Face Spaces uses 7860)
812
- port = int(os.environ.get('PORT', 7860))
813
- print(f"🌐 Starting server at http://0.0.0.0:{port}")
814
- print("📝 Make sure you have:")
815
- print(" - best_model.pth")
816
- print(" - normalization_params_1_atlanttic_regular_intervals_...json")
817
- print(" - Your test dataset CSV")
818
-
819
- socketio.run(app, host='0.0.0.0', port=port, debug=False)
 
Maginet_inference/requirements.txt DELETED
@@ -1,13 +0,0 @@
1
- # Core ML and Data Processing
2
- torch>=1.9.0,<2.1.0
3
- pandas>=1.3.0,<2.1.0
4
- numpy>=1.21.0,<1.25.0
5
- tqdm>=4.62.0
6
-
7
- # Web Interface
8
- flask>=2.0.0,<3.0.0
9
- flask-socketio>=5.1.0,<6.0.0
10
- werkzeug>=2.0.0,<3.0.0
11
-
12
- # Additional utilities
13
- pathlib
 
Maginet_inference/templates/vessel_inference.html DELETED
@@ -1,681 +0,0 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
- <head>
4
- <meta charset="UTF-8">
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>🚢 Vessel Trajectory Inference</title>
7
- <script src="https://cdn.socket.io/4.0.0/socket.io.min.js"></script>
8
- <!-- Chart.js for error distribution plot -->
9
- <script src="https://cdn.jsdelivr.net/npm/chart.js/dist/chart.umd.min.js"></script>
10
- <style>
11
- * {
12
- margin: 0;
13
- padding: 0;
14
- box-sizing: border-box;
15
- }
16
-
17
- body {
18
- font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
19
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
20
- min-height: 100vh;
21
- padding: 20px;
22
- }
23
-
24
- .container {
25
- max-width: 900px;
26
- margin: 0 auto;
27
- background: white;
28
- border-radius: 15px;
29
- box-shadow: 0 20px 40px rgba(0,0,0,0.1);
30
- overflow: hidden;
31
- }
32
-
33
- .header {
34
- background: linear-gradient(135deg, #2c3e50 0%, #3498db 100%);
35
- color: white;
36
- padding: 30px;
37
- text-align: center;
38
- }
39
-
40
- .header h1 {
41
- font-size: 2.5rem;
42
- margin-bottom: 10px;
43
- text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
44
- }
45
-
46
- .header p {
47
- font-size: 1.1rem;
48
- opacity: 0.9;
49
- margin-bottom: 5px;
50
- }
51
-
52
- .subtitle {
53
- font-size: 0.95rem !important;
54
- font-style: italic;
55
- opacity: 0.8 !important;
56
- }
57
-
58
- .content {
59
- padding: 40px;
60
- }
61
-
62
- .data-format-info {
63
- background: #e8f4fd;
64
- border: 2px solid #3498db;
65
- border-radius: 10px;
66
- padding: 20px;
67
- margin-bottom: 30px;
68
- }
69
-
70
- .data-format-info h3 {
71
- color: #2c3e50;
72
- margin-bottom: 15px;
73
- font-size: 1.3rem;
74
- }
75
-
76
- .required-columns {
77
- display: grid;
78
- grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
79
- gap: 10px;
80
- margin-top: 15px;
81
- }
82
-
83
- .column-item {
84
- background: white;
85
- padding: 10px;
86
- border-radius: 5px;
87
- border-left: 4px solid #3498db;
88
- font-family: 'Courier New', monospace;
89
- font-size: 0.9rem;
90
- }
91
-
92
- .upload-section {
93
- margin-bottom: 30px;
94
- }
95
-
96
- .upload-section h3 {
97
- color: #2c3e50;
98
- margin-bottom: 15px;
99
- font-size: 1.3rem;
100
- }
101
-
102
- .file-upload {
103
- border: 2px dashed #3498db;
104
- border-radius: 10px;
105
- padding: 30px;
106
- text-align: center;
107
- background: #f8f9fa;
108
- margin-bottom: 20px;
109
- transition: all 0.3s ease;
110
- }
111
-
112
- .file-upload:hover {
113
- border-color: #2980b9;
114
- background: #e8f4fd;
115
- }
116
-
117
- .file-upload input[type="file"] {
118
- display: none;
119
- }
120
-
121
- .file-upload label {
122
- display: block;
123
- cursor: pointer;
124
- font-size: 1.1rem;
125
- color: #2c3e50;
126
- }
127
-
128
- .file-upload .upload-icon {
129
- font-size: 3rem;
130
- margin-bottom: 10px;
131
- color: #3498db;
132
- }
133
-
134
- .file-info {
135
- margin-top: 10px;
136
- padding: 10px;
137
- background: #d4edda;
138
- border-radius: 5px;
139
- border: 1px solid #c3e6cb;
140
- display: none;
141
- }
142
-
143
- .optional-uploads {
144
- display: grid;
145
- grid-template-columns: 1fr 1fr;
146
- gap: 20px;
147
- margin-top: 20px;
148
- }
149
-
150
- .optional-upload {
151
- border: 1px solid #dee2e6;
152
- border-radius: 8px;
153
- padding: 20px;
154
- background: #f8f9fa;
155
- }
156
-
157
- .optional-upload h4 {
158
- color: #495057;
159
- margin-bottom: 10px;
160
- font-size: 1rem;
161
- }
162
-
163
- .optional-upload input[type="file"] {
164
- width: 100%;
165
- padding: 8px;
166
- border: 1px solid #ced4da;
167
- border-radius: 4px;
168
- font-size: 0.9rem;
169
- }
170
-
171
- .btn {
172
- background: linear-gradient(135deg, #3498db 0%, #2980b9 100%);
173
- color: white;
174
- border: none;
175
- padding: 15px 30px;
176
- font-size: 1.1rem;
177
- border-radius: 8px;
178
- cursor: pointer;
179
- transition: all 0.3s ease;
180
- width: 100%;
181
- margin-top: 20px;
182
- }
183
-
184
- .btn:hover {
185
- transform: translateY(-2px);
186
- box-shadow: 0 5px 15px rgba(52, 152, 219, 0.4);
187
- }
188
-
189
- .btn:disabled {
190
- background: #95a5a6;
191
- cursor: not-allowed;
192
- transform: none;
193
- box-shadow: none;
194
- }
195
-
196
- .progress-container {
197
- display: none;
198
- margin-top: 30px;
199
- padding: 20px;
200
- background: #f8f9fa;
201
- border-radius: 10px;
202
- border: 1px solid #dee2e6;
203
- }
204
-
205
- .progress-bar {
206
- width: 100%;
207
- height: 25px;
208
- background: #e9ecef;
209
- border-radius: 15px;
210
- overflow: hidden;
211
- margin-bottom: 15px;
212
- }
213
-
214
- .progress-fill {
215
- height: 100%;
216
- background: linear-gradient(90deg, #3498db, #2ecc71);
217
- width: 0%;
218
- transition: width 0.3s ease;
219
- border-radius: 15px;
220
- }
221
-
222
- .progress-text {
223
- text-align: center;
224
- font-weight: bold;
225
- color: #2c3e50;
226
- margin-bottom: 10px;
227
- }
228
-
229
- .progress-details {
230
- text-align: center;
231
- color: #7f8c8d;
232
- font-size: 0.9rem;
233
- }
234
-
235
- .results-container {
236
- display: none;
237
- margin-top: 30px;
238
- padding: 20px;
239
- background: #d4edda;
240
- border-radius: 10px;
241
- border: 1px solid #c3e6cb;
242
- }
243
-
244
- .results-stats {
245
- display: grid;
246
- grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
247
- gap: 15px;
248
- margin-bottom: 20px;
249
- }
250
-
251
- .stat-item {
252
- background: white;
253
- padding: 15px;
254
- border-radius: 8px;
255
- text-align: center;
256
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
257
- }
258
-
259
- .stat-value {
260
- font-size: 1.5rem;
261
- font-weight: bold;
262
- color: #2c3e50;
263
- }
264
-
265
- .stat-label {
266
- font-size: 0.9rem;
267
- color: #7f8c8d;
268
- margin-top: 5px;
269
- }
270
-
271
- .download-btn {
272
- background: linear-gradient(135deg, #27ae60 0%, #229954 100%);
273
- margin-top: 0;
274
- }
275
-
276
- .download-btn:hover {
277
- box-shadow: 0 5px 15px rgba(39, 174, 96, 0.4);
278
- }
279
-
280
- .error-container {
281
- display: none;
282
- margin-top: 20px;
283
- padding: 20px;
284
- background: #f8d7da;
285
- border-radius: 10px;
286
- border: 1px solid #f5c6cb;
287
- color: #721c24;
288
- }
289
-
290
- .model-info {
291
- background: #fff3cd;
292
- border: 1px solid #ffeaa7;
293
- border-radius: 8px;
294
- padding: 15px;
295
- margin-bottom: 20px;
296
- }
297
-
298
- .model-info h4 {
299
- color: #856404;
300
- margin-bottom: 10px;
301
- }
302
-
303
- .model-info ul {
304
- margin-left: 20px;
305
- color: #856404;
306
- }
307
-
308
- @media (max-width: 768px) {
309
- .container {
310
- margin: 10px;
311
- border-radius: 10px;
312
- }
313
-
314
- .header {
315
- padding: 20px;
316
- }
317
-
318
- .header h1 {
319
- font-size: 2rem;
320
- }
321
-
322
- .content {
323
- padding: 20px;
324
- }
325
-
326
- .optional-uploads {
327
- grid-template-columns: 1fr;
328
- }
329
-
330
- .results-stats {
331
- grid-template-columns: 1fr;
332
- }
333
- }
334
- </style>
335
- </head>
336
- <body>
337
- <div class="container">
338
- <div class="header">
339
- <h1>🚢 Vessel Trajectory Inference</h1>
340
- <p>Vessel trajectory prediction using our model logic</p>
341
- <p class="subtitle">Upload your preprocessed dataset with segments and velocity features</p>
342
- </div>
343
-
344
- <div class="content">
345
- <!-- Data Format Information -->
346
- <div class="data-format-info">
347
- <h3>📋 Required Data Format</h3>
348
- <p>Your CSV file should contain the following columns (automatically detects ';' or ',' separator):</p>
349
- <div class="required-columns">
350
- <div class="column-item">segment</div>
351
- <div class="column-item">latitude_velocity_km</div>
352
- <div class="column-item">longitude_velocity_km</div>
353
- <div class="column-item">latitude_degrees</div>
354
- <div class="column-item">longitude_degrees</div>
355
- <div class="column-item">time_difference_hours</div>
356
- <div class="column-item">time_scalar (or datetime)</div>
357
- </div>
358
- <p style="margin-top: 15px; font-style: italic; color: #495057;">
359
- ✅ Your data format is supported! The app will automatically create time_scalar from datetime/time_decimal if needed.
360
- </p>
361
- </div>
362
-
363
- <!-- Model Information -->
364
- <div class="model-info">
365
- <h4>🤖 Default Model Configuration</h4>
366
- <ul>
367
- <li><strong>Model:</strong> LSTMWithAttentionWithResid (7 layers, 250 hidden units)</li>
368
- <li><strong>Training:</strong> Atlantic Ocean vessel trajectories</li>
369
- <li><strong>Normalization:</strong> Atlantic dataset parameters</li>
370
- <li><strong>Sequence Length:</strong> 12 time steps</li>
371
- <li><strong>Forecast Horizon:</strong> 1 time step</li>
372
- </ul>
373
- </div>
374
-
375
- <!-- File Upload Section -->
376
- <div class="upload-section">
377
- <h3>📁 Upload Files</h3>
378
-
379
- <!-- Main CSV Upload -->
380
- <div class="file-upload" onclick="document.getElementById('csv_file').click()">
381
- <div class="upload-icon">📊</div>
382
- <label for="csv_file">
383
- <strong>Select Your Inference Dataset (CSV)</strong><br>
384
- Click here or drag and drop your CSV file
385
- </label>
386
- <input type="file" id="csv_file" accept=".csv" onchange="handleFileSelect(this, 'csv')">
387
- <div id="csv_file_info" class="file-info"></div>
388
- </div>
389
-
390
- <!-- Optional Uploads -->
391
- <div class="optional-uploads">
392
- <div class="optional-upload">
393
- <h4>🧠 Custom Model (Optional)</h4>
394
- <input type="file" id="model_file" accept=".pth,.pt" onchange="handleFileSelect(this, 'model')">
395
- <small>Default: best_model.pth</small>
396
- </div>
397
-
398
- <div class="optional-upload">
399
- <h4>⚙️ Custom Normalization (Optional)</h4>
400
- <input type="file" id="normalization_file" accept=".json" onchange="handleFileSelect(this, 'norm')">
401
- <small>Default: Atlantic dataset parameters</small>
402
- </div>
403
- </div>
404
- </div>
405
-
406
- <!-- Submit Button -->
407
- <button class="btn" id="submit_btn" onclick="startInference()" disabled>
408
- 🚀 Start Inference
409
- </button>
410
-
411
- <!-- Progress Container -->
412
- <div class="progress-container" id="progress_container">
413
- <div class="progress-text" id="progress_text">Initializing...</div>
414
- <div class="progress-bar">
415
- <div class="progress-fill" id="progress_fill"></div>
416
- </div>
417
- <div class="progress-details" id="progress_details">Please wait...</div>
418
- </div>
419
-
420
- <!-- Results Container -->
421
- <div class="results-container" id="results_container">
422
- <h3>📈 Inference Results</h3>
423
- <div class="results-stats" id="results_stats">
424
- <!-- Stats will be populated dynamically -->
425
- </div>
426
- <button class="btn download-btn" onclick="downloadResults()">
427
- 💾 Download Results CSV
428
- </button>
429
-
430
- <!-- Error Distribution Chart -->
431
- <div style="margin-top:30px;">
432
- <canvas id="errorChart" height="220"></canvas>
433
- </div>
434
- </div>
435
-
436
- <!-- Error Container -->
437
- <div class="error-container" id="error_container">
438
- <h3>❌ Error</h3>
439
- <p id="error_message"></p>
440
- </div>
441
- </div>
442
- </div>
443
-
444
- <script>
445
- const socket = io();
446
- let inferenceInProgress = false;
447
-
448
- // File selection handlers
449
- function handleFileSelect(input, type) {
450
- const file = input.files[0];
451
- if (file) {
452
- const infoDiv = document.getElementById(input.id + '_info');
453
- if (infoDiv) {
454
- infoDiv.style.display = 'block';
455
- infoDiv.innerHTML = `📁 Selected: ${file.name} (${(file.size / 1024 / 1024).toFixed(2)} MB)`;
456
- }
457
-
458
- // Enable submit button if CSV is selected
459
- if (type === 'csv') {
460
- document.getElementById('submit_btn').disabled = false;
461
- }
462
- }
463
- }
464
-
465
- // Start inference
466
- function startInference() {
467
- if (inferenceInProgress) return;
468
-
469
- const csvFile = document.getElementById('csv_file').files[0];
470
- if (!csvFile) {
471
- alert('Please select a CSV file first!');
472
- return;
473
- }
474
-
475
- inferenceInProgress = true;
476
- document.getElementById('submit_btn').disabled = true;
477
- document.getElementById('progress_container').style.display = 'block';
478
- document.getElementById('results_container').style.display = 'none';
479
- document.getElementById('error_container').style.display = 'none';
480
-
481
- // Prepare form data
482
- const formData = new FormData();
483
- formData.append('csv_file', csvFile);
484
-
485
- const modelFile = document.getElementById('model_file').files[0];
486
- if (modelFile) {
487
- formData.append('model_file', modelFile);
488
- }
489
-
490
- const normFile = document.getElementById('normalization_file').files[0];
491
- if (normFile) {
492
- formData.append('normalization_file', normFile);
493
- }
494
-
495
- // Upload files and start inference
496
- fetch('/upload', {
497
- method: 'POST',
498
- body: formData
499
- })
500
- .then(response => response.json())
501
- .then(data => {
502
- if (!data.success) {
503
- showError(data.error);
504
- resetUI();
505
- }
506
- })
507
- .catch(error => {
508
- showError('Upload failed: ' + error.message);
509
- resetUI();
510
- });
511
- }
512
-
513
- // Download results
514
- function downloadResults() {
515
- window.location.href = '/download_results';
516
- }
517
-
518
- // Show error
519
- function showError(message) {
520
- document.getElementById('error_container').style.display = 'block';
521
- document.getElementById('error_message').textContent = message;
522
- document.getElementById('progress_container').style.display = 'none';
523
- }
524
-
525
- // Reset UI
526
- function resetUI() {
527
- inferenceInProgress = false;
528
- document.getElementById('submit_btn').disabled = !document.getElementById('csv_file').files[0];
529
- document.getElementById('progress_container').style.display = 'none';
530
- }
531
-
532
- // Show results
533
- function showResults(stats, histogram) {
534
- const resultsStats = document.getElementById('results_stats');
535
- resultsStats.innerHTML = `
536
- <div class="stat-item">
537
- <div class="stat-value">${stats.mean_error_km.toFixed(2)} km</div>
538
- <div class="stat-label">Mean Error</div>
539
- </div>
540
- <div class="stat-item">
541
- <div class="stat-value">${stats.median_error_km.toFixed(2)} km</div>
542
- <div class="stat-label">Median Error</div>
543
- </div>
544
- <div class="stat-item">
545
- <div class="stat-value">${stats.total_predictions.toLocaleString()}</div>
546
- <div class="stat-label">Total Predictions</div>
547
- </div>
548
- <div class="stat-item">
549
- <div class="stat-value">${stats.total_segments}</div>
550
- <div class="stat-label">Segments Processed</div>
551
- </div>
552
- <div class="stat-item">
553
- <div class="stat-value">${stats.min_error_km.toFixed(2)} km</div>
554
- <div class="stat-label">Min Error</div>
555
- </div>
556
- <div class="stat-item">
557
- <div class="stat-value">${stats.max_error_km.toFixed(2)} km</div>
558
- <div class="stat-label">Max Error</div>
559
- </div>
560
- `;
561
-
562
- // Render error distribution histogram if data provided
563
- if (histogram && histogram.bins && histogram.counts) {
564
- renderErrorChart(histogram);
565
- }
566
- document.getElementById('results_container').style.display = 'block';
567
- document.getElementById('progress_container').style.display = 'none';
568
- resetUI();
569
- }
570
-
571
- // Chart.js rendering function
572
- let errorChartInstance = null;
573
- function renderErrorChart(histogram) {
574
- const ctx = document.getElementById('errorChart').getContext('2d');
575
- // Prepare labels as mid-points of bins
576
- const labels = [];
577
- for (let i = 0; i < histogram.bins.length - 1; i++) {
578
- const mid = (histogram.bins[i] + histogram.bins[i + 1]) / 2;
579
- labels.push(mid.toFixed(1));
580
- }
581
- // Destroy previous chart if it exists (for multiple runs)
582
- if (errorChartInstance) {
583
- errorChartInstance.destroy();
584
- }
585
- errorChartInstance = new Chart(ctx, {
586
- type: 'bar',
587
- data: {
588
- labels: labels,
589
- datasets: [{
590
- label: 'Error (km) distribution',
591
- data: histogram.counts,
592
- backgroundColor: 'rgba(52, 152, 219, 0.5)',
593
- borderColor: 'rgba(41, 128, 185, 1)',
594
- borderWidth: 1
595
- }]
596
- },
597
- options: {
598
- scales: {
599
- x: {
600
- title: {
601
- display: true,
602
- text: 'Error (km)'
603
- }
604
- },
605
- y: {
606
- beginAtZero: true,
607
- title: {
608
- display: true,
609
- text: 'Count'
610
- }
611
- }
612
- }
613
- }
614
- });
615
- }
616
-
617
- // Socket event handlers
618
- socket.on('progress_update', function(data) {
619
- document.getElementById('progress_text').textContent = data.step;
620
- document.getElementById('progress_fill').style.width = data.progress + '%';
621
- document.getElementById('progress_details').textContent = data.details;
622
-
623
- if (data.step === 'Complete') {
624
- // Results will be shown via the inference_complete event
625
- } else if (data.step === 'Error') {
626
- showError(data.details);
627
- resetUI();
628
- }
629
- });
630
-
631
- socket.on('inference_complete', function(data) {
632
- if (data.success) {
633
- showResults(data.stats, data.histogram);
634
- } else {
635
- showError(data.error);
636
- resetUI();
637
- }
638
- });
639
-
640
- // Drag and drop functionality
641
- ['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => {
642
- document.addEventListener(eventName, preventDefaults, false);
643
- });
644
-
645
- function preventDefaults(e) {
646
- e.preventDefault();
647
- e.stopPropagation();
648
- }
649
-
650
- ['dragenter', 'dragover'].forEach(eventName => {
651
- document.querySelector('.file-upload').addEventListener(eventName, highlight, false);
652
- });
653
-
654
- ['dragleave', 'drop'].forEach(eventName => {
655
- document.querySelector('.file-upload').addEventListener(eventName, unhighlight, false);
656
- });
657
-
658
- function highlight(e) {
659
- e.currentTarget.style.borderColor = '#2980b9';
660
- e.currentTarget.style.background = '#e8f4fd';
661
- }
662
-
663
- function unhighlight(e) {
664
- e.currentTarget.style.borderColor = '#3498db';
665
- e.currentTarget.style.background = '#f8f9fa';
666
- }
667
-
668
- document.querySelector('.file-upload').addEventListener('drop', handleDrop, false);
669
-
670
- function handleDrop(e) {
671
- const dt = e.dataTransfer;
672
- const files = dt.files;
673
-
674
- if (files.length > 0) {
675
- document.getElementById('csv_file').files = files;
676
- handleFileSelect(document.getElementById('csv_file'), 'csv');
677
- }
678
- }
679
- </script>
680
- </body>
681
- </html>