OnsAouedi committed
Commit 81ea98b · verified · 1 Parent(s): 0420f98

Upload app.py

Files changed (1)
  1. app.py +819 -0
app.py ADDED
@@ -0,0 +1,819 @@
#!/usr/bin/env python3

import os
import json
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from tqdm import tqdm
from flask import Flask, render_template, request, jsonify, send_file
from flask_socketio import SocketIO, emit
import tempfile
import threading
from pathlib import Path
from werkzeug.utils import secure_filename

app = Flask(__name__)
app.config['SECRET_KEY'] = 'your-secret-key-here'
app.config['UPLOAD_FOLDER'] = 'uploads'
app.config['MAX_CONTENT_LENGTH'] = 500 * 1024 * 1024  # 500MB max file size
socketio = SocketIO(app, cors_allowed_origins="*")

# Ensure upload directory exists
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

# Global variables for progress tracking
current_progress = {'step': 'idle', 'progress': 0, 'details': ''}

########################################
#           MODEL DEFINITION           #
########################################

class LSTMWithAttentionWithResid(nn.Module):
    def __init__(self, in_dim, hidden_dim, forecast_horizon, n_layers=10, dropout=0.2):
        super(LSTMWithAttentionWithResid, self).__init__()
        self.hidden_dim = hidden_dim
        self.forecast_horizon = forecast_horizon

        # Embedding layer
        self.embedding = nn.Linear(in_dim, hidden_dim)

        # LSTM layers
        self.lstm = nn.LSTM(
            hidden_dim, hidden_dim, num_layers=n_layers, dropout=dropout, batch_first=True
        )

        # Layer normalization after residual connection
        self.layer_norm = nn.LayerNorm(hidden_dim)

        # Attention mechanism
        self.attention = nn.Linear(hidden_dim, hidden_dim)
        self.context_vector = nn.Linear(hidden_dim, 1, bias=False)  # Linear layer for scoring

        # Fully connected layer to map attention context to output
        self.fc = nn.Linear(hidden_dim, forecast_horizon * 2)

    def forward(self, x):
        # x: [batch_size, seq_len, in_dim]

        # Pass through embedding layer
        x_embed = self.embedding(x)  # [batch_size, seq_len, hidden_dim]

        # Pass through LSTM
        lstm_output, (hidden, cell) = self.lstm(x_embed)  # [batch_size, seq_len, hidden_dim]

        # Add residual connection (out-of-place)
        lstm_output = lstm_output + x_embed  # [batch_size, seq_len, hidden_dim]

        # Apply layer normalization
        lstm_output = self.layer_norm(lstm_output)  # [batch_size, seq_len, hidden_dim]

        # Compute attention scores
        attention_weights = torch.tanh(self.attention(lstm_output))  # [batch_size, seq_len, hidden_dim]
        attention_scores = self.context_vector(attention_weights).squeeze(-1)  # [batch_size, seq_len]

        # Apply softmax to normalize scores
        attention_weights = F.softmax(attention_scores, dim=1)  # [batch_size, seq_len]

        # Compute the context vector as a weighted sum of LSTM outputs
        context_vector = torch.bmm(
            attention_weights.unsqueeze(1), lstm_output
        )  # [batch_size, 1, hidden_dim]
        context_vector = context_vector.squeeze(1)  # [batch_size, hidden_dim]

        # Pass context vector through fully connected layer for forecasting
        output = self.fc(context_vector)  # [batch_size, forecast_horizon * 2]

        # Reshape output to match the expected shape
        output = output.view(-1, self.forecast_horizon, 2)  # [batch_size, forecast_horizon, 2]

        return output

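# --- Illustrative sketch (not called by the app): a quick shape check for the model above.
# The sizes here are arbitrary; at inference time the app uses in_dim=7 (six normalized
# features plus the appended future-time column, after the segment column is dropped),
# hidden_dim=250 and n_layers=7.
def _demo_model_shapes():
    demo_model = LSTMWithAttentionWithResid(in_dim=7, hidden_dim=32, forecast_horizon=1, n_layers=2)
    demo_model.eval()
    dummy = torch.randn(4, 12, 7)   # [batch_size=4, seq_len=12, in_dim=7]
    with torch.no_grad():
        out = demo_model(dummy)     # [batch_size, forecast_horizon, 2]
    assert out.shape == (4, 1, 2)
    return out
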
########################################
#          UTILITY FUNCTIONS           #
########################################

def update_progress(step, progress, details=""):
    """Update global progress state"""
    global current_progress
    current_progress = {
        'step': step,
        'progress': progress,
        'details': details
    }
    socketio.emit('progress_update', current_progress)

def create_sequences_grouped_by_segment_lat_long_veloc(df_scaled, seq_len=12, forecast_horizon=1, features_to_scale=None):
    """
    For each segment, creates overlapping sequences of length seq_len.
    Returns:
      - Xs: input sequences,
      - ys: target outputs (future latitude and longitude velocities),
      - segments: corresponding segment IDs,
      - last_positions: last known positions from each sequence.
    """
    update_progress('Creating sequences', 10, f'Processing {len(df_scaled)} data points...')

    Xs, ys, segments, last_positions = [], [], [], []

    if features_to_scale is None:
        # CRITICAL: match the exact inference feature order (segment first, removed before the model)
        features_to_scale = [
            "segment",                 # Index 0 - will be removed before the model
            "latitude_velocity_km",    # Index 1 -> 0 after segment removal
            "longitude_velocity_km",   # Index 2 -> 1 after segment removal
            "latitude_degrees",        # Index 3 -> 2 after segment removal
            "longitude_degrees",       # Index 4 -> 3 after segment removal
            "time_difference_hours",   # Index 5 -> 4 after segment removal
            "time_scalar"              # Index 6 -> 5 after segment removal
        ]

    # Verify all required features exist
    missing_features = [f for f in features_to_scale if f not in df_scaled.columns]
    if missing_features:
        raise ValueError(f"Missing required features: {missing_features}")

    grouped = df_scaled.groupby('segment')
    total_segments = len(grouped)

    for i, (segment_id, group) in enumerate(grouped):
        group = group.reset_index(drop=True)
        L = len(group)

        # Progress update
        if i % max(1, total_segments // 20) == 0:
            progress = 10 + (i / total_segments) * 30  # 10-40% range
            update_progress('Creating sequences', progress,
                            f'Processing segment {i+1}/{total_segments}')

        if L >= seq_len + forecast_horizon:
            for j in range(L - seq_len - forecast_horizon + 1):
                # Get sequence features
                seq = group.iloc[j:(j + seq_len)][features_to_scale].to_numpy()

                # Get the future time scalar for the forecast horizon
                future_time = group['time_scalar'].iloc[j + seq_len + forecast_horizon - 1]
                future_time_feature = np.full((seq_len, 1), future_time)

                # Augment the sequence with the future time
                seq_aug = np.hstack((seq, future_time_feature))
                Xs.append(seq_aug)

                # Target: future velocity
                target = group[['latitude_velocity_km', 'longitude_velocity_km']].iloc[j + seq_len + forecast_horizon - 1].to_numpy()
                ys.append(target)

                segments.append(segment_id)

                # Last known position
                last_pos = group[['latitude_degrees', 'longitude_degrees']].iloc[j + seq_len - 1].to_numpy()
                last_positions.append(last_pos)

    return (np.array(Xs, dtype=np.float32),
            np.array(ys, dtype=np.float32),
            np.array(segments),
            np.array(last_positions, dtype=np.float32))

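# --- Illustrative sketch (not called by the app): build a tiny single-segment frame and
# confirm the sequence shapes. With 6 rows, seq_len=3 and forecast_horizon=1, the helper
# yields 3 overlapping windows of 8 columns each (7 features + the appended future time).
def _demo_sequence_creation():
    toy = pd.DataFrame({
        'segment': [1] * 6,
        'latitude_velocity_km': np.linspace(0.1, 0.6, 6),
        'longitude_velocity_km': np.linspace(0.2, 0.7, 6),
        'latitude_degrees': np.linspace(0.40, 0.45, 6),
        'longitude_degrees': np.linspace(0.30, 0.35, 6),
        'time_difference_hours': [0.1] * 6,
        'time_scalar': np.linspace(0.0, 1.0, 6),
    })
    Xs, ys, segs, last_pos = create_sequences_grouped_by_segment_lat_long_veloc(
        toy, seq_len=3, forecast_horizon=1
    )
    assert Xs.shape == (3, 3, 8) and ys.shape == (3, 2)
    return Xs, ys, segs, last_pos
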
def load_normalization_params(json_path):
    """Load normalization parameters from JSON file"""
    with open(json_path, "r") as f:
        normalization_params = json.load(f)
    return normalization_params["feature_mins"], normalization_params["feature_maxs"]

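# The normalization JSON is expected to contain two dictionaries keyed by feature name, e.g.:
#   {"feature_mins": {"latitude_velocity_km": ..., "time_scalar": ..., ...},
#    "feature_maxs": {"latitude_velocity_km": ..., "time_scalar": ..., ...}}
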
def minmax_denormalize(scaled_series, feature_min, feature_max):
    """Denormalize data using min-max scaling"""
    return scaled_series * (feature_max - feature_min) + feature_min

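# --- Illustrative sketch (not called by the app): the forward min-max scaling applied to the
# input data and the inverse transform above should round-trip back to the original values.
def _demo_minmax_round_trip():
    raw = pd.Series([2.0, 10.0, 60.0])
    fmin, fmax = 2.0, 60.0
    scaled = (raw - fmin) / (fmax - fmin)              # forward transform (as in the pipeline)
    restored = minmax_denormalize(scaled, fmin, fmax)  # inverse transform
    assert np.allclose(restored, raw)
    return scaled, restored
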
########################################
#          INFERENCE PIPELINE          #
########################################

def run_inference_pipeline(csv_file_path, model_path, normalization_path):
    """Complete inference pipeline following the Final_inference_maginet.py logic"""

    try:
        # Step 1: Load and validate data
        update_progress('Loading data', 5, 'Reading CSV file...')

        # Enhanced CSV parsing with error handling
        try:
            # Determine the separator by reading the first line
            with open(csv_file_path, 'r') as f:
                first_line = f.readline()
            separator = ';' if ';' in first_line else ','

            # Try reading with the detected separator
            df = pd.read_csv(csv_file_path, sep=separator, on_bad_lines='skip')
            update_progress('Loading data', 8, f'Loaded {len(df)} rows with separator "{separator}"')

            # Debug: print the actual column names
            print(f"🔍 CSV COLUMNS FOUND: {list(df.columns)}")
            update_progress('Loading data', 8.5, f'Columns: {list(df.columns)}')

        except Exception as e:
            print(f"❌ CSV PARSING ERROR: {e}")
            # Try alternative parsing methods
            try:
                df = pd.read_csv(csv_file_path, sep=',', on_bad_lines='skip')
                update_progress('Loading data', 8, f'Loaded {len(df)} rows with comma separator (fallback)')
                print(f"🔍 CSV COLUMNS FOUND (fallback): {list(df.columns)}")
            except Exception as e2:
                try:
                    df = pd.read_csv(csv_file_path, sep=';', on_bad_lines='skip')
                    update_progress('Loading data', 8, f'Loaded {len(df)} rows with semicolon separator (fallback)')
                    print(f"🔍 CSV COLUMNS FOUND (fallback): {list(df.columns)}")
                except Exception as e3:
                    raise ValueError(f"Could not parse CSV file. Tried multiple separators. Errors: {e}, {e2}, {e3}")

        # CRITICAL: create time_scalar (it was missing from the inference dataset!)
        if 'time_scalar' not in df.columns:
            if 'datetime' in df.columns:
                # Convert datetime to time_scalar (preferred method)
                df['datetime'] = pd.to_datetime(df['datetime'], errors='coerce')
                reference_date = pd.Timestamp('2023-01-01')
                df['time_scalar'] = ((df['datetime'] - reference_date) / pd.Timedelta(days=1)).round(8)
                update_progress('Loading data', 9, 'Created time_scalar from datetime column')
            elif 'time_decimal' in df.columns:
                # Use time_decimal directly as time_scalar (alternative method)
                df['time_scalar'] = df['time_decimal'].copy()
                update_progress('Loading data', 9, 'Created time_scalar from time_decimal column')
            elif all(col in df.columns for col in ['day', 'month', 'time_decimal']):
                # Create datetime from components, then time_scalar
                df['year'] = df.get('year', 2024)  # Default year if not present
                df['datetime'] = pd.to_datetime(df[['year', 'month', 'day']], errors='coerce')
                df['datetime'] += pd.to_timedelta(df['time_decimal'], unit='h')
                reference_date = pd.Timestamp('2023-01-01')
                df['time_scalar'] = ((df['datetime'] - reference_date) / pd.Timedelta(days=1)).round(8)
                update_progress('Loading data', 9, 'Created time_scalar from day/month/time_decimal')
            else:
                # Create a simple sequential time_scalar based on row order
                df['time_scalar'] = df.index / len(df)
                update_progress('Loading data', 9, 'Created sequential time_scalar')

        # Validate required columns with detailed error reporting
        required_columns = [
            'segment', 'latitude_velocity_km', 'longitude_velocity_km',
            'latitude_degrees', 'longitude_degrees', 'time_difference_hours', 'time_scalar'
        ]

        print(f"🔍 REQUIRED COLUMNS: {required_columns}")
        print(f"🔍 ACTUAL COLUMNS: {list(df.columns)}")

        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            available_cols = list(df.columns)
            error_msg = f"""
            ❌ COLUMN VALIDATION ERROR:
            Missing required columns: {missing_columns}
            Available columns: {available_cols}

            Column mapping suggestions:
            - Check for extra spaces or different naming
            - Verify the CSV file format and encoding
            - Ensure the time_scalar column exists or can be created
            """
            print(error_msg)
            raise ValueError(f"Missing required columns: {missing_columns}. Available: {available_cols}")

        # CRITICAL: apply the SAME data filtering as the training notebook
        update_progress('Filtering data', 10, 'Applying quality filters...')
        original_count = len(df)

        # 1. Calculate the speed column if it is missing (CRITICAL!)
        if 'speed_km_h' not in df.columns:
            df['speed_km_h'] = np.sqrt(df['latitude_velocity_km']**2 + df['longitude_velocity_km']**2)
            update_progress('Filtering data', 10.5, 'Calculated speed_km_h column')

        # 2. Speed filtering - exactly like training
        df = df[(df['speed_km_h'] >= 2) & (df['speed_km_h'] <= 60)].copy()
        update_progress('Filtering data', 11, f'Speed filter: {original_count} -> {len(df)} rows')

        # 3. Velocity filtering - CRITICAL for performance!
        velocity_mask = (
            (np.abs(df['latitude_velocity_km']) <= 100) &
            (np.abs(df['longitude_velocity_km']) <= 100) &
            (df['time_difference_hours'] > 0) &
            (df['time_difference_hours'] <= 24)  # Max 24 hours between points
        )
        df = df[velocity_mask].copy()
        update_progress('Filtering data', 12, f'Velocity filter: -> {len(df)} rows')

        # 4. Segment length filtering - remove segments with fewer than 20 points
        segment_counts = df['segment'].value_counts()
        segments_to_remove = segment_counts[segment_counts < 20].index
        before_segment_filter = len(df)
        df = df[~df['segment'].isin(segments_to_remove)].copy()
        update_progress('Filtering data', 13, f'Segment filter: {before_segment_filter} -> {len(df)} rows')

        # 5. Remove NaN and infinite values
        df = df.dropna().copy()
        numeric_cols = ['latitude_velocity_km', 'longitude_velocity_km', 'time_difference_hours']
        for col in numeric_cols:
            if col in df.columns:
                df = df[~np.isinf(df[col])].copy()

        # Debugging: detailed filtering statistics
        filtered_count = len(df)
        filter_percent = ((original_count - filtered_count) / original_count) * 100
        update_progress('Filtering data', 14, f'Final filtered data: {filtered_count} rows ({original_count - filtered_count} removed = {filter_percent:.1f}%)')

        # Debug info for analysis
        print(f"🔍 FILTERING SUMMARY:")
        print(f"   Original: {original_count:,} rows")
        print(f"   Final:    {filtered_count:,} rows")
        print(f"   Removed:  {original_count - filtered_count:,} ({filter_percent:.1f}%)")

        if len(df) == 0:
            raise ValueError("No data remaining after quality filtering. Check your input data quality.")

        # Step 2: Load normalization parameters
        update_progress('Loading normalization', 15, 'Loading normalization parameters...')
        feature_mins, feature_maxs = load_normalization_params(normalization_path)

        # Step 2.5: CRITICAL - normalize the test data (skipping this step caused the 3373 km error!)
        update_progress('Normalizing data', 16, 'Applying normalization to test data...')
        features_to_normalize = ['latitude_velocity_km', 'longitude_velocity_km',
                                 'latitude_degrees', 'longitude_degrees',
                                 'time_difference_hours', 'time_scalar']

        for feature in features_to_normalize:
            if feature in df.columns and feature in feature_mins:
                min_val = feature_mins[feature]
                max_val = feature_maxs[feature]
                rng = max_val - min_val if max_val != min_val else 1
                df[feature] = (df[feature] - min_val) / rng
                update_progress('Normalizing data', 18, f'Normalized {feature}')

        # Step 3: Create sequences
        SEQ_LENGTH = 12
        FORECAST_HORIZON = 1

        X_test, y_test, test_segments, last_known_positions_scaled = create_sequences_grouped_by_segment_lat_long_veloc(
            df, seq_len=SEQ_LENGTH, forecast_horizon=FORECAST_HORIZON
        )

        update_progress('Preparing model', 45, f'Created {len(X_test)} sequences')

        if len(X_test) == 0:
            raise ValueError("No valid sequences could be created. Check your data and sequence length requirements.")

        # Step 4: Prepare data for the model
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        X_test_tensor = torch.from_numpy(X_test).float().to(device)
        y_test_tensor = torch.from_numpy(y_test).float().to(device)
        test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
        test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

        # Step 5: Load the model
        update_progress('Loading model', 50, 'Loading trained model...')

        # CRITICAL: the model expects 6 features (segment removed) + 1 future_time = 7 in total
        in_dim = X_test.shape[2] - 1  # Remove the segment column dimension
        # CRITICAL: match the exact architecture of the Atlantic model weights
        hidden_dim = 250  # From best_model.pth
        n_layers = 7      # From best_model.pth (CRITICAL: not 10!)
        dropout = 0.2

        model = LSTMWithAttentionWithResid(
            in_dim, hidden_dim, FORECAST_HORIZON,
            n_layers=n_layers, dropout=dropout
        ).to(device)

        model.load_state_dict(torch.load(model_path, map_location=device))
        model.eval()

        # Step 6: Run inference
        update_progress('Running inference', 60, 'Making predictions...')

        # CRITICAL: extract features batch by batch, matching the training notebook
        all_preds = []
        segments_extracted = []
        time_scalars_extracted = []
        time_diff_hours_extracted = []

        with torch.no_grad():
            for i, batch in enumerate(test_loader):
                x_batch, _ = batch

                # CRITICAL: extract features exactly as in the notebook
                segment_batch = x_batch[:, 0, 0].cpu().numpy()          # Segment from the first time step
                time_scalar_batch = x_batch[:, -1, 6].cpu().numpy()     # LAST timestep, index 6 = time_scalar
                time_diff_hours_batch = x_batch[:, 0, 5].cpu().numpy()  # First timestep, index 5
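                # Column layout of each augmented sequence row (see the sequence helper above):
                #   0 segment | 1 lat velocity | 2 lon velocity | 3 latitude | 4 longitude
                #   5 time_difference_hours | 6 time_scalar | 7 appended future time
                # The model only ever sees indices 1-7; the segment column is dropped below.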

                segments_extracted.extend(segment_batch)
                time_scalars_extracted.extend(time_scalar_batch)
                time_diff_hours_extracted.extend(time_diff_hours_batch)

                # Remove the segment column before model input
                x_batch_no_segment = x_batch[:, :, 1:]  # Drop segment (index 0), keep all other features
                preds = model(x_batch_no_segment)
                all_preds.append(preds.cpu().numpy())

                # Progress update
                progress = 60 + (i / len(test_loader)) * 20  # 60-80% range
                update_progress('Running inference', progress,
                                f'Processing batch {i+1}/{len(test_loader)}')

        all_preds = np.concatenate(all_preds, axis=0)

        # Step 7: Process results
        update_progress('Processing results', 80, 'Processing predictions...')

        # CRITICAL: reshape predictions exactly as in the notebook
        yhat = torch.from_numpy(all_preds)
        yhat = yhat.view(-1, 2)  # Reshape to [batch_size, 2]

        # Extract predictions
        predicted_lat_vel = yhat[:, 0].numpy()  # Predicted lat velocity
        predicted_lon_vel = yhat[:, 1].numpy()  # Predicted lon velocity

        # Extract actual values
        y_real = y_test_tensor.cpu()
        actual_lat_vel = y_real[:, 0].numpy()  # Actual lat velocity
        actual_lon_vel = y_real[:, 1].numpy()  # Actual lon velocity

        # CRITICAL: use the features extracted from the batches (matching the notebook)
        # Ensure all arrays have a consistent length
        num_samples = len(predicted_lat_vel)
        segments_extracted = segments_extracted[:num_samples]
        time_scalars_extracted = time_scalars_extracted[:num_samples]
        time_diff_hours_extracted = time_diff_hours_extracted[:num_samples]
        last_known_positions_scaled = last_known_positions_scaled[:num_samples]

        # Create the results dataframe exactly as in the notebook
        results_df = pd.DataFrame({
            'segment': segments_extracted,                        # From batch extraction
            'time_difference_hours': time_diff_hours_extracted,   # From batch extraction (first timestep)
            'Time Scalar': time_scalars_extracted,                # From batch extraction (LAST timestep)
            'Last Known Latitude': [pos[0] for pos in last_known_positions_scaled],
            'Last Known Longitude': [pos[1] for pos in last_known_positions_scaled],
            'predicted_lat_km': predicted_lat_vel,
            'predicted_lon_km': predicted_lon_vel,
            'actual_lat_km': actual_lat_vel,
            'actual_lon_km': actual_lon_vel
        })

        # Step 8: Denormalize results
        update_progress('Denormalizing results', 85, 'Converting to real units...')

        # Column-to-feature mapping (complete mapping for all denormalizable columns)
        column_to_feature = {
            "predicted_lat_km": "latitude_velocity_km",
            "predicted_lon_km": "longitude_velocity_km",
            "actual_lat_km": "latitude_velocity_km",
            "actual_lon_km": "longitude_velocity_km",
            "Last Known Latitude": "latitude_degrees",
            "Last Known Longitude": "longitude_degrees",
            "time_difference_hours": "time_difference_hours",
            "Time Scalar": "time_scalar"
        }

        # Denormalize the relevant columns
        for col, feat in column_to_feature.items():
            if col in results_df.columns and feat in feature_mins:
                fmin = feature_mins[feat]
                fmax = feature_maxs[feat]
                results_df[col + "_unscaled"] = minmax_denormalize(results_df[col], fmin, fmax)
                update_progress('Denormalizing results', 85, f'Denormalized {col}')

        # Ensure all required _unscaled columns exist
        required_unscaled_cols = [
            'predicted_lat_km_unscaled', 'predicted_lon_km_unscaled',
            'actual_lat_km_unscaled', 'actual_lon_km_unscaled',
            'Last Known Latitude_unscaled', 'Last Known Longitude_unscaled',
            'time_difference_hours_unscaled'
        ]

        for col in required_unscaled_cols:
            if col not in results_df.columns:
                base_col = col.replace('_unscaled', '')
                if base_col in results_df.columns:
                    # If the base column exists but was not denormalized, copy it
                    results_df[col] = results_df[base_col]
                    update_progress('Denormalizing results', 87, f'Created missing {col}')
                else:
                    results_df[col] = 0.0
                    update_progress('Denormalizing results', 87, f'Defaulted missing {col} to 0')

        # ---------------------------
        # Clip predicted velocities to realistic physical bounds to avoid huge errors
        # ---------------------------
        VELOCITY_RANGE_KM_H = (-100, 100)  # Same limits used during input filtering
        results_df["predicted_lat_km_unscaled"] = results_df["predicted_lat_km_unscaled"].clip(*VELOCITY_RANGE_KM_H)
        results_df["predicted_lon_km_unscaled"] = results_df["predicted_lon_km_unscaled"].clip(*VELOCITY_RANGE_KM_H)
        update_progress('Denormalizing results', 88, 'Clipped predicted velocities to realistic range')

        # Step 9: Calculate final positions and errors (exact column structure matching the notebook)
        update_progress('Calculating errors', 90, 'Computing prediction errors...')

        # Compute displacement components (in km)
        results_df["pred_final_lat_km_component"] = (
            results_df["predicted_lat_km_unscaled"] * results_df["time_difference_hours_unscaled"]
        )
        results_df["pred_final_lon_km_component"] = (
            results_df["predicted_lon_km_unscaled"] * results_df["time_difference_hours_unscaled"]
        )
        results_df["actual_final_lat_km_component"] = (
            results_df["actual_lat_km_unscaled"] * results_df["time_difference_hours_unscaled"]
        )
        results_df["actual_final_lon_km_component"] = (
            results_df["actual_lon_km_unscaled"] * results_df["time_difference_hours_unscaled"]
        )

        # Calculate total displacement magnitudes
        results_df["pred_final_km"] = np.sqrt(
            results_df["pred_final_lat_km_component"]**2 + results_df["pred_final_lon_km_component"]**2
        )
        results_df["actual_final_km"] = np.sqrt(
            results_df["actual_final_lat_km_component"]**2 + results_df["actual_final_lon_km_component"]**2
        )

        # Calculate the Euclidean distance error (in km)
        results_df["error_km"] = np.sqrt(
            (results_df["pred_final_lat_km_component"] - results_df["actual_final_lat_km_component"])**2 +
            (results_df["pred_final_lon_km_component"] - results_df["actual_final_lon_km_component"])**2
        )

        # Compute final positions in degrees
        km_per_deg_lat = 111  # Approximate km per degree of latitude
        results_df["pred_final_lat_deg"] = results_df["Last Known Latitude_unscaled"] + (
            results_df["predicted_lat_km_unscaled"] * results_df["time_difference_hours_unscaled"]
        ) / km_per_deg_lat
        results_df["actual_final_lat_deg"] = results_df["Last Known Latitude_unscaled"] + (
            results_df["actual_lat_km_unscaled"] * results_df["time_difference_hours_unscaled"]
        ) / km_per_deg_lat

        # Account for longitude scaling by latitude
        results_df["Last_Known_Lat_rad"] = np.deg2rad(results_df["Last Known Latitude_unscaled"])
        results_df["pred_final_lon_deg"] = results_df["Last Known Longitude_unscaled"] + (
            results_df["predicted_lon_km_unscaled"] * results_df["time_difference_hours_unscaled"]
        ) / (km_per_deg_lat * np.cos(results_df["Last_Known_Lat_rad"]))
        results_df["actual_final_lon_deg"] = results_df["Last Known Longitude_unscaled"] + (
            results_df["actual_lon_km_unscaled"] * results_df["time_difference_hours_unscaled"]
        ) / (km_per_deg_lat * np.cos(results_df["Last_Known_Lat_rad"]))

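        # Sanity check on the conversion above: one degree of latitude is roughly 111 km
        # everywhere, while one degree of longitude shrinks with latitude; at 45 degrees
        # it spans about 111 * cos(45°) ≈ 78.5 km.
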
        # Step 10: Reorder columns to match the exact notebook specification
        update_progress('Finalizing results', 93, 'Reordering columns to match notebook format...')

        # Exact column order as specified
        column_order = [
            'segment', 'time_difference_hours', 'Time Scalar', 'Last Known Latitude', 'Last Known Longitude',
            'predicted_lat_km', 'predicted_lon_km', 'actual_lat_km', 'actual_lon_km',
            'predicted_lat_km_unscaled', 'predicted_lon_km_unscaled', 'actual_lat_km_unscaled', 'actual_lon_km_unscaled',
            'Last Known Latitude_unscaled', 'Last Known Longitude_unscaled', 'time_difference_hours_unscaled',
            'pred_final_km', 'actual_final_km',
            'pred_final_lat_km_component', 'pred_final_lon_km_component',
            'actual_final_lat_km_component', 'actual_final_lon_km_component',
            'error_km', 'pred_final_lat_deg', 'actual_final_lat_deg', 'Last_Known_Lat_rad',
            'pred_final_lon_deg', 'actual_final_lon_deg'
        ]

        # Validate that all required columns exist - add missing ones with defaults if needed
        missing_columns = [col for col in column_order if col not in results_df.columns]
        if missing_columns:
            update_progress('Finalizing results', 94, f'Adding missing columns: {missing_columns}')
            for col in missing_columns:
                # Add default values for any missing columns
                if '_unscaled' in col:
                    # For unscaled columns, try to find the original scaled column
                    base_col = col.replace('_unscaled', '')
                    if base_col in results_df.columns and base_col in column_to_feature:
                        # Use the same denormalization process
                        feat = column_to_feature[base_col]
                        if feat in feature_mins:
                            fmin = feature_mins[feat]
                            fmax = feature_maxs[feat]
                            results_df[col] = minmax_denormalize(results_df[base_col], fmin, fmax)
                        else:
                            results_df[col] = results_df[base_col]  # No denormalization available
                    else:
                        results_df[col] = 0.0  # Default to 0
                else:
                    results_df[col] = 0.0  # Default to 0 for any other missing columns

        # Reorder columns to match the exact specification
        results_df = results_df[column_order]

        # Step 11: Save results
        update_progress('Saving results', 95, 'Saving inference results...')

        # Create the results directory
        results_dir = Path('results/inference_atlantic')
        results_dir.mkdir(parents=True, exist_ok=True)

        # Save to the results directory
        timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
        results_file = results_dir / f'inference_results_{timestamp}.csv'
        results_df.to_csv(results_file, index=False)

        # Also save to a temporary file for compatibility
        output_file = tempfile.NamedTemporaryFile(
            mode='w', suffix='_inference_results.csv', delete=False
        )
        output_file.close()  # Close the handle; pandas reopens the path below
        results_df.to_csv(output_file.name, index=False)

        # CRITICAL: calculate the SAME regression metrics as the notebook
        # Convert predictions and actuals to tensors for metric calculation
        yhat_tensor = torch.from_numpy(np.column_stack([predicted_lat_vel, predicted_lon_vel])).float()
        y_real_tensor = torch.from_numpy(np.column_stack([actual_lat_vel, actual_lon_vel])).float()

        # Calculate regression metrics exactly as in the notebook
        def calc_metrics_like_notebook(preds, labels):
            """Calculate metrics exactly like the notebook's calc_metrics function"""
            EPS = 1e-8
            mse = torch.mean((preds - labels) ** 2)
            mae = torch.mean(torch.abs(preds - labels))
            rmse = torch.sqrt(mse)
            mape = torch.mean(torch.abs((preds - labels) / (labels + EPS))) * 100  # Convert to a percentage
            rse = torch.sum((preds - labels) ** 2) / torch.sum((labels + EPS) ** 2)
            return rse.item(), mae.item(), mse.item(), mape.item(), rmse.item()

        # Calculate regression metrics on the velocity predictions
        rse, mae, mse, mape, rmse = calc_metrics_like_notebook(yhat_tensor, y_real_tensor)

        # Calculate summary statistics
        error_stats = {
            # Distance-based metrics (web app specific)
            'mean_error_km': float(results_df["error_km"].mean()),
            'median_error_km': float(results_df["error_km"].median()),
            'std_error_km': float(results_df["error_km"].std()),
            'min_error_km': float(results_df["error_km"].min()),
            'max_error_km': float(results_df["error_km"].max()),

            # Regression metrics (matching the notebook)
            'rse': rse,
            'mae': mae,
            'mse': mse,
            'mape': mape,
            'rmse': rmse,

            # General stats
            'total_predictions': len(results_df),
            'total_segments': len(results_df['segment'].unique()),
            'columns_generated': list(results_df.columns),
            'total_columns': len(results_df.columns)
        }

        # Create a histogram of the error distribution (30 bins by default)
        hist_counts, bin_edges = np.histogram(results_df["error_km"], bins=30)
        histogram_data = {
            'bins': bin_edges.tolist(),
            'counts': hist_counts.tolist()
        }

        update_progress('Complete', 100,
                        f'✅ Inference complete! Distance: {error_stats["mean_error_km"]:.2f} km | MAE: {error_stats["mae"]:.2f} | MAPE: {error_stats["mape"]:.2f}%')

        # Emit inference_complete with full statistics and the histogram for the frontend chart
        try:
            socketio.emit('inference_complete', {
                'success': True,
                'stats': error_stats,
                'histogram': histogram_data
            })
        except Exception:
            pass  # In case we are in a CLI context without SocketIO

        return {
            'success': True,
            'results_file': output_file.name,
            'stats': error_stats,
            'histogram': histogram_data,
            'message': f'Successfully processed {len(results_df)} predictions'
        }

    except Exception as e:
        error_msg = f"Error during inference: {str(e)}"
        update_progress('Error', 0, error_msg)
        return {
            'success': False,
            'error': error_msg
        }

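# --- Illustrative sketch (not called by the app): the pipeline can also be driven directly
# from Python or a CLI wrapper. The model and normalization file names below are the defaults
# assumed by the /upload route; the CSV path is a hypothetical example.
def _demo_cli_run():
    result = run_inference_pipeline(
        csv_file_path='uploads/test_dataset.csv',  # hypothetical example path
        model_path='best_model.pth',
        normalization_path='normalization_params_1_atlanttic_regular_intervals_with_lat_lon_velocity_and_time_difference_filter_outlier_segment_min_20_points.json',
    )
    if result['success']:
        print(f"Mean error: {result['stats']['mean_error_km']:.2f} km -> {result['results_file']}")
    else:
        print(f"Inference failed: {result['error']}")
    return result
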
########################################
#              WEB ROUTES              #
########################################

@app.route('/')
def index():
    return render_template('vessel_inference.html')

@app.route('/upload', methods=['POST'])
def upload_file():
    try:
        # Check if files were uploaded
        if 'csv_file' not in request.files:
            return jsonify({'success': False, 'error': 'No CSV file uploaded'})

        csv_file = request.files['csv_file']
        if csv_file.filename == '':
            return jsonify({'success': False, 'error': 'No CSV file selected'})

        # Default model and normalization files
        model_path = 'best_model.pth'
        normalization_path = 'normalization_params_1_atlanttic_regular_intervals_with_lat_lon_velocity_and_time_difference_filter_outlier_segment_min_20_points.json'

        # Check for optional uploads
        if 'model_file' in request.files and request.files['model_file'].filename != '':
            model_file = request.files['model_file']
            model_filename = secure_filename(model_file.filename)
            model_path = os.path.join(app.config['UPLOAD_FOLDER'], model_filename)
            model_file.save(model_path)

        if 'normalization_file' in request.files and request.files['normalization_file'].filename != '':
            norm_file = request.files['normalization_file']
            norm_filename = secure_filename(norm_file.filename)
            normalization_path = os.path.join(app.config['UPLOAD_FOLDER'], norm_filename)
            norm_file.save(normalization_path)

        # Check if required files exist
        if not os.path.exists(model_path):
            return jsonify({'success': False, 'error': f'Model file not found: {model_path}'})

        if not os.path.exists(normalization_path):
            return jsonify({'success': False, 'error': f'Normalization file not found: {normalization_path}'})

        # Save CSV file
        csv_filename = secure_filename(csv_file.filename)
        csv_path = os.path.join(app.config['UPLOAD_FOLDER'], csv_filename)
        csv_file.save(csv_path)

        # Start inference in background thread
        def run_inference_background():
            return run_inference_pipeline(csv_path, model_path, normalization_path)

        thread = threading.Thread(target=run_inference_background)
        thread.start()

        return jsonify({'success': True, 'message': 'Files uploaded successfully. Inference started.'})

    except Exception as e:
        return jsonify({'success': False, 'error': str(e)})

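# Example request against the /upload route (assuming the server is running locally on
# port 7860; model_file and normalization_file are optional and fall back to the defaults above):
#   curl -X POST http://localhost:7860/upload -F "csv_file=@my_test_data.csv"
# Progress can then be polled at /progress and the results CSV fetched from /download_results.
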
@app.route('/progress')
def get_progress():
    return jsonify(current_progress)

@app.route('/download_results')
def download_results():
    # Find the most recent results file
    upload_dir = app.config['UPLOAD_FOLDER']
    temp_dir = tempfile.gettempdir()

    # Look for results files in both directories
    for directory in [upload_dir, temp_dir]:
        if os.path.exists(directory):
            files = [f for f in os.listdir(directory) if f.endswith('_inference_results.csv')]
            if files:
                latest_file = max(files, key=lambda x: os.path.getctime(os.path.join(directory, x)))
                return send_file(
                    os.path.join(directory, latest_file),
                    as_attachment=True,
                    download_name='vessel_inference_results.csv'
                )

    return jsonify({'error': 'No results file found'}), 404

########################################
#            SOCKETIO EVENTS           #
########################################

@socketio.on('connect')
def handle_connect():
    emit('progress_update', current_progress)

@socketio.on('start_inference')
def handle_start_inference(data):
    """Handle an inference request via WebSocket"""
    try:
        csv_path = data.get('csv_path')
        model_path = data.get('model_path', 'best_model.pth')
        norm_path = data.get('normalization_path', 'normalization_params_1_atlanttic_regular_intervals_with_lat_lon_velocity_and_time_difference_filter_outlier_segment_min_20_points.json')

        def run_inference_background():
            result = run_inference_pipeline(csv_path, model_path, norm_path)
            # Use socketio.emit here: the background thread runs outside the request
            # context, so the context-bound emit() would raise a RuntimeError.
            socketio.emit('inference_complete', result)

        thread = threading.Thread(target=run_inference_background)
        thread.start()

    except Exception as e:
        emit('inference_complete', {'success': False, 'error': str(e)})

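# --- Illustrative sketch (a separate client script, not part of this app): listening for the
# progress and completion events with the python-socketio client package.
#
#   import socketio as sio_client
#   sio = sio_client.Client()
#   sio.on('progress_update', lambda msg: print(msg['step'], msg['progress'], msg['details']))
#   sio.on('inference_complete', lambda msg: print('done:', msg.get('stats', msg)))
#   sio.connect('http://localhost:7860')
#   sio.wait()
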
if __name__ == '__main__':
    print("🚢 Vessel Trajectory Inference Web App")
    print("📊 Using Final_inference_maginet.py logic")

    # Get port from environment variable (Hugging Face Spaces uses 7860)
    port = int(os.environ.get('PORT', 7860))
    print(f"🌐 Starting server at http://0.0.0.0:{port}")
    print("📁 Make sure you have:")
    print("   - best_model.pth")
    print("   - normalization_params_1_atlanttic_regular_intervals_...json")
    print("   - Your test dataset CSV")

    socketio.run(app, host='0.0.0.0', port=port, debug=False)