
Commit 8d6abbe

Erblin Isaku authored and committed
Initial commit with refactored DT-OOD code
0 parents · commit 8d6abbe

14 files changed: +1,449 −0 lines changed

.gitignore

Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Virtual environment
.env/
.venv/
env/
venv/
ENV/

# VS Code settings
.vscode/

# Jupyter Notebook checkpoints
.ipynb_checkpoints/

# Pytest cache
.pytest_cache/

# Logs and local data
*.log
*.csv
*.xlsx
*.db
*.npz
*.npy

# Mac system files
.DS_Store

# Model weights or outputs
*.pth
*.pt

# Other
*.swp

README.md

Lines changed: 88 additions & 0 deletions
@@ -0,0 +1,88 @@

# Digital Twin OOD Detection with Transformer

This repository implements a Transformer-based approach to proactive **out-of-distribution (OOD)** detection using **forecasting and reconstruction error analysis**. It is structured around a two-phase training process and supports confidence-aware inference and explainability.

## 🚀 Setup

1. **Clone the Repository**

```bash
git clone https://github.com/ErblinIsaku/dt-ood-detection.git
cd dt-ood-detection
```

2. **Install Dependencies**

```bash
pip install -r requirements.txt
```

---
## ⚙️ Configuration

- Modify `config/config_train.py` to set:
  - `INPUT_FEATURES`, `OUTPUT_FEATURES`
  - `DATA_PATHS`, `SEQUENCE_SETTINGS`, `TRAINING_PARAMS`
- Modify `config/config_inference.py` to set:
  - `TEST_DATA_DIR`, `MODEL_PATH`, `SCALER_PATH`, etc.

---
## 🧠 Model Training

```bash
python main_train.py
```

Or, for the PAL-specific config:

```bash
python pal_main_train.py
```

This performs (see the sketch below):
- **Phase 1**: joint training for forecasting and reconstruction
- **Phase 2**: fine-tuning of the reconstruction head only
- Computation and saving of the reconstruction and uncertainty thresholds
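To make the two-phase schedule concrete, here is a minimal sketch. The two-head model and attribute names (`forecast_head`, `recon_head`) are illustrative assumptions, not the repository's actual API:

```python
import torch
import torch.nn as nn

# Illustrative stand-in for the repo's Transformer: a shared encoder with a
# forecasting head and a reconstruction head (all names are assumptions).
class TwoHeadModel(nn.Module):
    def __init__(self, n_in=6, n_out=5, d_model=64):
        super().__init__()
        self.encoder = nn.Linear(n_in, d_model)         # placeholder for the Transformer encoder
        self.forecast_head = nn.Linear(d_model, n_out)  # predicts the future window
        self.recon_head = nn.Linear(d_model, n_in)      # reconstructs the input window

    def forward(self, x):                               # x: (batch, seq_len, n_in)
        h = torch.relu(self.encoder(x))
        return self.forecast_head(h), self.recon_head(h)

model = TwoHeadModel()
mse = nn.MSELoss()
x = torch.randn(8, 60, 6)  # dummy input window
y = torch.randn(8, 60, 5)  # dummy forecast target

# Phase 1: joint forecasting + reconstruction objective.
opt = torch.optim.Adam(model.parameters(), lr=1e-4)
opt.zero_grad()
forecast, recon = model(x)
loss = mse(forecast, y) + mse(recon, x)
loss.backward()
opt.step()

# Phase 2: freeze the backbone, fine-tune only the reconstruction head.
for p in model.parameters():
    p.requires_grad = False
for p in model.recon_head.parameters():
    p.requires_grad = True
opt = torch.optim.Adam(model.recon_head.parameters(), lr=1e-4)
```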
---
## 🔍 Inference & OOD Detection

```bash
python main_inference.py
```

This script (sketched below):
- Runs forecasting on the test data
- Computes the forecast reconstruction error and MC-dropout variance
- Applies the saved thresholds to flag OOD windows
- Generates visualizations and JSON-based diagnostics
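A minimal sketch of the MC-dropout and thresholding steps. The two-output model signature and threshold names carry over from the sketch above and are assumptions, not the repository's exact code:

```python
import torch

def mc_dropout_forecast(model, x, n_passes=20):
    """Mean and variance over stochastic forward passes, with dropout left on."""
    model.train()  # keep dropout active at inference time
    with torch.no_grad():
        preds = torch.stack([model(x)[0] for _ in range(n_passes)])
    return preds.mean(dim=0), preds.var(dim=0)

def is_ood(recon_error, variance, recon_thr, var_thr):
    """Quadrant-style decision on scalar per-window summaries:
    high reconstruction error AND high variance => confident OOD."""
    return bool(recon_error > recon_thr and variance > var_thr)
```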
---
## 📊 Outputs

- `forecast_results.csv`: ground truth vs. forecasted values
- `forecast_errors.csv`: MSE/RMSE per feature
- `thresholds.pkl`: saved thresholds for inference
- `ood_diagnostics.json`: confidence-aware OOD decision metadata
- Plots:
  - forecasted vs. ground-truth curves
  - reconstruction/variance distributions
  - quadrant-based OOD scatter
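A quick way to inspect these artifacts after a run. The file names come from the list above, but the keys and columns inside them are assumptions for illustration:

```python
import json
import pickle

import pandas as pd

# Per-feature forecast errors.
errors = pd.read_csv('forecast_errors.csv')
print(errors.head())

# Saved thresholds, e.g. {'reconstruction': ..., 'variance': ...} (assumed keys).
with open('thresholds.pkl', 'rb') as f:
    thresholds = pickle.load(f)

# Per-window OOD decisions and confidence metadata (assumed structure).
with open('ood_diagnostics.json') as f:
    diagnostics = json.load(f)
```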
---
## 📘 Notes

- Ensure that your **input and output features are defined clearly** and that the combined feature set used for normalization does **not contain duplicates** (see the one-liner below).
- Inference reuses the training-time normalization statistics.
- The scripts support datasets with different feature combinations (e.g., ship dynamics, mobile-robot navigation).
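One order-preserving way to build that duplicate-free combined set from the config lists — a sketch, not necessarily the repository's own code:

```python
# Combined feature set for normalization, duplicates removed, order preserved.
ALL_FEATURES = list(dict.fromkeys(INPUT_FEATURES + OUTPUT_FEATURES))
```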
---

config/config_inference.py

Lines changed: 55 additions & 0 deletions
@@ -0,0 +1,55 @@
import os

# === Base Paths ===
BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
EXPERIMENT_NAME = 'dt-ood-repo'

# === Data Path ===
# TEST_DATA_DIR = os.path.join(BASE_DIR, 'NTNU', 'data_OOD_case_1')
# TEST_DATA_DIR = os.path.join(BASE_DIR, 'NTNU', 'data_OOD_case_2')
# TEST_DATA_DIR = os.path.join(BASE_DIR, 'NTNU', 'data_OOD_case_3')
TEST_DATA_DIR = os.path.join(BASE_DIR, 'NTNU', 'data_OOD_case_all')

# === Feature Selection ===
# FEATURE_NAMES = [
#     'Rudder Angle',
#     'Surge Speed',
#     'Sway Speed',
#     'Yaw Rate',
#     'Roll Angle',
#     'Roll Rate'
# ]

INPUT_FEATURES = [
    'Rudder Angle',
    'Surge Speed',
    'Sway Speed',
    'Yaw Rate',
    'Roll Angle',
    'Roll Rate'
]

OUTPUT_FEATURES = [
    'Surge Speed',
    'Sway Speed',
    'Yaw Rate',
    'Roll Angle',
    'Roll Rate'
]

# === Model & Threshold Paths ===
MODEL_PATH = os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_models', 'phase2_model.pth')
SCALER_PATH = os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_models', 'normalization_params.pkl')
THRESHOLD_PATH = os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_models', 'threshold_params.pkl')

# === Inference Results Directory ===
INFERENCE_RESULTS_DIR = os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_results', 'inference')

JSON_OUTPUT_PATH = os.path.join(INFERENCE_RESULTS_DIR, 'ood_forecast_window_diagnostics.json')

# === Inference Settings ===
SEQ_LEN = 60           # input window length (past time steps)
FORECAST_HORIZON = 60  # future steps to predict
BATCH_SIZE = 64

config/config_train.py

Lines changed: 85 additions & 0 deletions
@@ -0,0 +1,85 @@
import os

# === Base Directory (auto-detected) ===
BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
EXPERIMENT_NAME = 'dt-ood-repo'

# === Feature Selection ===
# FEATURE_NAMES = [
#     'Rudder Angle',
#     'Surge Speed',
#     'Sway Speed',
#     'Yaw Rate',
#     'Roll Angle',
#     'Roll Rate'
# ]

INPUT_FEATURES = [
    'Rudder Angle',
    'Surge Speed',
    'Sway Speed',
    'Yaw Rate',
    'Roll Angle',
    'Roll Rate'
]

OUTPUT_FEATURES = [
    'Surge Speed',
    'Sway Speed',
    'Yaw Rate',
    'Roll Angle',
    'Roll Rate'
]

# === Data Paths ===
DATA_PATHS = {
    'train_data_dir': os.path.join(BASE_DIR, 'NTNU', 'ind', 'processed-v2', 'train'),
    'validation_data_dir': os.path.join(BASE_DIR, 'NTNU', 'ind', 'processed-v2', 'validation')
}

# === Sequence Configuration ===
SEQUENCE_SETTINGS = {
    'input_window_size': 60,     # past time steps used as input
    'forecast_horizon_size': 60  # future steps to predict
}

# === Model Hyperparameters ===
MODEL_PARAMS = {
    'd_model': 64,
    'num_heads': 4,
    'ff_dim': 128,
    'dropout_rate': 0.1
}

# === Training Configuration ===
TRAINING_PARAMS = {
    'batch_size': 64,
    'learning_rate': 0.0001,
    'epochs_phase1': 120,  # forecasting phase
    'epochs_phase2': 100,  # reconstruction fine-tuning phase

    'early_stopping': {
        'enabled': True,
        'patience': 10,          # stop if no improvement for 10 epochs
        'patience_extension': 5  # allow 5 extra epochs after a plateau
    }
}

# === Training Control Flags ===
TRAINING_FLAGS = {
    'enable_phase1_training': True,
    'enable_phase2_training': True
}

# === Output Paths ===
OUTPUT_PATHS = {
    'phase1_model': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_models', 'phase1_model.pth'),
    'phase2_model': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_models', 'phase2_model.pth'),
    'normalization_params': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_models', 'normalization_params.pkl'),
    'threshold_params': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_models', 'threshold_params.pkl'),
    'forecast_results_csv': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_results', 'forecast_results.csv'),
    'forecast_errors_csv': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_results', 'forecast_errors.csv'),
    'log_dir': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_logs'),
    'forecast_plot_path': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_results', 'forecast_plot_train.png')
}
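The `patience_extension` comment above implies the stopper can grant a one-time grace window after a plateau. One plausible reading, as a minimal sketch (the repository's exact semantics may differ):

```python
class EarlyStopping:
    """Patience-based stopping with a one-time extension after the first plateau."""

    def __init__(self, patience=10, patience_extension=5):
        self.patience = patience
        self.extension = patience_extension
        self.best = float('inf')
        self.bad_epochs = 0
        self.extended = False

    def step(self, val_loss):
        """Return True when training should stop."""
        if val_loss < self.best:
            self.best = val_loss
            self.bad_epochs = 0
            return False
        self.bad_epochs += 1
        if self.bad_epochs >= self.patience:
            if not self.extended:
                self.extended = True              # grant the extra epochs once
                self.bad_epochs -= self.extension
                return False
            return True
        return False
```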

config/pal_config_train.py

Lines changed: 66 additions & 0 deletions
@@ -0,0 +1,66 @@
import os

# === Base Directory ===
BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
EXPERIMENT_NAME = 'dt-ood-repo'

# === Data Paths ===
DATA_PATHS = {
    'train_data_dir': os.path.join(BASE_DIR, 'PAL', 'train'),
    'validation_data_dir': os.path.join(BASE_DIR, 'PAL', 'validation')
}

# === Features ===
INPUT_FEATURES = [
    'position_x', 'position_y',
    'orientation_z', 'orientation_w',
    'linear_velocity_x', 'linear_velocity_y',
    'angular_velocity_z'
]

OUTPUT_FEATURES = ['position_x', 'position_y']  # only forecast x and y

# === Sequence Settings ===
SEQUENCE_SETTINGS = {
    'input_window_size': 60,
    'forecast_horizon_size': 60
}

# === Model Parameters ===
MODEL_PARAMS = {
    'd_model': 64,
    'num_heads': 4,
    'ff_dim': 128,
    'dropout_rate': 0.1
}

# === Training Parameters ===
TRAINING_PARAMS = {
    'batch_size': 64,
    'learning_rate': 0.00092,
    'epochs_phase1': 100,
    'epochs_phase2': 200,
    'early_stopping': {
        'enabled': False,
        'patience': 10,
        'patience_extension': 5
    }
}

# === Flags to Enable/Disable Phases ===
TRAINING_FLAGS = {
    'enable_phase1_training': True,
    'enable_phase2_training': False
}

# === Output Paths ===
OUTPUT_PATHS = {
    'phase1_model': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'pal_models', 'phase1_model.pth'),
    'phase2_model': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'pal_models', 'phase2_model.pth'),
    'normalization_params': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'pal_models', 'scaler.pkl'),
    'threshold_params': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'pal_models', 'thresholds.pkl'),
    'forecast_results_csv': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'pal_results', 'forecast_results.csv'),
    'forecast_errors_csv': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'pal_results', 'forecast_errors.csv'),
    'forecast_plot_path': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'pal_results', 'forecast_plot.png'),
    'log_dir': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'pal_logs')
}

data/dataset.py

Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
import os

import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset


class TimeSeriesDataset(Dataset):
    """Wraps pre-built (input, target) sequence arrays as float32 tensors."""

    def __init__(self, inputs, targets):
        self.inputs = torch.tensor(inputs, dtype=torch.float32)
        self.targets = torch.tensor(targets, dtype=torch.float32)

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return self.inputs[idx], self.targets[idx]


def load_directory_data(dir_path, feature_names):
    """
    Loads and concatenates all CSV files in the given directory,
    selecting only the specified feature columns.
    """
    all_data = []
    for file_name in os.listdir(dir_path):
        file_path = os.path.join(dir_path, file_name)
        if os.path.isfile(file_path) and file_name.endswith('.csv'):
            df = pd.read_csv(file_path)
            df = df[[col for col in feature_names if col in df.columns]]
            all_data.append(df)
    if not all_data:
        raise ValueError(f"No valid data in {dir_path}")
    return pd.concat(all_data, axis=0).reset_index(drop=True)


def create_sequences(input_df, output_df, input_window_size, forecast_horizon_size, overlap=True):
    """
    Slices aligned (input window, forecast target) pairs from the two frames.
    With overlap=True the window slides one step at a time; otherwise
    consecutive windows are disjoint.
    """
    sequences = []
    targets = []
    step_size = 1 if overlap else (input_window_size + forecast_horizon_size)

    for i in range(0, len(input_df) - input_window_size - forecast_horizon_size + 1, step_size):
        x_seq = input_df.iloc[i : i + input_window_size].values
        y_seq = output_df.iloc[i + input_window_size : i + input_window_size + forecast_horizon_size].values
        sequences.append(x_seq)
        targets.append(y_seq)

    return np.array(sequences), np.array(targets)
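For reference, a sketch of how these helpers fit together — assuming `config` and `data` are importable as packages, and skipping normalization with the saved scaler for brevity:

```python
from torch.utils.data import DataLoader

from config.config_train import DATA_PATHS, INPUT_FEATURES, OUTPUT_FEATURES
from data.dataset import TimeSeriesDataset, create_sequences, load_directory_data

# Load raw frames, slice them into 60-step windows with 60-step targets,
# then wrap the arrays for batched training.
input_df = load_directory_data(DATA_PATHS['train_data_dir'], INPUT_FEATURES)
output_df = input_df[OUTPUT_FEATURES]

X, y = create_sequences(input_df, output_df,
                        input_window_size=60, forecast_horizon_size=60)

loader = DataLoader(TimeSeriesDataset(X, y), batch_size=64, shuffle=True)
```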
