
Commit 8d6abbe

Erblin Isaku authored and committed
Initial commit with refactored DT-OOD code
0 parents · commit 8d6abbe

14 files changed: +1,449 −0 lines changed

.gitignore

Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Virtual environment
.env/
.venv/
env/
venv/
ENV/

# VS Code settings
.vscode/

# Jupyter Notebook checkpoints
.ipynb_checkpoints/

# Pytest cache
.pytest_cache/

# Logs and local data
*.log
*.csv
*.xlsx
*.db
*.npz
*.npy

# Mac system files
.DS_Store

# Model weights or outputs
*.pth
*.pt

# Other
*.swp

README.md

Lines changed: 88 additions & 0 deletions
@@ -0,0 +1,88 @@

# Digital Twin OOD Detection with Transformer

This repository implements a Transformer-based approach to proactive **out-of-distribution (OOD)** detection using **forecasting and reconstruction error analysis**. It is structured around a two-phase training process and supports confidence-aware inference and explainability.

## 🚀 Setup

1. **Clone the Repository**

```bash
git clone https://github.com/ErblinIsaku/dt-ood-detection.git
cd dt-ood-detection
```

2. **Install Dependencies**

```bash
pip install -r requirements.txt
```

---
## ⚙️ Configuration

- Modify `config/config_train.py` to set:
  - `INPUT_FEATURES`, `OUTPUT_FEATURES`
  - `DATA_PATHS`, `SEQUENCE_SETTINGS`, `TRAINING_PARAMS`
- Modify `config/config_inference.py` to set:
  - `TEST_DATA_DIR`, `MODEL_PATH`, `SCALER_PATH`, etc.

---
## 🧠 Model Training

```bash
python main_train.py
```

Or, for the PAL-specific config:

```bash
python pal_main_train.py
```

This performs (see the sketch below):
- **Phase 1**: joint training for forecasting and reconstruction
- **Phase 2**: fine-tuning of the reconstruction head only
- Computation and saving of the reconstruction and uncertainty thresholds
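To make the two-phase schedule concrete, here is a minimal sketch. The two-head model and attribute names (`forecast_head`, `recon_head`) are illustrative assumptions, not the repository's actual API:

```python
import torch
import torch.nn as nn

# Illustrative stand-in for the repo's Transformer: a shared encoder with a
# forecasting head and a reconstruction head (all names are assumptions).
class TwoHeadModel(nn.Module):
    def __init__(self, n_in=6, n_out=5, d_model=64):
        super().__init__()
        self.encoder = nn.Linear(n_in, d_model)         # placeholder for the Transformer encoder
        self.forecast_head = nn.Linear(d_model, n_out)  # predicts the future window
        self.recon_head = nn.Linear(d_model, n_in)      # reconstructs the input window

    def forward(self, x):                               # x: (batch, seq_len, n_in)
        h = torch.relu(self.encoder(x))
        return self.forecast_head(h), self.recon_head(h)

model = TwoHeadModel()
mse = nn.MSELoss()
x = torch.randn(8, 60, 6)  # dummy input window
y = torch.randn(8, 60, 5)  # dummy forecast target

# Phase 1: joint forecasting + reconstruction objective.
opt = torch.optim.Adam(model.parameters(), lr=1e-4)
opt.zero_grad()
forecast, recon = model(x)
loss = mse(forecast, y) + mse(recon, x)
loss.backward()
opt.step()

# Phase 2: freeze the backbone, fine-tune only the reconstruction head.
for p in model.parameters():
    p.requires_grad = False
for p in model.recon_head.parameters():
    p.requires_grad = True
opt = torch.optim.Adam(model.recon_head.parameters(), lr=1e-4)
```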
---
## 🔍 Inference & OOD Detection

```bash
python main_inference.py
```

This script (sketched below):
- Runs forecasting on the test data
- Computes the forecast reconstruction error and MC-dropout variance
- Applies the saved thresholds to flag OOD windows
- Generates visualizations and JSON-based diagnostics
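A minimal sketch of the MC-dropout and thresholding steps. The two-output model signature and threshold names carry over from the sketch above and are assumptions, not the repository's exact code:

```python
import torch

def mc_dropout_forecast(model, x, n_passes=20):
    """Mean and variance over stochastic forward passes, with dropout left on."""
    model.train()  # keep dropout active at inference time
    with torch.no_grad():
        preds = torch.stack([model(x)[0] for _ in range(n_passes)])
    return preds.mean(dim=0), preds.var(dim=0)

def is_ood(recon_error, variance, recon_thr, var_thr):
    """Quadrant-style decision on scalar per-window summaries:
    high reconstruction error AND high variance => confident OOD."""
    return bool(recon_error > recon_thr and variance > var_thr)
```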
---
## 📊 Outputs

- `forecast_results.csv`: ground truth vs. forecasted values
- `forecast_errors.csv`: MSE/RMSE per feature
- `thresholds.pkl`: saved thresholds for inference
- `ood_diagnostics.json`: confidence-aware OOD decision metadata
- Plots:
  - forecasted vs. ground-truth curves
  - reconstruction/variance distributions
  - quadrant-based OOD scatter
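A quick way to inspect these artifacts after a run. The file names come from the list above, but the keys and columns inside them are assumptions for illustration:

```python
import json
import pickle

import pandas as pd

# Per-feature forecast errors.
errors = pd.read_csv('forecast_errors.csv')
print(errors.head())

# Saved thresholds, e.g. {'reconstruction': ..., 'variance': ...} (assumed keys).
with open('thresholds.pkl', 'rb') as f:
    thresholds = pickle.load(f)

# Per-window OOD decisions and confidence metadata (assumed structure).
with open('ood_diagnostics.json') as f:
    diagnostics = json.load(f)
```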
---
## 📘 Notes

- Ensure that your **input and output features are defined clearly** and that the combined feature set used for normalization does **not contain duplicates** (see the one-liner below).
- Inference reuses the training-time normalization statistics.
- The scripts support datasets with different feature combinations (e.g., ship dynamics, mobile-robot navigation).
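One order-preserving way to build that duplicate-free combined set from the config lists — a sketch, not necessarily the repository's own code:

```python
# Combined feature set for normalization, duplicates removed, order preserved.
ALL_FEATURES = list(dict.fromkeys(INPUT_FEATURES + OUTPUT_FEATURES))
```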
---

config/config_inference.py

Lines changed: 55 additions & 0 deletions
@@ -0,0 +1,55 @@
import os

# === Base Paths ===
BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
EXPERIMENT_NAME = 'dt-ood-repo'

# === Data Path ===
# TEST_DATA_DIR = os.path.join(BASE_DIR, 'NTNU', 'data_OOD_case_1')
# TEST_DATA_DIR = os.path.join(BASE_DIR, 'NTNU', 'data_OOD_case_2')
# TEST_DATA_DIR = os.path.join(BASE_DIR, 'NTNU', 'data_OOD_case_3')
TEST_DATA_DIR = os.path.join(BASE_DIR, 'NTNU', 'data_OOD_case_all')

# === Feature Selection ===
# FEATURE_NAMES = [
#     'Rudder Angle',
#     'Surge Speed',
#     'Sway Speed',
#     'Yaw Rate',
#     'Roll Angle',
#     'Roll Rate'
# ]

INPUT_FEATURES = [
    'Rudder Angle',
    'Surge Speed',
    'Sway Speed',
    'Yaw Rate',
    'Roll Angle',
    'Roll Rate'
]

OUTPUT_FEATURES = [
    'Surge Speed',
    'Sway Speed',
    'Yaw Rate',
    'Roll Angle',
    'Roll Rate'
]

# === Model & Threshold Paths ===
MODEL_PATH = os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_models', 'phase2_model.pth')
SCALER_PATH = os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_models', 'normalization_params.pkl')
THRESHOLD_PATH = os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_models', 'threshold_params.pkl')

# === Inference Results Directory ===
INFERENCE_RESULTS_DIR = os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_results', 'inference')

JSON_OUTPUT_PATH = os.path.join(INFERENCE_RESULTS_DIR, 'ood_forecast_window_diagnostics.json')

# === Inference Settings ===
SEQ_LEN = 60           # input window length (past time steps)
FORECAST_HORIZON = 60  # future steps to predict
BATCH_SIZE = 64

config/config_train.py

Lines changed: 85 additions & 0 deletions
@@ -0,0 +1,85 @@
import os

# === Base Directory (auto-detected) ===
BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
EXPERIMENT_NAME = 'dt-ood-repo'

# === Feature Selection ===
# FEATURE_NAMES = [
#     'Rudder Angle',
#     'Surge Speed',
#     'Sway Speed',
#     'Yaw Rate',
#     'Roll Angle',
#     'Roll Rate'
# ]

INPUT_FEATURES = [
    'Rudder Angle',
    'Surge Speed',
    'Sway Speed',
    'Yaw Rate',
    'Roll Angle',
    'Roll Rate'
]

OUTPUT_FEATURES = [
    'Surge Speed',
    'Sway Speed',
    'Yaw Rate',
    'Roll Angle',
    'Roll Rate'
]

# === Data Paths ===
DATA_PATHS = {
    'train_data_dir': os.path.join(BASE_DIR, 'NTNU', 'ind', 'processed-v2', 'train'),
    'validation_data_dir': os.path.join(BASE_DIR, 'NTNU', 'ind', 'processed-v2', 'validation')
}

# === Sequence Configuration ===
SEQUENCE_SETTINGS = {
    'input_window_size': 60,     # past time steps used as input
    'forecast_horizon_size': 60  # future steps to predict
}

# === Model Hyperparameters ===
MODEL_PARAMS = {
    'd_model': 64,
    'num_heads': 4,
    'ff_dim': 128,
    'dropout_rate': 0.1
}

# === Training Configuration ===
TRAINING_PARAMS = {
    'batch_size': 64,
    'learning_rate': 0.0001,
    'epochs_phase1': 120,  # forecasting phase
    'epochs_phase2': 100,  # reconstruction fine-tuning phase

    'early_stopping': {
        'enabled': True,
        'patience': 10,          # stop if no improvement for 10 epochs
        'patience_extension': 5  # allow 5 extra epochs after a plateau
    }
}

# === Training Control Flags ===
TRAINING_FLAGS = {
    'enable_phase1_training': True,
    'enable_phase2_training': True
}

# === Output Paths ===
OUTPUT_PATHS = {
    'phase1_model': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_models', 'phase1_model.pth'),
    'phase2_model': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_models', 'phase2_model.pth'),
    'normalization_params': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_models', 'normalization_params.pkl'),
    'threshold_params': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_models', 'threshold_params.pkl'),
    'forecast_results_csv': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_results', 'forecast_results.csv'),
    'forecast_errors_csv': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_results', 'forecast_errors.csv'),
    'log_dir': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_logs'),
    'forecast_plot_path': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'ntnu_results', 'forecast_plot_train.png')
}
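The `patience_extension` comment above implies the stopper can grant a one-time grace window after a plateau. One plausible reading, as a minimal sketch (the repository's exact semantics may differ):

```python
class EarlyStopping:
    """Patience-based stopping with a one-time extension after the first plateau."""

    def __init__(self, patience=10, patience_extension=5):
        self.patience = patience
        self.extension = patience_extension
        self.best = float('inf')
        self.bad_epochs = 0
        self.extended = False

    def step(self, val_loss):
        """Return True when training should stop."""
        if val_loss < self.best:
            self.best = val_loss
            self.bad_epochs = 0
            return False
        self.bad_epochs += 1
        if self.bad_epochs >= self.patience:
            if not self.extended:
                self.extended = True              # grant the extra epochs once
                self.bad_epochs -= self.extension
                return False
            return True
        return False
```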

config/pal_config_train.py

Lines changed: 66 additions & 0 deletions
@@ -0,0 +1,66 @@
import os

# === Base Directory ===
BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
EXPERIMENT_NAME = 'dt-ood-repo'

# === Data Paths ===
DATA_PATHS = {
    'train_data_dir': os.path.join(BASE_DIR, 'PAL', 'train'),
    'validation_data_dir': os.path.join(BASE_DIR, 'PAL', 'validation')
}

# === Features ===
INPUT_FEATURES = [
    'position_x', 'position_y',
    'orientation_z', 'orientation_w',
    'linear_velocity_x', 'linear_velocity_y',
    'angular_velocity_z'
]

OUTPUT_FEATURES = ['position_x', 'position_y']  # only forecast x and y

# === Sequence Settings ===
SEQUENCE_SETTINGS = {
    'input_window_size': 60,
    'forecast_horizon_size': 60
}

# === Model Parameters ===
MODEL_PARAMS = {
    'd_model': 64,
    'num_heads': 4,
    'ff_dim': 128,
    'dropout_rate': 0.1
}

# === Training Parameters ===
TRAINING_PARAMS = {
    'batch_size': 64,
    'learning_rate': 0.00092,
    'epochs_phase1': 100,
    'epochs_phase2': 200,
    'early_stopping': {
        'enabled': False,
        'patience': 10,
        'patience_extension': 5
    }
}

# === Flags to Enable/Disable Phases ===
TRAINING_FLAGS = {
    'enable_phase1_training': True,
    'enable_phase2_training': False
}

# === Output Paths ===
OUTPUT_PATHS = {
    'phase1_model': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'pal_models', 'phase1_model.pth'),
    'phase2_model': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'pal_models', 'phase2_model.pth'),
    'normalization_params': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'pal_models', 'scaler.pkl'),
    'threshold_params': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'pal_models', 'thresholds.pkl'),
    'forecast_results_csv': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'pal_results', 'forecast_results.csv'),
    'forecast_errors_csv': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'pal_results', 'forecast_errors.csv'),
    'forecast_plot_path': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'pal_results', 'forecast_plot.png'),
    'log_dir': os.path.join(BASE_DIR, EXPERIMENT_NAME, 'pal_logs')
}

data/dataset.py

Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
import os

import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset


class TimeSeriesDataset(Dataset):
    """Wraps pre-built (input, target) sequence arrays as float32 tensors."""

    def __init__(self, inputs, targets):
        self.inputs = torch.tensor(inputs, dtype=torch.float32)
        self.targets = torch.tensor(targets, dtype=torch.float32)

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return self.inputs[idx], self.targets[idx]


def load_directory_data(dir_path, feature_names):
    """
    Loads and concatenates all CSV files in the given directory,
    selecting only the specified feature columns.
    """
    all_data = []
    for file_name in os.listdir(dir_path):
        file_path = os.path.join(dir_path, file_name)
        if os.path.isfile(file_path) and file_name.endswith('.csv'):
            df = pd.read_csv(file_path)
            df = df[[col for col in feature_names if col in df.columns]]
            all_data.append(df)
    if not all_data:
        raise ValueError(f"No valid data in {dir_path}")
    return pd.concat(all_data, axis=0).reset_index(drop=True)


def create_sequences(input_df, output_df, input_window_size, forecast_horizon_size, overlap=True):
    """
    Slices aligned (input window, forecast target) pairs from the two frames.
    With overlap=True the window slides one step at a time; otherwise
    consecutive windows are disjoint.
    """
    sequences = []
    targets = []
    step_size = 1 if overlap else (input_window_size + forecast_horizon_size)

    for i in range(0, len(input_df) - input_window_size - forecast_horizon_size + 1, step_size):
        x_seq = input_df.iloc[i : i + input_window_size].values
        y_seq = output_df.iloc[i + input_window_size : i + input_window_size + forecast_horizon_size].values
        sequences.append(x_seq)
        targets.append(y_seq)

    return np.array(sequences), np.array(targets)
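For reference, a sketch of how these helpers fit together — assuming `config` and `data` are importable as packages, and skipping normalization with the saved scaler for brevity:

```python
from torch.utils.data import DataLoader

from config.config_train import DATA_PATHS, INPUT_FEATURES, OUTPUT_FEATURES
from data.dataset import TimeSeriesDataset, create_sequences, load_directory_data

# Load raw frames, slice them into 60-step windows with 60-step targets,
# then wrap the arrays for batched training.
input_df = load_directory_data(DATA_PATHS['train_data_dir'], INPUT_FEATURES)
output_df = input_df[OUTPUT_FEATURES]

X, y = create_sequences(input_df, output_df,
                        input_window_size=60, forecast_horizon_size=60)

loader = DataLoader(TimeSeriesDataset(X, y), batch_size=64, shuffle=True)
```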
