mgagvani · Croopan · Jan 16, 2026 · Jan 24, 2026 · Feb 23, 2026
diff --git a/preference_dataset.py b/preference_dataset.py
@@ -0,0 +1,118 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import Dataset, DataLoader
+import pickle
+import os
+import numpy as np
+import e2e_pb2
+from tqdm import tqdm
+import random
+
+
+class PreferenceDataset(Dataset):    
+    def __init__(self, index_file, data_dir, cache_file='preference_cache_full.pkl'):
+        self.data_dir = data_dir
+
+        if os.path.exists(cache_file):
+            print(f"Loading filtered samples from cache: {cache_file}")
+            with open(cache_file, 'rb') as f:
+                self.valid_samples = pickle.load(f)
+            print(f"Loaded {len(self.valid_samples)} valid preference trajectory samples from cache")
+            return
+
+        print("Cache not found. Filtering dataset for valid preference trajectories...")
+        with open(index_file, 'rb') as f:
+            all_indexes = pickle.load(f)
+
+        self.valid_samples = []
+
+        # Group indexes by filename
+        file_groups = {}
+        for idx, (filename, start_byte, byte_length) in enumerate(all_indexes):
+            if filename not in file_groups:
+                file_groups[filename] = []
+            file_groups[filename].append((idx, start_byte, byte_length))
+
+        # Process each file once
+        for filename, frame_list in tqdm(file_groups.items(), desc="Processing files"):
+            file_path = os.path.join(data_dir, filename)
+
+            # Check if file exists
+            if not os.path.exists(file_path):
+                print(f"WARNING: File not found: {file_path}")
+                continue
+
+            with open(file_path, 'rb') as f:
+                for original_idx, start_byte, byte_length in frame_list:
+                    # Read and parse frame
+                    f.seek(start_byte)
+                    protobuf = f.read(byte_length)
+                    frame = e2e_pb2.E2EDFrame()
+                    frame.ParseFromString(protobuf)
+                    print("num preference trajectories:", len(frame.preference_trajectories))
+
+                    # Check if this frame has valid preference trajectories
+                    for traj_idx, pref_traj in enumerate(frame.preference_trajectories):
+                        if pref_traj.preference_score >= 0:  # Valid score
+                            self.valid_samples.append({
+                                'file_info': (filename, start_byte, byte_length),
+                                'traj_idx': traj_idx,
+                                'original_idx': original_idx
+                            })
+
+        print(f"Found {len(self.valid_samples)} valid preference trajectory samples")
+
+        # Save to cache
+        print(f"Saving filtered samples to cache: {cache_file}")
+        with open(cache_file, 'wb') as f:
+            pickle.dump(self.valid_samples, f)
+        print("Cache saved!")
+
+    def __len__(self):
+        return len(self.valid_samples)
+
+    def __getitem__(self, idx):
+        sample_info = self.valid_samples[idx]
+        filename, start_byte, byte_length = sample_info['file_info']
+        traj_idx = sample_info['traj_idx']
+
+        # Read frame
+        file_path = os.path.join(self.data_dir, filename)
+        with open(file_path, 'rb') as f:
+            f.seek(start_byte)
+            protobuf = f.read(byte_length)
+            frame = e2e_pb2.E2EDFrame()
+            frame.ParseFromString(protobuf)
+
+        # Extract past states (T,6)
+        past = np.stack([
+            frame.past_states.pos_x,
+            frame.past_states.pos_y,
+            frame.past_states.vel_x,
+            frame.past_states.vel_y,
+            frame.past_states.accel_x,
+            frame.past_states.accel_y
+        ], axis=-1).astype(np.float32)
+
+        # Extract intent (one-hot encode)
+        intent = frame.intent
+        intent_onehot = np.zeros(4, dtype=np.float32)
+        intent_onehot[intent] = 1.0
+
+        # Extract preference trajectory (only pos_x, pos_y are populated)
+        pref_traj = frame.preference_trajectories[traj_idx]
+        trajectory = np.stack([
+            pref_traj.pos_x,
+            pref_traj.pos_y
+        ], axis=-1).astype(np.float32)
+
+        # Extract preference score (target)
+        score = pref_traj.preference_score
+
+        return {
+            'past_states': torch.from_numpy(past.flatten()),
+            'intent': torch.from_numpy(intent_onehot),
+            'trajectory': torch.from_numpy(trajectory.flatten()),
+            'score': torch.tensor(score, dtype=torch.float32)
+        }
diff --git a/src/camera-based-e2e/dataset/export_dataset.py b/src/camera-based-e2e/dataset/export_dataset.py
@@ -15,17 +15,18 @@
 
 def safe_name(name: str) -> str:
     # Keep filesystem-friendly ASCII.
-    return re.sub(r"[^A-Za-z0-9_.-]+", "_", name)
+    return re.sub(r"[^A-Za-z0-9_.-]+", "_", name) #substitutes any non ascii chars with _
 
 
-def camera_name(name_enum: int) -> str:
+def camera_name(name_enum: int) -> str: #which camera (front, left, etc)
     return dataset_pb2.CameraName.Name.Name(name_enum)
+    #access dataset_pb2 class, then CameraName nested class, then Name enum nested class, then Name constructor method to get string name
 
 
 def intent_name(intent_enum: int) -> str:
     return e2e_pb2.EgoIntent.Intent.Name(intent_enum)
 
-
+#NOT SURE ABOUT THIS FUNCTION
 def transform_list(transform_msg) -> list:
     return list(transform_msg.transform)
 

diff --git a/src/camera-based-e2e/depthLoss.py b/src/camera-based-e2e/depthLoss.py
@@ -0,0 +1,107 @@
+from transformers import AutoImageProcessor, AutoModelForDepthEstimation
+import torch
+import torch.nn.functional as F
+
+DEPTH_MODEL_ID = "depth-anything/Depth-Anything-V2-Small-hf"
+
+class DepthLoss:
+    def __init__(self, device):
+        self.device = device
+        self.depth_processor = AutoImageProcessor.from_pretrained(DEPTH_MODEL_ID)
+        self.depth_model = AutoModelForDepthEstimation.from_pretrained(DEPTH_MODEL_ID).to(self.device)
+
+    def get_depth(self, images):
+        """
+        Compute ground truth depth from images
+
+        Args:
+            images (torch.Tensor): Input images of shape (B, C, H, W).           
+        """
+        # Preprocess images
+        inputs = self.depth_processor(images=images, return_tensors="pt").to(self.device)
+
+        # Forward pass through the depth estimation model
+        with torch.no_grad():
+            outputs = self.depth_model(**inputs)
+            predicted_depth = outputs.predicted_depth
+
+        height, width = images.shape[2], images.shape[3]
+
+        # Interpolate to original size
+        prediction = F.interpolate(
+            predicted_depth.unsqueeze(1),
+            size=(height, width),
+            mode="bicubic",
+            align_corners=False,
+        ).squeeze(1)  # (B, H, W)
+
+        return prediction
+
+    def compute_depth_loss(self, gt_images, pred_depths, loss_fn):
+        pred_depth = self.get_depth(gt_images)
+        depth_loss = loss_fn(pred_depth, pred_depths)
+
+        return depth_loss
+
+    def __call__(self, gt_images, pred_depths, loss_fn=F.l1_loss):
+        return self.compute_depth_loss(gt_images, pred_depths, loss_fn)
+
+
+if __name__ == "__main__":
+    if not torch.cuda.is_available():
+        print("Must run on GPU")
+
+    import sys
+    from pathlib import Path
+    # Determine the absolute path to the directory containing loader.py
+    # depth_loss.py is in models/losses/
+    current_dir = Path(__file__).resolve().parent
+    project_root = current_dir.parent.parent
+    sys.path.append(str(project_root))
+
+    from loader import WaymoE2E
+    loader = WaymoE2E(indexFile="index_val.pkl", data_dir="/anvil/scratch/x-mgagvani/wod/waymo_end_to_end_camera_v1_0_0/waymo_open_dataset_end_to_end_camera_v_1_0_0/", images=True)
+    data_iterator = iter(torch.utils.data.DataLoader(loader, batch_size=8, num_workers=4))
+
+    device = torch.device("cuda")
+    depth_loss_fn = DepthLoss(device)
+
+    from matplotlib import pyplot as plt
+    import numpy as np
+
+    for _ in range(6):
+        batch = next(data_iterator)
+    images = batch["IMAGES"][1].to(device)  # front camera
+
+    res = depth_loss_fn.get_depth(images)  # (B, H, W) tensor
+
+    fig, ax = plt.subplots(8, 2, figsize=(16, 48))
+    for i in range(8):
+        # Input image
+        ax[i, 0].set_title(f"Input Image {i+1}")
+        ax[i, 0].imshow(images[i].permute(1, 2, 0).cpu().numpy().astype(np.uint8))
+        ax[i, 0].axis('off')
+
+        # Predicted depth
+        ax[i, 1].set_title(f"Predicted Depth {i+1}")
+        depth_img = ax[i, 1].imshow(res[i].cpu().numpy(), cmap='plasma')
+        ax[i, 1].axis('off')
+        fig.colorbar(depth_img, ax=ax[i, 1], orientation='vertical', label='Depth')
+
+    plt.tight_layout()
+    fig.savefig("predicted_depth.png", dpi=150, bbox_inches='tight')
+
+    # throughput test
+    from time import perf_counter
+
+    start_time = perf_counter()
+    times = []
+    for _ in range(100):
+        batch = next(data_iterator)
+        images = batch["IMAGES"][1].to(device)  # front camera
+        t0 = perf_counter()
+        res = depth_loss_fn.get_depth(images)
+        times.append(perf_counter() - t0)
+    end_time = perf_counter()
+    print(f"Throughput: {100 / (end_time - start_time):.2f} batches/sec")
+    print(f"Avg depth inference batch/s: {1 / np.mean(times):.2f} FPS")
diff --git a/src/camera-based-e2e/loader.py b/src/camera-based-e2e/loader.py
@@ -1,6 +1,6 @@
 import torch
 from torch.utils.data import IterableDataset
-from waymo_open_dataset.protos import end_to_end_driving_data_pb2 as e2e_pb2
+from protos import e2e_pb2
 import torchvision
 import pickle
 import struct
@@ -11,6 +11,10 @@
 import cv2 
 from typing import Optional
 import random
+import tqdm
+import open3d as o3d
+from point_cloud import get_waymo_intrinsics, create_point_cloud
+from depthLoss import DepthLoss
 
 devices = ['cuda:0', 'cuda:1']
 
@@ -55,7 +59,7 @@ def decode_img(self, img):
         gpu_tensors_list = torchvision.io.decode_jpeg(
             img_tensor, 
             mode=torchvision.io.ImageReadMode.UNCHANGED,
-            device= 'cpu' #['cuda:0', 'cuda:1'][torch.utils.data.get_worker_info().id%2]
+            device= 'cpu', #['cuda:0', 'cuda:1'][torch.utils.data.get_worker_info().id%2]
         )
         # img_array = np.frombuffer(img, np.uint8)
         return gpu_tensors_list
@@ -96,30 +100,101 @@ def __iter__(self):
             # For submission to waymo evaluation server
             name = frame.frame.context.name
 
-            yield {'PAST': past, 'FUTURE': future, 'IMAGES': [self.decode_img(images.image) for images in frame.frame.images], 'INTENT': frame.intent, 'NAME': name}
+            intrinsics_vec = np.zeros(6, dtype=np.float32)
+
+            # Find FRONT camera (Enum 1) in the context
+            for calib in frame.frame.context.camera_calibrations:
+                if calib.name == 1: # 1 = FRONT
+                    intrinsics_vec[0] = calib.intrinsic[0] # fx
+                    intrinsics_vec[1] = calib.intrinsic[1] # fy
+                    intrinsics_vec[2] = calib.intrinsic[2] # cx
+                    intrinsics_vec[3] = calib.intrinsic[3] # cy
+                    intrinsics_vec[4] = calib.width
+                    intrinsics_vec[5] = calib.height
+                    break
+
+            decoded_images = [self.decode_img(img.image) for img in frame.frame.images]
+
+            yield {
+                'PAST': past, 
+                'FUTURE': future, 
+                'IMAGES': decoded_images, 
+                'INTRINSICS': intrinsics_vec, # <--- Passing this to main
+                'NAME': frame.frame.context.name
+            }
 
 if __name__ == "__main__":
 
     from torch.utils.data import DataLoader
     import time
     from tqdm import tqdm
     # NOTE: Replace with your path
-    DATA_DIR = '/scratch/gilbreth/mgagvani/wod/waymo_open_dataset_end_to_end_camera_v_1_0_0/'
+    DATA_DIR = '/scratch/gilbreth/svelmuru/waymo_end_to_end_dataset/waymo_open_dataset_end_to_end_camera_v_1_0_0/'
     BATCH_SIZE = 32
-    dataset = WaymoE2E(indexFile="index_train.pkl", data_dir = DATA_DIR, images=True)
+    dataset = WaymoE2E(indexFile="index_train.pkl", data_dir = DATA_DIR, images=True, n_items= 2)
     loader = DataLoader(
         dataset, 
         batch_size=BATCH_SIZE,
         num_workers=16,
     )
 
     def main():
-        # start = time.time()
-        for batch_of_frames in tqdm(loader):
-            # print(batch_of_frames["INTENT"])
-            # print(batch_of_frames.keys(), [b.shape for b in batch_of_frames.values() if isinstance(b, torch.Tensor)])
-            pass
-        # print("Total Time:", time.time()-start)
+        device = torch.device("cuda")
+        depth_model = DepthLoss(device)
+        output_dir = "visualizations"
+
+        for batch_idx, batch_of_frames in enumerate(tqdm(loader)):
+            images = batch_of_frames["IMAGES"][1].to(device)  # Shape: (B, 3, H, W)
+            intrinsics_batch = batch_of_frames["INTRINSICS"]
+            pred_depths = depth_model.get_depth(images)       # Shape: (B, H, W)
+
+            batch_size = images.shape[0] # B
+
+            for i in range(batch_size):
+                # Image: (H, W, 3) uint8
+
+                img_np = images[i].permute(1, 2, 0).cpu().numpy().copy()  
+
+                if img_np.max() <= 1.0: 
+                    img_np = (img_np * 255).astype(np.uint8)
+                else: 
+                    img_np = img_np.astype(np.uint8)
+
+                #save img
+                img_filename = f"batch_{batch_idx:04d}_img_{i:02d}.png"
+                img_path = os.path.join(output_dir, img_filename)
+
+                img_bgr = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
+                cv2.imwrite(img_path, img_bgr)
+
+                # Depth: (H, W) float32
+                depth_np = pred_depths[i].cpu().numpy()
+
+                # set camera intrinsics
+                vals = intrinsics_batch[i].numpy()
+                camera_intrinsics = o3d.camera.PinholeCameraIntrinsic()
+                camera_intrinsics.set_intrinsics(
+                    width=int(vals[4]), 
+                    height=int(vals[5]), 
+                    fx=vals[0], fy=vals[1], 
+                    cx=vals[2], cy=vals[3]
+                )
+
+                # Create Point Cloud
+                pcd = create_point_cloud(
+                    img_np, 
+                    depth_np, 
+                    intrinsics=camera_intrinsics,
+                    depth_scale=1.0
+                )
+
+                # Save to Disk
+                filename = f"batch_{batch_idx:04d}_img_{i:02d}.pcd"
+                file_path = os.path.join(output_dir, filename)
+
+                # non-blocking save command
+                o3d.io.write_point_cloud(file_path, pcd)
+
 
     import cProfile
-    main()
+    main()