Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 118 additions & 0 deletions preference_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pickle
import os
import numpy as np
import e2e_pb2
from tqdm import tqdm
import random


class PreferenceDataset(Dataset):
def __init__(self, index_file, data_dir, cache_file='preference_cache_full.pkl'):
self.data_dir = data_dir

if os.path.exists(cache_file):
print(f"Loading filtered samples from cache: {cache_file}")
with open(cache_file, 'rb') as f:
self.valid_samples = pickle.load(f)
print(f"Loaded {len(self.valid_samples)} valid preference trajectory samples from cache")
return

print("Cache not found. Filtering dataset for valid preference trajectories...")
with open(index_file, 'rb') as f:
all_indexes = pickle.load(f)

self.valid_samples = []

# Group indexes by filename
file_groups = {}
for idx, (filename, start_byte, byte_length) in enumerate(all_indexes):
if filename not in file_groups:
file_groups[filename] = []
file_groups[filename].append((idx, start_byte, byte_length))

# Process each file once
for filename, frame_list in tqdm(file_groups.items(), desc="Processing files"):
file_path = os.path.join(data_dir, filename)

# Check if file exists
if not os.path.exists(file_path):
print(f"WARNING: File not found: {file_path}")
continue

with open(file_path, 'rb') as f:
for original_idx, start_byte, byte_length in frame_list:
# Read and parse frame
f.seek(start_byte)
protobuf = f.read(byte_length)
frame = e2e_pb2.E2EDFrame()
frame.ParseFromString(protobuf)
print("num preference trajectories:", len(frame.preference_trajectories))

# Check if this frame has valid preference trajectories
for traj_idx, pref_traj in enumerate(frame.preference_trajectories):
if pref_traj.preference_score >= 0: # Valid score
self.valid_samples.append({
'file_info': (filename, start_byte, byte_length),
'traj_idx': traj_idx,
'original_idx': original_idx
})

print(f"Found {len(self.valid_samples)} valid preference trajectory samples")

# Save to cache
print(f"Saving filtered samples to cache: {cache_file}")
with open(cache_file, 'wb') as f:
pickle.dump(self.valid_samples, f)
print("Cache saved!")

def __len__(self):
return len(self.valid_samples)

def __getitem__(self, idx):
sample_info = self.valid_samples[idx]
filename, start_byte, byte_length = sample_info['file_info']
traj_idx = sample_info['traj_idx']

# Read frame
file_path = os.path.join(self.data_dir, filename)
with open(file_path, 'rb') as f:
f.seek(start_byte)
protobuf = f.read(byte_length)
frame = e2e_pb2.E2EDFrame()
frame.ParseFromString(protobuf)

# Extract past states (T,6)
past = np.stack([
frame.past_states.pos_x,
frame.past_states.pos_y,
frame.past_states.vel_x,
frame.past_states.vel_y,
frame.past_states.accel_x,
frame.past_states.accel_y
], axis=-1).astype(np.float32)

# Extract intent (one-hot encode)
intent = frame.intent
intent_onehot = np.zeros(4, dtype=np.float32)
intent_onehot[intent] = 1.0

# Extract preference trajectory (only pos_x, pos_y are populated)
pref_traj = frame.preference_trajectories[traj_idx]
trajectory = np.stack([
pref_traj.pos_x,
pref_traj.pos_y
], axis=-1).astype(np.float32)

# Extract preference score (target)
score = pref_traj.preference_score

return {
'past_states': torch.from_numpy(past.flatten()),
'intent': torch.from_numpy(intent_onehot),
'trajectory': torch.from_numpy(trajectory.flatten()),
'score': torch.tensor(score, dtype=torch.float32)
}
7 changes: 4 additions & 3 deletions src/camera-based-e2e/dataset/export_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,18 @@

def safe_name(name: str) -> str:
# Keep filesystem-friendly ASCII.
return re.sub(r"[^A-Za-z0-9_.-]+", "_", name)
return re.sub(r"[^A-Za-z0-9_.-]+", "_", name) #substitutes any non ascii chars with _


def camera_name(name_enum: int) -> str:
def camera_name(name_enum: int) -> str: #which camera (front, left, etc)
return dataset_pb2.CameraName.Name.Name(name_enum)
#access dataset_pb2 class, then CameraName nested class, then Name enum nested class, then Name constructor method to get string name


def intent_name(intent_enum: int) -> str:
return e2e_pb2.EgoIntent.Intent.Name(intent_enum)


#NOT SURE ABOUT THIS FUNCTION
def transform_list(transform_msg) -> list:
return list(transform_msg.transform)

Expand Down
107 changes: 107 additions & 0 deletions src/camera-based-e2e/depthLoss.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
from transformers import AutoImageProcessor, AutoModelForDepthEstimation
import torch
import torch.nn.functional as F

DEPTH_MODEL_ID = "depth-anything/Depth-Anything-V2-Small-hf"

class DepthLoss:
def __init__(self, device):
self.device = device
self.depth_processor = AutoImageProcessor.from_pretrained(DEPTH_MODEL_ID)
self.depth_model = AutoModelForDepthEstimation.from_pretrained(DEPTH_MODEL_ID).to(self.device)

def get_depth(self, images):
"""
Compute ground truth depth from images

Args:
images (torch.Tensor): Input images of shape (B, C, H, W).
"""
# Preprocess images
inputs = self.depth_processor(images=images, return_tensors="pt").to(self.device)

# Forward pass through the depth estimation model
with torch.no_grad():
outputs = self.depth_model(**inputs)
predicted_depth = outputs.predicted_depth

height, width = images.shape[2], images.shape[3]

# Interpolate to original size
prediction = F.interpolate(
predicted_depth.unsqueeze(1),
size=(height, width),
mode="bicubic",
align_corners=False,
).squeeze(1) # (B, H, W)

return prediction

def compute_depth_loss(self, gt_images, pred_depths, loss_fn):
pred_depth = self.get_depth(gt_images)
depth_loss = loss_fn(pred_depth, pred_depths)

return depth_loss

def __call__(self, gt_images, pred_depths, loss_fn=F.l1_loss):
return self.compute_depth_loss(gt_images, pred_depths, loss_fn)


if __name__ == "__main__":
if not torch.cuda.is_available():
print("Must run on GPU")

import sys
from pathlib import Path
# Determine the absolute path to the directory containing loader.py
# depth_loss.py is in models/losses/
current_dir = Path(__file__).resolve().parent
project_root = current_dir.parent.parent
sys.path.append(str(project_root))

from loader import WaymoE2E
loader = WaymoE2E(indexFile="index_val.pkl", data_dir="/anvil/scratch/x-mgagvani/wod/waymo_end_to_end_camera_v1_0_0/waymo_open_dataset_end_to_end_camera_v_1_0_0/", images=True)
data_iterator = iter(torch.utils.data.DataLoader(loader, batch_size=8, num_workers=4))

device = torch.device("cuda")
depth_loss_fn = DepthLoss(device)

from matplotlib import pyplot as plt
import numpy as np

for _ in range(6):
batch = next(data_iterator)
images = batch["IMAGES"][1].to(device) # front camera

res = depth_loss_fn.get_depth(images) # (B, H, W) tensor

fig, ax = plt.subplots(8, 2, figsize=(16, 48))
for i in range(8):
# Input image
ax[i, 0].set_title(f"Input Image {i+1}")
ax[i, 0].imshow(images[i].permute(1, 2, 0).cpu().numpy().astype(np.uint8))
ax[i, 0].axis('off')

# Predicted depth
ax[i, 1].set_title(f"Predicted Depth {i+1}")
depth_img = ax[i, 1].imshow(res[i].cpu().numpy(), cmap='plasma')
ax[i, 1].axis('off')
fig.colorbar(depth_img, ax=ax[i, 1], orientation='vertical', label='Depth')

plt.tight_layout()
fig.savefig("predicted_depth.png", dpi=150, bbox_inches='tight')

# throughput test
from time import perf_counter

start_time = perf_counter()
times = []
for _ in range(100):
batch = next(data_iterator)
images = batch["IMAGES"][1].to(device) # front camera
t0 = perf_counter()
res = depth_loss_fn.get_depth(images)
times.append(perf_counter() - t0)
end_time = perf_counter()
print(f"Throughput: {100 / (end_time - start_time):.2f} batches/sec")
print(f"Avg depth inference batch/s: {1 / np.mean(times):.2f} FPS")
99 changes: 87 additions & 12 deletions src/camera-based-e2e/loader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import torch
from torch.utils.data import IterableDataset
from waymo_open_dataset.protos import end_to_end_driving_data_pb2 as e2e_pb2
from protos import e2e_pb2
import torchvision
import pickle
import struct
Expand All @@ -11,6 +11,10 @@
import cv2
from typing import Optional
import random
import tqdm
import open3d as o3d
from point_cloud import get_waymo_intrinsics, create_point_cloud
from depthLoss import DepthLoss

devices = ['cuda:0', 'cuda:1']

Expand Down Expand Up @@ -55,7 +59,7 @@ def decode_img(self, img):
gpu_tensors_list = torchvision.io.decode_jpeg(
img_tensor,
mode=torchvision.io.ImageReadMode.UNCHANGED,
device= 'cpu' #['cuda:0', 'cuda:1'][torch.utils.data.get_worker_info().id%2]
device= 'cpu', #['cuda:0', 'cuda:1'][torch.utils.data.get_worker_info().id%2]
)
# img_array = np.frombuffer(img, np.uint8)
return gpu_tensors_list
Expand Down Expand Up @@ -96,30 +100,101 @@ def __iter__(self):
# For submission to waymo evaluation server
name = frame.frame.context.name

yield {'PAST': past, 'FUTURE': future, 'IMAGES': [self.decode_img(images.image) for images in frame.frame.images], 'INTENT': frame.intent, 'NAME': name}
intrinsics_vec = np.zeros(6, dtype=np.float32)

# Find FRONT camera (Enum 1) in the context
for calib in frame.frame.context.camera_calibrations:
if calib.name == 1: # 1 = FRONT
intrinsics_vec[0] = calib.intrinsic[0] # fx
intrinsics_vec[1] = calib.intrinsic[1] # fy
intrinsics_vec[2] = calib.intrinsic[2] # cx
intrinsics_vec[3] = calib.intrinsic[3] # cy
intrinsics_vec[4] = calib.width
intrinsics_vec[5] = calib.height
break

decoded_images = [self.decode_img(img.image) for img in frame.frame.images]

yield {
'PAST': past,
'FUTURE': future,
'IMAGES': decoded_images,
'INTRINSICS': intrinsics_vec, # <--- Passing this to main
'NAME': frame.frame.context.name
}

if __name__ == "__main__":

from torch.utils.data import DataLoader
import time
from tqdm import tqdm
# NOTE: Replace with your path
DATA_DIR = '/scratch/gilbreth/mgagvani/wod/waymo_open_dataset_end_to_end_camera_v_1_0_0/'
DATA_DIR = '/scratch/gilbreth/svelmuru/waymo_end_to_end_dataset/waymo_open_dataset_end_to_end_camera_v_1_0_0/'
BATCH_SIZE = 32
dataset = WaymoE2E(indexFile="index_train.pkl", data_dir = DATA_DIR, images=True)
dataset = WaymoE2E(indexFile="index_train.pkl", data_dir = DATA_DIR, images=True, n_items= 2)
loader = DataLoader(
dataset,
batch_size=BATCH_SIZE,
num_workers=16,
)

def main():
# start = time.time()
for batch_of_frames in tqdm(loader):
# print(batch_of_frames["INTENT"])
# print(batch_of_frames.keys(), [b.shape for b in batch_of_frames.values() if isinstance(b, torch.Tensor)])
pass
# print("Total Time:", time.time()-start)
device = torch.device("cuda")
depth_model = DepthLoss(device)
output_dir = "visualizations"

for batch_idx, batch_of_frames in enumerate(tqdm(loader)):
images = batch_of_frames["IMAGES"][1].to(device) # Shape: (B, 3, H, W)
intrinsics_batch = batch_of_frames["INTRINSICS"]
pred_depths = depth_model.get_depth(images) # Shape: (B, H, W)

batch_size = images.shape[0] # B

for i in range(batch_size):
# Image: (H, W, 3) uint8

img_np = images[i].permute(1, 2, 0).cpu().numpy().copy()

if img_np.max() <= 1.0:
img_np = (img_np * 255).astype(np.uint8)
else:
img_np = img_np.astype(np.uint8)

#save img
img_filename = f"batch_{batch_idx:04d}_img_{i:02d}.png"
img_path = os.path.join(output_dir, img_filename)

img_bgr = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
cv2.imwrite(img_path, img_bgr)

# Depth: (H, W) float32
depth_np = pred_depths[i].cpu().numpy()

# set camera intrinsics
vals = intrinsics_batch[i].numpy()
camera_intrinsics = o3d.camera.PinholeCameraIntrinsic()
camera_intrinsics.set_intrinsics(
width=int(vals[4]),
height=int(vals[5]),
fx=vals[0], fy=vals[1],
cx=vals[2], cy=vals[3]
)

# Create Point Cloud
pcd = create_point_cloud(
img_np,
depth_np,
intrinsics=camera_intrinsics,
depth_scale=1.0
)

# Save to Disk
filename = f"batch_{batch_idx:04d}_img_{i:02d}.pcd"
file_path = os.path.join(output_dir, filename)

# non-blocking save command
o3d.io.write_point_cloud(file_path, pcd)


import cProfile
main()
main()
Loading