diff --git a/gen2-license-plate-recognition/main.py b/gen2-license-plate-recognition/main.py
index b10558cc0..0f152f0f0 100755
--- a/gen2-license-plate-recognition/main.py
+++ b/gen2-license-plate-recognition/main.py
@@ -6,7 +6,7 @@ import cv2
 import depthai as dai
 import numpy as np
-from depthai_sdk import FPSHandler
+from depthai_sdk.fps import FPSHandler
 
 parser = argparse.ArgumentParser()
 parser.add_argument('-nd', '--no-debug', action="store_true", help="Prevent debug output")
diff --git a/gen2-license-plate-recognition/requirements.txt b/gen2-license-plate-recognition/requirements.txt
index f8ab811d8..a4e74fcbf 100644
--- a/gen2-license-plate-recognition/requirements.txt
+++ b/gen2-license-plate-recognition/requirements.txt
@@ -1,4 +1,4 @@
 opencv-python==4.5.1.48
 depthai==2.16.0.0
 blobconverter==1.2.8
-depthai-sdk==1.2.0
+depthai-sdk==1.9.6
diff --git a/gen2-record-replay/README.md b/gen2-record-replay/README.md
index 03935f924..ec141e6e6 100644
--- a/gen2-record-replay/README.md
+++ b/gen2-record-replay/README.md
@@ -2,13 +2,15 @@
 
 These tools allow you to record encoded and synced camera streams and replay them, including reconstructing the stereo depth perception.
 
+`record.py` and `replay.py` use the DepthAI SDK's [Record](https://docs.luxonis.com/projects/sdk/en/latest/features/recording/) and [Replay](https://docs.luxonis.com/projects/sdk/en/latest/features/replaying/) functionalities under the hood.
+
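+As a rough sketch of what the SDK's Record functionality looks like (a minimal example, assuming a connected device and its default color sensor; `record.py` below adds quality configuration, stereo streams and previews):
+
+```python
+from depthai_sdk import OakCamera, RecordType
+
+with OakCamera() as oak:
+    # Encode the color stream on-device
+    color = oak.create_camera('color', encode='MJPEG', fps=30)
+    # Save the encoded stream into recordings/ as a video file
+    oak.record(color.out.encoded, path='recordings', record_type=RecordType.VIDEO)
+    oak.start(blocking=True)
+```
+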
 ### Recording
 
 When running `record.py`, it will record encoded streams from all devices and will synchronize all streams across all devices on the host. Recordings will be saved in the specified folder (with `-p`, by default that folder is `recordings/`). Recordings will be saved as:
 
-- By default: MJPEG (motion JPEG) files or H265, depending on the quality of the recording You can manually use `ffmpeg` to convert these `.mjpeg` recordings to `.mp4`
-- If [PyAv](https://github.com/PyAV-Org/PyAV) is installed: It will save encoded streames directly into `.mp4` containers. Install PyAv with `python3 -mpip install av`. This will allow you to watch videos with a standard video player. More [info here](../gen2-container-encoding).
-- If depth is enabled: Program will save depth into rosbag (`.bag`), which you can open with [RealSense Viewer](https://www.intelrealsense.com/sdk-2/#sdk2-tools) (image below)
-- If `-mcap` is enabled, depthai-record will record selected streams into [mcap file](https://github.com/foxglove/mcap) and can be viewed with [Foxglove studio](https://foxglove.dev/). Depth is converted to pointcloud on the host before being saved. Standalone Foxglove studio streaming demo can be [found here](../gen2-foxglove/).
+- By default: MJPEG (motion JPEG) files or H265, depending on the quality of the recording. You can manually use `ffmpeg` to convert these `.mjpeg` recordings to `.mp4`.
+  - Make sure to install the recording dependencies with `pip install depthai-sdk[record]`, as this will also install the required `av` ([PyAv](https://github.com/PyAV-Org/PyAV)) library for saving encoded frames directly into an `.mp4` container. This will allow you to watch videos with a standard video player. More [info here](../gen2-container-encoding).
+- If depth is enabled: the program will save depth into a rosbag (`.bag`) or DB3 file.
+
 
 ![depth gif](https://user-images.githubusercontent.com/18037362/141661982-f206ed61-b505-4b17-8673-211a4029754b.gif)
 
@@ -21,49 +23,40 @@
 python record.py [arguments]
 
 **Optional arguments:**
 - `-p / --path`: Folder path where recordings will be saved. Default: `recordings/`.
-- `-save / --save`: Choose which streams to save. Currently supported: `color`, `left`, `right`, `disparity`, `depth` (.bag or .mcap), `pointcloud` (.mcap)
-- `-f / --fps`: Camera sensor FPS, applied to all cameras
+- `-save / --save`: Choose which streams to save. Currently supported: `color`, `left`, `right`, `disparity`, `depth` (rosbag or db3)
+- `--fps`: Camera sensor FPS, applied to all cameras
 - `-q / --quality`: Selects the quality of the encoded streams that are being recorded. It can either be `BEST` (lossless encoding), `HIGH`, `MEDIUM` or `LOW`. More information regarding **file sizes and quality of recordings** can be [found here](encoding_quality/README.md). Default: `HIGH`. If integer 0..100 is used, MJPEG encoding will be used and the MJPEG quality will be set to the value specified.
-- `-fc / --frame_cnt`: Number of frames to record. App will record until it's stopped (CTRL+C) by default. If you select eg. `-fc 300 --fps 30`, recording will be of 300 frames (of each stream), for a total of 10 seconds.
-- `-tl / --timelapse`: Number of seconds between saved frames, which is used for timelapse recording. By default, timelapse is disabled.
-- `-mcap / --mcap`: Record all streams into the .mcap file, so it can be viewed with [Foxglove Studio](https://foxglove.dev/)
+- `-type / --type`: Recording type. Either `VIDEO` (default), `ROSBAG`, or `DB3`.
+- `--disable_preview`: Disable preview output to reduce resource usage. By default, all streams being saved are displayed.
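+
+For example, to record the color and both mono streams together with depth into a rosbag:
+
+```
+python record.py -save color left right depth -type ROSBAG -q MEDIUM
+```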
+
+
 ### Replaying
 
-`replay.py` is a demo script that runs Spatial MobileNet network. It will reconstruct stereo depth perception, which will allow it to calculate spatial coordinates as well.
+`replay.py` is a demo script that runs a spatial object detection network. It will reconstruct stereo depth perception (using [DepthAI SDK's Replay](https://docs.luxonis.com/projects/sdk/en/latest/features/replaying/) functionality), which will allow it to calculate spatial coordinates as well.
 
 #### Replay usage
 
-`Replay` class (from `libraries/depthai_replay.py`) will read `recordings` and send recorded and synced frames back to the device to reconstruct the stereo depth perception.
-
-There are a few things you can specify when using the `Replay` class:
+DepthAI SDK's [Replay functionality](https://docs.luxonis.com/projects/sdk/en/latest/features/replaying/) will read `recordings` and send frames back to the device to replay the whole pipeline, including reconstruction of stereo depth perception.
 
-```pyhton
-# First initialize the Replay object, passing path to the depthai_recording
-replay = Replay(path)
+```python
+from depthai_sdk import OakCamera
+# Here, instead of using one of the public depthai recordings
+# https://docs.luxonis.com/projects/sdk/en/latest/features/replaying/#public-depthai-recordings
+# we can specify a path to our own recording, e.g. OakCamera(replay='recordings/1-184430102127631200')
+with OakCamera(replay='path/to/recording') as oak:
+    oak.replay.set_loop(True)
+    left = oak.create_camera('CAM_A') # path/to/recording/CAM_A.mp4
+    right = oak.create_camera('CAM_C') # path/to/recording/CAM_C.mp4
 
-# Resize color frames prior to sending them to the device
-replay.set_resize_color((width, height))
+    # Reconstruct stereo depth from the recording
+    stereo = oak.create_stereo(left=left, right=right)
 
-# Keep aspect ratio when resizing the color frames. This will crop
-# the color frame to the desired aspect ratio (in our case 300x300)
-# It's set to True by default. Setting it to False will squish the image,
-# but will preserve the full FOV
-replay.keep_aspect_ratio(False)
+    # Run spatial object detection on the right video stream
+    nn = oak.create_nn('yolov7tiny_coco_640x352', right, spatial=stereo)
 
-# Don't read/stream recorded disparity
-replay.disable_stream("disparity", disable_reading=True)
-# Read but don't stream recorded depth
-replay.disable_stream("depth")
-```
-#### Replay usage
+    oak.visualize(nn) # Show spatial detections visualized on CAM_C video
+    oak.start(blocking=True)
 ```
-usage: replay.py -p PATH
-
-optional arguments:
-  -p PATH, --path PATH  Path where to store the captured data
-```
-
+You can run the bundled demo with `python replay.py`; by default it replays the public `people-tracking-above-05` recording.
+
 ## Pre-requisites
 
 ```
diff --git a/gen2-record-replay/record.py b/gen2-record-replay/record.py
index fe482b136..2bb7b9e28 100644
--- a/gen2-record-replay/record.py
+++ b/gen2-record-replay/record.py
@@ -1,20 +1,13 @@
 #!/usr/bin/env python3
-from multiprocessing.sharedctypes import Value
 import depthai as dai
-import contextlib
-import math
-import time
 from pathlib import Path
 import signal
-import threading
-
-# DepthAI Record library
-from depthai_sdk import Record, EncodingQuality
-from depthai_sdk.managers import ArgsManager
 import argparse
+from depthai_sdk.components.parser import parse_camera_socket
+from depthai_sdk import OakCamera, ArgsParser, RecordType
+import threading
 
-_save_choices = ("color", "left", "right", "disparity", "depth", "pointcloud") # TODO: IMU/ToF...
-_quality_choices = tuple(str(q).split('.')[1] for q in EncodingQuality)
+_quality_choices = ['BEST', 'HIGH', 'MEDIUM', 'LOW']
 
 def checkQuality(value: str):
     if value.upper() in _quality_choices:
@@ -23,123 +16,94 @@
         num = int(value)
         if 0 <= num <= 100:
             return num
-    raise argparse.ArgumentTypeError(f"{value} is not a valid quality. Either use number 0-100 or {'/'.join(_quality_choices)}.")
+    raise argparse.ArgumentTypeError(f"{value} is not a valid quality. Either {'/'.join(_quality_choices)}, or a number 0-100.")
 
-parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
+# parse arguments
+parser = argparse.ArgumentParser()
 parser.add_argument('-p', '--path', default="recordings", type=str, help="Path where to store the captured data")
-parser.add_argument('-save', '--save', default=["color", "left", "right"], nargs="+", choices=_save_choices,
-                    help="Choose which streams to save. Default: %(default)s")
-# parser.add_argument('-f', '--fps', type=float, default=30,
-#                     help='Camera sensor FPS, applied to all cams')
+parser.add_argument('-save', '--save', default=["color", "left", "right"], nargs="+", help="Choose which streams to save. Default: %(default)s")
+# parser.add_argument('-fc', '--frame_cnt', type=int, default=-1,
+#                     help='Number of frames to record. Record until stopped by default.')
 parser.add_argument('-q', '--quality', default="HIGH", type=checkQuality, help='Selects the quality of the recording. Default: %(default)s')
-parser.add_argument('-fc', '--frame_cnt', type=int, default=-1,
-                    help='Number of frames to record. Record until stopped by default.')
-parser.add_argument('-tl', '--timelapse', type=int, default=-1,
-                    help='Number of seconds between frames for timelapse recording. Default: timelapse disabled')
-parser.add_argument('-mcap', '--mcap', action="store_true", help="MCAP file format")
-
-args = ArgsManager.parseArgs(parser)
-if args.rgbFps != args.monoFps:
+parser.add_argument('-type', '--type', default="VIDEO", help="Recording type. 
Default: %(default)s", choices=['VIDEO', 'ROSBAG', 'DB3']) +parser.add_argument('--disable_preview', action='store_true', help="Disable preview output to reduce resource usage. By default, all streams are displayed.") + +args = ArgsParser.parseArgs(parser) + +sockets = [] +for i, stream in enumerate(args['save']): + stream: str = stream.lower() + args['save'][i] = stream + if stream in ['disparity', 'depth']: + # All good + continue + sockets.append(parse_camera_socket(stream)) + +if args['rgbFps'] != args['monoFps']: raise ValueError('RGB and MONO FPS must be the same when recording for now!') -args.fps = args.rgbFps -# TODO: make camera resolutions configrable -save_path = Path.cwd() / args.path - -# Host side sequence number syncing -def checkSync(queues, sequenceNum: int): - matching_frames = [] - for q in queues: - for i, msg in enumerate(q['msgs']): - if msg.getSequenceNum() == sequenceNum: - matching_frames.append(i) - break - - if len(matching_frames) == len(queues): - # We have all frames synced. Remove the excess ones - for i, q in enumerate(queues): - q['msgs'] = q['msgs'][matching_frames[i]:] - return True - else: - return False - -def run(): - with contextlib.ExitStack() as stack: - # Record from all available devices - device_infos = dai.Device.getAllAvailableDevices() - - if len(device_infos) == 0: - raise RuntimeError("No devices found!") - else: - print("Found", len(device_infos), "devices") - - devices = [] - # TODO: allow users to specify which available devices should record - for device_info in device_infos: - openvino_version = dai.OpenVINO.Version.VERSION_2021_4 - device = stack.enter_context(dai.Device(openvino_version, device_info, usb2Mode=False)) - - # Create recording object for this device - recording = Record(save_path, device, args) - # Set recording configuration - # TODO: add support for specifying resolution - recording.setTimelapse(args.timelapse) - recording.setRecordStreams(args.save) - recording.setQuality(args.quality) - recording.setMcap(args.mcap) - - devices.append(recording) - - for recording in devices: - recording.start() # Start recording - - timelapse = 0 - def roundUp(value, divisibleBy: float): - return int(divisibleBy * math.ceil(value / divisibleBy)) - # If H265, we want to start recording with the keyframe (default keyframe freq is 30 frames) - SKIP_FRAMES = roundUp(1.5 * args.fps, 30 if args.quality == "LOW" else 1) - args.frame_cnt += SKIP_FRAMES - - # Terminate app handler - quitEvent = threading.Event() - signal.signal(signal.SIGTERM, lambda *_args: quitEvent.set()) - print("\nRecording started. 
Press 'Ctrl+C' to stop.")
-
-    while not quitEvent.is_set():
-        try:
-            for recording in devices:
-                if 0 < args.timelapse and time.time() - timelapse < args.timelapse:
-                    continue
-                # Loop through device streams
-                for q in recording.queues:
-                    new_msg = q['q'].tryGet()
-                    if new_msg is not None:
-                        q['msgs'].append(new_msg)
-                        if checkSync(recording.queues, new_msg.getSequenceNum()):
-                            # Wait for Auto focus/exposure/white-balance
-                            recording.frameCntr += 1
-                            if recording.frameCntr <= SKIP_FRAMES: # 1.5 sec
-                                continue
-                            # Timelapse
-                            if 0 < args.timelapse: timelapse = time.time()
-                            if args.frame_cnt == recording.frameCntr:
-                                quitEvent.set()
-
-                            frames = dict()
-                            for stream in recording.queues:
-                                frames[stream['name']] = stream['msgs'].pop(0)
-                            recording.frame_q.put(frames)
-
-            time.sleep(0.001) # 1ms, avoid lazy looping
-        except KeyboardInterrupt:
-            break
-
-    print('') # For new line in terminal
-    for recording in devices:
-        recording.frame_q.put(None)
-        recording.process.join() # Terminate the process
-    print("All recordings have stopped successfuly. Exiting the app.")
-
-if __name__ == '__main__':
-    run()
+def create_cam(socket: dai.CameraBoardSocket):
+    # Create a camera on the given socket and configure its encoder
+    # according to the selected -q/--quality
+    if args['quality'] == 'LOW':
+        cam = oak.create_camera(socket, encode=dai.VideoEncoderProperties.Profile.H265_MAIN)
+        cam.config_encoder_h26x(bitrate_kbps=10000)
+        return cam
+
+    cam = oak.create_camera(socket, encode=dai.VideoEncoderProperties.Profile.MJPEG)
+
+    if isinstance(args['quality'], int):
+        # -q was given as a number 0..100 (checkQuality already converted it to an int)
+        cam.config_encoder_mjpeg(quality=args['quality'])
+    elif args['quality'] == 'BEST':
+        cam.config_encoder_mjpeg(lossless=True)
+    elif args['quality'] == 'HIGH':
+        cam.config_encoder_mjpeg(quality=97)
+    elif args['quality'] == 'MEDIUM':
+        cam.config_encoder_mjpeg(quality=93)
+    return cam
+
+save_path = Path(__file__).parent / args['path']
+
+print('Saving recordings to:', save_path)
+
+with OakCamera(args=args) as oak:
+    calib = oak.device.readCalibrationOrDefault()
+
+    recording_list = []
+
+    if 'disparity' in args['save'] or 'depth' in args['save']:
+        # Depth/disparity requires the device's calibrated stereo pair
+        left_socket = calib.getStereoLeftCameraId()
+        right_socket = calib.getStereoRightCameraId()
+
+        left = create_cam(left_socket)
+        right = create_cam(right_socket)
+
+        if left_socket in sockets:
+            sockets.remove(left_socket)
+            recording_list.append(left)
+        if right_socket in sockets:
+            sockets.remove(right_socket)
+            recording_list.append(right)
+
+        stereo = oak.create_stereo(left=left, right=right)
+
+        if 'disparity' in args['save']:
+            recording_list.append(stereo.out.disparity)
+        if 'depth' in args['save']:
+            recording_list.append(stereo.out.depth)
+
+    for socket in sockets:
+        cam = create_cam(socket)
+        recording_list.append(cam)
+        if not args['disable_preview']:
+            oak.visualize(cam, scale=2/3, fps=True)
+
+    oak.record(recording_list, path=save_path, record_type=getattr(RecordType, args['type']))
+
+    oak.start(blocking=False)
+
+    quitEvent = threading.Event()
+    signal.signal(signal.SIGTERM, lambda *_args: quitEvent.set())
+    print("\nRecording started. 
Press 'Ctrl+C' to stop.") + + while oak.running() and not quitEvent.is_set(): + oak.poll() diff --git a/gen2-record-replay/replay.py b/gen2-record-replay/replay.py index f9f5cca78..b6d0117c1 100644 --- a/gen2-record-replay/replay.py +++ b/gen2-record-replay/replay.py @@ -1,101 +1,21 @@ #!/usr/bin/env python3 -import argparse -import cv2 -import depthai as dai -import blobconverter -import numpy as np -from depthai_sdk import Replay -from depthai_sdk.utils import frameNorm, cropToAspectRatio - -labelMap = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", - "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] - -parser = argparse.ArgumentParser() -parser.add_argument('-p', '--path', default="data", type=str, help="Path where to store the captured data") -args = parser.parse_args() - -# Create Replay objects -replay = Replay(args.path) - -replay.disableStream('depth') # In case depth was saved (mcap) -# Resize color frames prior to sending them to the device -replay.setResizeColor((304, 304)) -# Keep aspect ratio when resizing the color frames. This will crop -# the color frame to the desired aspect ratio (in our case 300x300) -replay.keepAspectRatio(True) - -# Initializes the pipeline. This will create required XLinkIn's and connect them together -# Creates StereoDepth node, if both left and right streams are recorded -pipeline, nodes = replay.initPipeline() - -nodes.stereo.setSubpixel(True) - -manip = pipeline.create(dai.node.ImageManip) -manip.initialConfig.setResize(300,300) -manip.setMaxOutputFrameSize(300*300*3) -nodes.color.out.link(manip.inputImage) - -nn = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork) -nn.setBoundingBoxScaleFactor(0.3) -nn.setDepthLowerThreshold(100) -nn.setDepthUpperThreshold(5000) - -nn.setBlobPath(blobconverter.from_zoo(name="mobilenet-ssd", shaves=6)) -nn.setConfidenceThreshold(0.5) -nn.input.setBlocking(False) - -# Link required inputs to the Spatial detection network -manip.out.link(nn.input) -nodes.stereo.depth.link(nn.inputDepth) - -detOut = pipeline.create(dai.node.XLinkOut) -detOut.setStreamName("det_out") -nn.out.link(detOut.input) - -depthOut = pipeline.create(dai.node.XLinkOut) -depthOut.setStreamName("depth_out") -nodes.stereo.disparity.link(depthOut.input) - -with dai.Device(pipeline) as device: - replay.createQueues(device) - - depthQ = device.getOutputQueue(name="depth_out", maxSize=4, blocking=False) - detQ = device.getOutputQueue(name="det_out", maxSize=4, blocking=False) - - disparityMultiplier = 255 / nodes.stereo.initialConfig.getMaxDisparity() - color = (255, 0, 0) - # Read rgb/mono frames, send them to device and wait for the spatial object detection results - while replay.sendFrames(): - rgbFrame = cropToAspectRatio(replay.frames['color'], (300,300)) - - depthFrame = depthQ.get().getFrame() - depthFrameColor = (depthFrame*disparityMultiplier).astype(np.uint8) - # depthFrameColor = cv2.normalize(depthFrame, None, 255, 0, cv2.NORM_INF, cv2.CV_8UC1) - # depthFrameColor = cv2.equalizeHist(depthFrameColor) - depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_JET) - - inDet = detQ.tryGet() - if inDet is not None: - # Display (spatial) object detections on the color frame - for detection in inDet.detections: - # Denormalize bounding box - bbox = frameNorm(rgbFrame, (detection.xmin, detection.ymin, detection.xmax, detection.ymax)) - try: - label = labelMap[detection.label] - except: - label = detection.label - 
cv2.putText(rgbFrame, str(label), (bbox[0] + 10, bbox[1] + 20), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color) - cv2.putText(rgbFrame, "{:.2f}".format(detection.confidence*100), (bbox[0] + 10, bbox[1] + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color) - cv2.putText(rgbFrame, f"X: {int(detection.spatialCoordinates.x)} mm", (bbox[0] + 10, bbox[1] + 50), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color) - cv2.putText(rgbFrame, f"Y: {int(detection.spatialCoordinates.y)} mm", (bbox[0] + 10, bbox[1] + 65), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color) - cv2.putText(rgbFrame, f"Z: {int(detection.spatialCoordinates.z)} mm", (bbox[0] + 10, bbox[1] + 80), cv2.FONT_HERSHEY_TRIPLEX, 0.5, color) - - cv2.rectangle(rgbFrame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, cv2.FONT_HERSHEY_SIMPLEX) - - cv2.imshow("rgb", rgbFrame) - cv2.imshow("depth", depthFrameColor) - - if cv2.waitKey(1) == ord('q'): - break - print('End of the recording') - +from depthai_sdk import OakCamera + +# Here, instead of using one of the public depthai recordings +# https://docs.luxonis.com/projects/sdk/en/latest/features/replaying/#public-depthai-recordings +# We can specify path to our recording, eg. OakCamera(replay='recordings/1-184430102127631200') +with OakCamera(replay='people-tracking-above-05') as oak: + oak.replay.set_loop(True) + left = oak.create_camera('CAM_A') # CAM_A.mp4 + right = oak.create_camera('CAM_C') # CAM_C.mp4 + + # TODO: Use a better suited model that was specifically trained on top-down view images of people. + nn = oak.create_nn('yolov8n_coco_640x352', right, tracker=True) + + stereo = oak.create_stereo(left=left, right=right) + stereo.config_stereo(lr_check=True) + oak.visualize([stereo.out.rectified_right], fps=True) + oak.visualize(stereo.out.depth, fps=True) + + oak.visualize(nn, fps=True) + oak.start(blocking=True) diff --git a/gen2-record-replay/requirements.txt b/gen2-record-replay/requirements.txt index 79b438fb5..4b97e7bdc 100644 --- a/gen2-record-replay/requirements.txt +++ b/gen2-record-replay/requirements.txt @@ -1,5 +1 @@ -opencv-python -numpy -depthai==2.17.0.0 -blobconverter>=1.2.8 -depthai-sdk==1.2.3 \ No newline at end of file +depthai-sdk[record]==1.11 \ No newline at end of file