diff --git a/lib/ex_webrtc/rtp/depayloader.ex b/lib/ex_webrtc/rtp/depayloader.ex
index 79b067cd..de92e4dc 100644
--- a/lib/ex_webrtc/rtp/depayloader.ex
+++ b/lib/ex_webrtc/rtp/depayloader.ex
@@ -36,6 +36,7 @@ defmodule ExWebRTC.RTP.Depayloader do
   defp to_depayloader_module(mime_type) do
     case String.downcase(mime_type) do
       "video/vp8" -> {:ok, ExWebRTC.RTP.Depayloader.VP8}
+      "video/h264" -> {:ok, ExWebRTC.RTP.Depayloader.H264}
       "audio/opus" -> {:ok, ExWebRTC.RTP.Depayloader.Opus}
       "audio/pcma" -> {:ok, ExWebRTC.RTP.Depayloader.G711}
       "audio/pcmu" -> {:ok, ExWebRTC.RTP.Depayloader.G711}
diff --git a/lib/ex_webrtc/rtp/h264/depayloader.ex b/lib/ex_webrtc/rtp/h264/depayloader.ex
new file mode 100644
index 00000000..0c2c6fa7
--- /dev/null
+++ b/lib/ex_webrtc/rtp/h264/depayloader.ex
@@ -0,0 +1,119 @@
+defmodule ExWebRTC.RTP.Depayloader.H264 do
+  @moduledoc false
+  # Extracts H264 NAL Units from RTP packets.
+  #
+  # Based on [RFC 6184](https://tools.ietf.org/html/rfc6184).
+  #
+  # Supported types: Single NALU, FU-A, STAP-A.
+  #
+  # Every returned NAL unit is prefixed with the 4-byte Annex B start code.
+
+  @behaviour ExWebRTC.RTP.Depayloader.Behaviour
+
+  require Logger
+
+  alias ExWebRTC.RTP.H264.{FU, NAL, StapA}
+
+  @annexb_prefix <<1::32>>
+
+  @type t() :: %__MODULE__{
+          current_timestamp: non_neg_integer() | nil,
+          fu_parser_acc: [binary()]
+        }
+
+  defstruct current_timestamp: nil, fu_parser_acc: []
+
+  @impl true
+  def new() do
+    %__MODULE__{}
+  end
+
+  # Returns `{binary | nil, depayloader}`; `nil` means no complete NAL unit is available yet
+  # (padding packet, FU-A still in progress, or the packet was dropped as invalid).
+  @impl true
+  def depayload(depayloader, %ExRTP.Packet{payload: <<>>, padding: true}), do: {nil, depayloader}
+
+  def depayload(depayloader, packet) do
+    with {:ok, {header, _payload} = nal} <- NAL.Header.parse_unit_header(packet.payload),
+         unit_type = NAL.Header.decode_type(header),
+         {:ok, {nal, depayloader}} <-
+           do_depayload(unit_type, depayloader, packet, nal) do
+      {nal, depayloader}
+    else
+      {:error, reason} ->
+        Logger.warning("""
+        Couldn't parse payload, reason: #{reason}. \
+        Resetting depayloader state. Payload: #{inspect(packet.payload)}.\
+        """)
+
+        {nil, %{depayloader | current_timestamp: nil, fu_parser_acc: []}}
+    end
+  end
+
+  # In a Single NAL Unit packet the first payload byte is both the RTP payload header and
+  # the NAL unit header (RFC 6184, section 5.6), so the whole payload is the NAL unit.
+  defp do_depayload(:single_nalu, depayloader, packet, {_header, _payload}) do
+    {:ok,
+     {prefix_annexb(packet.payload),
+      %__MODULE__{depayloader | current_timestamp: packet.timestamp}}}
+  end
+
+  # All fragments of one NAL unit share an RTP timestamp; a change while fragments are
+  # buffered means the rest of the previous unit was lost.
+  defp do_depayload(
+         :fu_a,
+         %{current_timestamp: current_timestamp, fu_parser_acc: fu_parser_acc},
+         packet,
+         {_header, _payload}
+       )
+       when fu_parser_acc != [] and current_timestamp != packet.timestamp do
+    Logger.warning("""
+    received packet with fu-a type payload that is not a start of fragmentation unit with timestamp \
+    different than last start and without finishing the previous fu. dropping fu.\
+    """)
+
+    {:error, "invalid timestamp inside fu-a"}
+  end
+
+  defp do_depayload(
+         :fu_a,
+         %{fu_parser_acc: fu_parser_acc},
+         packet,
+         {header, payload}
+       ) do
+    case FU.parse(payload, fu_parser_acc) do
+      {:ok, {data, type}} ->
+        # Rebuild the NAL header: NRI comes from the FU indicator, type from the FU header.
+        data = NAL.Header.add_header(data, 0, header.nal_ref_idc, type)
+
+        {:ok,
+         {prefix_annexb(data),
+          %__MODULE__{current_timestamp: packet.timestamp, fu_parser_acc: []}}}
+
+      {:incomplete, fu} ->
+        {:ok, {nil, %__MODULE__{fu_parser_acc: fu, current_timestamp: packet.timestamp}}}
+
+      {:error, _reason} = error ->
+        error
+    end
+  end
+
+  defp do_depayload(:stap_a, depayloader, packet, {_header, payload}) do
+    with {:ok, result} <- StapA.parse(payload) do
+      nals = Enum.map_join(result, &prefix_annexb/1)
+      {:ok, {nals, %__MODULE__{depayloader | current_timestamp: packet.timestamp}}}
+    end
+  end
+
+  defp do_depayload(unsupported_type, _depayloader, _packet, _nal) do
+    Logger.warning("""
+    Received packet with unsupported NAL type: #{unsupported_type}. Supported types are: Single NALU, STAP-A, FU-A. Dropping packet.
+    """)
+
+    {:error, "Unsupported nal type #{unsupported_type}"}
+  end
+
+  defp prefix_annexb(nal) do
+    @annexb_prefix <> nal
+  end
+end
diff --git a/lib/ex_webrtc/rtp/h264/nal_formats/fu.ex b/lib/ex_webrtc/rtp/h264/nal_formats/fu.ex
new file mode 100644
index 00000000..18bdc68c
--- /dev/null
+++ b/lib/ex_webrtc/rtp/h264/nal_formats/fu.ex
@@ -0,0 +1,55 @@
+defmodule ExWebRTC.RTP.H264.FU do
+  @moduledoc """
+  Module responsible for parsing H264 Fragmentation Unit.
+  """
+  alias __MODULE__
+  alias ExWebRTC.RTP.H264.NAL
+
+  @doc """
+  Parses H264 Fragmentation Unit.
+
+  `acc` holds the payloads of the fragments received so far, newest first.
+
+  If the parsed packet is not the last fragment, `{:incomplete, acc}` is returned with
+  the fragment payload prepended to `acc`.
+  For the last fragment `{:ok, {data, type}}` is returned, where `data` is the NAL unit
+  payload created by concatenating all fragments in arrival order and `type` is the
+  NAL unit type taken from the FU header.
+
+  Returns `{:error, reason}` when the FU header is malformed, when a start fragment arrives
+  while another unit is still being assembled, or when a non-start fragment arrives first.
+  """
+  @spec parse(binary(), [binary()]) ::
+          {:ok, {binary(), NAL.Header.rbsp_type()}}
+          | {:error, :packet_malformed | :invalid_first_packet | :last_fu_not_finished}
+          | {:incomplete, [binary()]}
+  def parse(packet, acc) do
+    with {:ok, {header, value}} <- FU.Header.parse(packet) do
+      do_parse(header, value, acc)
+    end
+  end
+
+  defp do_parse(header, packet, acc)
+
+  defp do_parse(%FU.Header{start_bit: true}, data, []),
+    do: {:incomplete, [data]}
+
+  defp do_parse(%FU.Header{start_bit: true}, _data, _acc),
+    do: {:error, :last_fu_not_finished}
+
+  defp do_parse(%FU.Header{start_bit: false}, _data, []),
+    do: {:error, :invalid_first_packet}
+
+  defp do_parse(%FU.Header{end_bit: true, type: type}, data, acc_data) do
+    # Fragments are accumulated newest-first, so restore arrival order before flattening.
+    result =
+      [data | acc_data]
+      |> Enum.reverse()
+      |> IO.iodata_to_binary()
+
+    {:ok, {result, type}}
+  end
+
+  defp do_parse(_header, data, acc_data),
+    do: {:incomplete, [data | acc_data]}
+end
diff --git a/lib/ex_webrtc/rtp/h264/nal_formats/fu/header.ex b/lib/ex_webrtc/rtp/h264/nal_formats/fu/header.ex
new file mode 100644
index 00000000..037eb978
--- /dev/null
+++ b/lib/ex_webrtc/rtp/h264/nal_formats/fu/header.ex
@@ -0,0 +1,59 @@
+defmodule ExWebRTC.RTP.H264.FU.Header do
+  @moduledoc """
+  Defines a structure representing Fragmentation Unit (FU) header
+  which is defined in [RFC6184](https://tools.ietf.org/html/rfc6184#page-31)
+
+  ```
+    +---------------+
+    |0|1|2|3|4|5|6|7|
+    +-+-+-+-+-+-+-+-+
+    |S|E|R|  Type   |
+    +---------------+
+  ```
+  """
+
+  alias ExWebRTC.RTP.H264.NAL
+
+  @typedoc """
+  MUST be set to true only in the first packet in a sequence.
+  """
+  @type start_flag :: boolean()
+
+  @typedoc """
+  MUST be set to true only in the last packet in a sequence.
+  """
+  @type end_flag :: boolean()
+
+  @enforce_keys [:type]
+  defstruct start_bit: false, end_bit: false, type: 0
+
+  @type t :: %__MODULE__{
+          start_bit: start_flag(),
+          end_bit: end_flag(),
+          type: NAL.Header.rbsp_type()
+        }
+
+  defguardp valid_frame_boundary(start, finish) when start != 1 or finish != 1
+
+  @doc """
+  Parses Fragmentation Unit Header
+
+  It will fail if the Start bit and End bit are both set to one in the
+  same Fragmentation Unit Header, because a fragmented NAL unit
+  MUST NOT be transmitted in one FU.
+  """
+  @spec parse(data :: binary()) :: {:error, :packet_malformed} | {:ok, {t(), nal :: binary()}}
+  # RFC 6184 requires receivers to ignore the reserved (R) bit, so it is matched but unused.
+  def parse(<<start::1, finish::1, _reserved::1, nal_type::5, rest::binary>>)
+      when nal_type in 1..23 and valid_frame_boundary(start, finish) do
+    header = %__MODULE__{
+      start_bit: start == 1,
+      end_bit: finish == 1,
+      type: nal_type
+    }
+
+    {:ok, {header, rest}}
+  end
+
+  def parse(_binary), do: {:error, :packet_malformed}
+end
diff --git a/lib/ex_webrtc/rtp/h264/nal_formats/stap_a.ex b/lib/ex_webrtc/rtp/h264/nal_formats/stap_a.ex
new file mode 100644
index 00000000..b2c30227
--- /dev/null
+++ b/lib/ex_webrtc/rtp/h264/nal_formats/stap_a.ex
@@ -0,0 +1,46 @@
+defmodule ExWebRTC.RTP.H264.StapA do
+  @moduledoc """
+  Module responsible for parsing Single-Time Aggregation Packets type A (STAP-A).
+
+  Documented in [RFC6184](https://tools.ietf.org/html/rfc6184#page-22)
+
+  ```
+     0                   1                   2                   3
+     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    |                          RTP Header                           |
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    |STAP-A NAL HDR |         NALU 1 Size           | NALU 1 HDR    |
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    |                         NALU 1 Data                           |
+    :                                                               :
+    +               +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    |               | NALU 2 Size                   | NALU 2 HDR    |
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    |                         NALU 2 Data                           |
+    :                                                               :
+    |                               +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    |                               :...OPTIONAL RTP padding        |
+    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+  ```
+  """
+
+  @doc """
+  Splits a STAP-A payload (without the STAP-A NAL header byte) into the aggregated NAL units.
+
+  Each aggregation unit is a 16-bit big-endian size followed by that many bytes of NAL unit
+  (header included). Returns `{:error, :packet_malformed}` if the data cannot be split that
+  way, e.g. when a declared size exceeds the remaining data.
+  """
+  @spec parse(binary()) :: {:ok, [binary()]} | {:error, :packet_malformed}
+  def parse(data) do
+    do_parse(data, [])
+  end
+
+  defp do_parse(<<>>, acc), do: {:ok, Enum.reverse(acc)}
+
+  defp do_parse(<<size::16, nalu::binary-size(size), rest::binary>>, acc),
+    do: do_parse(rest, [nalu | acc])
+
+  defp do_parse(_data, _acc), do: {:error, :packet_malformed}
+end
diff --git a/lib/ex_webrtc/rtp/h264/nal_header.ex b/lib/ex_webrtc/rtp/h264/nal_header.ex
new file mode 100644
index 00000000..02a1cfe3
--- /dev/null
+++ b/lib/ex_webrtc/rtp/h264/nal_header.ex
@@ -0,0 +1,103 @@
+defmodule ExWebRTC.RTP.H264.NAL.Header do
+  @moduledoc """
+  Defines a structure representing Network Abstraction Layer Unit Header
+
+  Defined in [RFC 6184](https://tools.ietf.org/html/rfc6184#section-5.3)
+
+  ```
+    +---------------+
+    |0|1|2|3|4|5|6|7|
+    +-+-+-+-+-+-+-+-+
+    |F|NRI|  Type   |
+    +---------------+
+  ```
+  """
+
+  @typedoc """
+  NRI stands for nal_ref_idc. This value represents importance of
+  frame that is being parsed.
+
+  The higher the value the more important frame is (for example key
+  frames have nri value of 3) and a value of 00 indicates that the
+  content of the NAL unit is not used to reconstruct reference pictures
+  for inter picture prediction. NAL units with NRI equal 0 can be discarded
+  without risking the integrity of the reference pictures, although these
+  payloads might contain metadata.
+  """
+  @type nri :: 0..3
+
+  @typedoc """
+  Specifies the type of RBSP (Raw Byte Sequence Payload) data structure contained in the NAL unit.
+
+  Types are defined as follows.
+
+  | ID       | RBSP Type      |
+  |----------|----------------|
+  | 0        | Unspecified    |
+  | 1-23     | NAL unit types |
+  | 24       | STAP-A         |
+  | 25       | STAP-B         |
+  | 26       | MTAP-16        |
+  | 27       | MTAP-24        |
+  | 28       | FU-A           |
+  | 29       | FU-B           |
+  | 30-31    | Reserved       |
+
+  """
+  @type rbsp_type :: 0..31
+  @type supported_types :: :stap_a | :fu_a | :single_nalu
+  @type unsupported_types :: :stap_b | :mtap_16 | :mtap_24 | :fu_b
+  @type types :: supported_types() | unsupported_types() | :reserved | :invalid
+
+  defstruct [:nal_ref_idc, :type]
+
+  @type t :: %__MODULE__{
+          nal_ref_idc: nri(),
+          type: rbsp_type()
+        }
+
+  @doc """
+  Parses the one-byte NAL unit header from the beginning of `raw_nal`.
+
+  Returns `{:ok, {header, rest}}`, or `{:error, :malformed_data}` when the binary is empty
+  or the forbidden zero bit (F) is set.
+  """
+  @spec parse_unit_header(binary()) :: {:error, :malformed_data} | {:ok, {t(), binary()}}
+  def parse_unit_header(raw_nal)
+
+  def parse_unit_header(<<0::1, nri::2, type::5, rest::binary>>) do
+    nal = %__MODULE__{
+      nal_ref_idc: nri,
+      type: type
+    }
+
+    {:ok, {nal, rest}}
+  end
+
+  # If first bit is not set to 0 packet is flagged as malformed
+  def parse_unit_header(_binary), do: {:error, :malformed_data}
+
+  @doc """
+  Adds NAL header to payload
+  """
+  @spec add_header(binary(), 0 | 1, nri(), rbsp_type()) :: binary()
+  def add_header(payload, f, nri, type),
+    do: <<f::1, nri::2, type::5>> <> payload
+
+  @doc """
+  Parses type stored in NAL Header
+  """
+  @spec decode_type(t()) :: types()
+  def decode_type(%__MODULE__{type: type}), do: do_decode_type(type)
+
+  defp do_decode_type(number) when number in 1..21, do: :single_nalu
+  defp do_decode_type(number) when number in [22, 23], do: :reserved
+  defp do_decode_type(24), do: :stap_a
+  defp do_decode_type(25), do: :stap_b
+  defp do_decode_type(26), do: :mtap_16
+  defp do_decode_type(27), do: :mtap_24
+  defp do_decode_type(28), do: :fu_a
+  defp do_decode_type(29), do: :fu_b
+  defp do_decode_type(number) when number in [30, 31], do: :reserved
+  defp do_decode_type(_), do: :invalid
+end
diff --git a/test/ex_webrtc/rtp/depayloader_test.exs b/test/ex_webrtc/rtp/depayloader_test.exs
index f25df040..448dc043 100644
--- a/test/ex_webrtc/rtp/depayloader_test.exs
+++ b/test/ex_webrtc/rtp/depayloader_test.exs
@@ -75,9 +75,18 @@ defmodule ExWebRTC.RTP.DepayloaderTest do
                Depayloader.DTMF.depayload(depayloader, @packet)
   end
 
+  test "creates a H264 depayloader and dispatches calls to its module" do
+    assert {:ok, depayloader} =
+             %RTPCodecParameters{payload_type: 97, mime_type: "video/H264", clock_rate: 90_000}
+             |> Depayloader.new()
+
+    assert Depayloader.depayload(depayloader, @packet) ==
+             Depayloader.H264.depayload(depayloader, @packet)
+  end
+
   test "returns error if no depayloader exists for given codec" do
     assert {:error, :no_depayloader_for_codec} =
-             %RTPCodecParameters{payload_type: 97, mime_type: "video/H264", clock_rate: 90_000}
+             %RTPCodecParameters{payload_type: 45, mime_type: "video/AV1", clock_rate: 90_000}
              |> Depayloader.new()
   end
 end
diff --git a/test/ex_webrtc/rtp/h264/depayloader_test.exs b/test/ex_webrtc/rtp/h264/depayloader_test.exs
new file mode 100644
index 00000000..adccc361
--- /dev/null
+++ b/test/ex_webrtc/rtp/h264/depayloader_test.exs
@@ -0,0 +1,171 @@
+defmodule ExWebRTC.RTP.H264.DepayloaderTest do
+  use ExUnit.Case, async: true
+
+  alias ExWebRTC.RTP.Depayloader
+
+  test "valid Single NAL Unit" do
+    # 53 = F: 0, NRI: 1, type: 21; the header byte belongs to the NAL unit and must be kept
+    payload_single = <<53, 131>>
+    payload_single_out = <<0, 0, 0, 1, 53, 131>>
+
+    depayloader = Depayloader.H264.new()
+    packet = ExRTP.Packet.new(payload_single, timestamp: 123)
+
+    assert {^payload_single_out, %{current_timestamp: 123}} =
+             Depayloader.H264.depayload(depayloader, packet)
+  end
+
+  test "valid STAP-A NAL" do
+    payload_stapa = <<56, 0, 1, 128, 0, 1, 129>>
+    payload_stapa_out = <<0, 0, 0, 1, 128, 0, 0, 0, 1, 129>>
+
+    depayloader = Depayloader.H264.new()
+    packet = ExRTP.Packet.new(payload_stapa, timestamp: 123)
+
+    assert {^payload_stapa_out, %{current_timestamp: 123}} =
+             Depayloader.H264.depayload(depayloader, packet)
+  end
+
+  test "valid FU-A NAL" do
+    payload_fuas = <<60, 133, 128>>
+    payload_fua = <<60, 5, 129>>
+    payload_fuae = <<60, 69, 130>>
+    payload_fua_out = <<0, 0, 0, 1, 37, 128, 129, 130>>
+
+    depayloader = Depayloader.H264.new()
+
+    packet1 = ExRTP.Packet.new(payload_fuas, timestamp: 10)
+    packet2 = ExRTP.Packet.new(payload_fua, timestamp: 10)
+    packet3 = ExRTP.Packet.new(payload_fuae, timestamp: 10)
+
+    {bin, depayloader} = Depayloader.H264.depayload(depayloader, packet1)
+
+    assert {nil, %{current_timestamp: 10, fu_parser_acc: [<<128>>]}} =
+             {bin, depayloader}
+
+    {bin, depayloader} = Depayloader.H264.depayload(depayloader, packet2)
+
+    assert {nil, %{current_timestamp: 10, fu_parser_acc: [<<129>>, <<128>>]}} =
+             {bin, depayloader}
+
+    assert {^payload_fua_out, %{current_timestamp: 10, fu_parser_acc: []}} =
+             Depayloader.H264.depayload(depayloader, packet3)
+  end
+
+  test "colliding timestamps in one FU-A" do
+    payload_fuas = <<60, 133, 128>>
+    payload_fua = <<60, 5, 129>>
+
+    depayloader = Depayloader.H264.new()
+
+    packet1 = ExRTP.Packet.new(payload_fuas, timestamp: 10)
+    packet2 = ExRTP.Packet.new(payload_fua, timestamp: 11)
+
+    {bin, depayloader} = Depayloader.H264.depayload(depayloader, packet1)
+
+    assert {nil, %{current_timestamp: 10, fu_parser_acc: [<<128>>]}} =
+             {bin, depayloader}
+
+    {bin, depayloader} = Depayloader.H264.depayload(depayloader, packet2)
+
+    assert {nil, %{current_timestamp: nil, fu_parser_acc: []}} =
+             {bin, depayloader}
+  end
+
+  test "starting new without ending previous FU-A" do
+    payload_fuas1 = <<60, 133, 128>>
+    payload_fuas2 = <<60, 133, 129>>
+
+    depayloader = Depayloader.H264.new()
+
+    packet1 = ExRTP.Packet.new(payload_fuas1, timestamp: 10)
+    packet2 = ExRTP.Packet.new(payload_fuas2, timestamp: 10)
+
+    {bin, depayloader} = Depayloader.H264.depayload(depayloader, packet1)
+
+    assert {nil, %{current_timestamp: 10, fu_parser_acc: [<<128>>]}} =
+             {bin, depayloader}
+
+    {bin, depayloader} = Depayloader.H264.depayload(depayloader, packet2)
+
+    assert {nil, %{current_timestamp: nil, fu_parser_acc: []}} =
+             {bin, depayloader}
+  end
+
+  test "non-start packet without starting FU-A beforehand" do
+    payload_fua = <<60, 5, 128>>
+
+    depayloader = Depayloader.H264.new()
+
+    packet = ExRTP.Packet.new(payload_fua, timestamp: 10)
+
+    assert {nil, %{current_timestamp: nil, fu_parser_acc: []}} =
+             Depayloader.H264.depayload(depayloader, packet)
+  end
+
+  test "non-fragmented FU-A (start and end bits set to 1)" do
+    payload_fua = <<60, 197, 129>>
+
+    depayloader = Depayloader.H264.new()
+
+    packet = ExRTP.Packet.new(payload_fua, timestamp: 10)
+
+    assert {nil, %{current_timestamp: nil, fu_parser_acc: []}} =
+             Depayloader.H264.depayload(depayloader, packet)
+  end
+
+  test "all unsupported NAL types" do
+    # reserved (22, 23, 30, 31) and unsupported NALu types (STAP-B: 25, MTAP-16: 26, MTAP-24: 27, FU-B: 29)
+    payloads_nalu_unsupported = [
+      <<54, 131>>,
+      <<55, 131>>,
+      <<62, 131>>,
+      <<63, 131>>,
+      <<57, 131>>,
+      <<58, 131>>,
+      <<59, 131>>,
+      <<61, 131>>
+    ]
+
+    depayloader = Depayloader.H264.new()
+
+    Enum.each(payloads_nalu_unsupported, fn payload ->
+      packet = ExRTP.Packet.new(payload, timestamp: 123)
+
+      assert {nil, %{current_timestamp: nil, fu_parser_acc: []}} =
+               Depayloader.H264.depayload(depayloader, packet)
+    end)
+  end
+
+  test "drop RTP padding packets" do
+    payload_empty = <<>>
+
+    depayloader = Depayloader.H264.new()
+    packet = ExRTP.Packet.new(payload_empty, padding: true, timestamp: 123)
+
+    assert {nil, %{current_timestamp: nil, fu_parser_acc: []}} =
+             Depayloader.H264.depayload(depayloader, packet)
+  end
+
+  test "drop malformed NAL" do
+    # forbidden zero bit set to 1
+    payload_invalid = <<181, 0>>
+
+    depayloader = Depayloader.H264.new()
+    packet = ExRTP.Packet.new(payload_invalid, timestamp: 123)
+
+    assert {nil, %{current_timestamp: nil, fu_parser_acc: []}} =
+             Depayloader.H264.depayload(depayloader, packet)
+  end
+
+  test "drop malformed STAP-A" do
+    # malformed STAP-A payload. First NAL should be 1-byte long, but is 2-bytes long
+    payload_invalid = <<56, 0, 1, 128, 12, 0, 1, 129>>
+
+    depayloader = Depayloader.H264.new()
+    packet = ExRTP.Packet.new(payload_invalid, timestamp: 123)
+
+    assert {nil, %{current_timestamp: nil, fu_parser_acc: []}} =
+             Depayloader.H264.depayload(depayloader, packet)
+  end
+end