diff --git a/lib/mail/encoders/quoted_printable.ex b/lib/mail/encoders/quoted_printable.ex index 1e5863e..051e8fc 100644 --- a/lib/mail/encoders/quoted_printable.ex +++ b/lib/mail/encoders/quoted_printable.ex @@ -19,7 +19,7 @@ defmodule Mail.Encoders.QuotedPrintable do "fa=C3=A7ade" """ @spec encode(binary) :: binary - @spec encode(binary, integer, list, non_neg_integer) :: binary + @spec encode(binary, integer, binary, non_neg_integer) :: binary def encode(string, max_length \\ @max_length, acc \\ <<>>, line_length \\ 0) def encode(<<>>, _, acc, _), do: acc @@ -72,7 +72,7 @@ defmodule Mail.Encoders.QuotedPrintable do ## Examples - Mail.QuotedPrintable.decode("fa=C3=A7ade") + Mail.Encoders.QuotedPrintable.decode("fa=C3=A7ade") "façade" """ @spec decode(binary) :: binary diff --git a/lib/mail/renderers/rfc_2822.ex b/lib/mail/renderers/rfc_2822.ex index 1dcc053..2c252fe 100644 --- a/lib/mail/renderers/rfc_2822.ex +++ b/lib/mail/renderers/rfc_2822.ex @@ -134,9 +134,14 @@ defmodule Mail.Renderers.RFC2822 do |> Enum.join(" ") end - defp render_header_value(_key, [value | subtypes]), - do: - Enum.join([encode_header_value(value, :quoted_printable) | render_subtypes(subtypes)], "; ") + defp render_header_value(key, [value | subtypes]) do + encoded_header_value = + value + |> encode_header_value(key) + |> fold_header_value(key) + + Enum.join([encoded_header_value | render_subtypes(subtypes)], "; ") + end defp render_header_value(key, value), do: render_header_value(key, List.wrap(value)) @@ -155,7 +160,7 @@ defmodule Mail.Renderers.RFC2822 do end defp render_address({name, email}), - do: "#{encode_header_value(~s("#{name}"), :quoted_printable)} <#{validate_address(email)}>" + do: "#{encode_header_value(~s("#{name}"))} <#{validate_address(email)}>" defp render_address(email), do: validate_address(email) @@ -170,7 +175,7 @@ defmodule Mail.Renderers.RFC2822 do defp render_subtypes([{key, value} | subtypes]) do key = String.replace(key, "_", "-") - value = 
encode_header_value(value, :quoted_printable) + value = encode_header_value(value) value = if value =~ ~r/[\s()<>@,;:\\<\/\[\]?=]/ do @@ -204,20 +209,82 @@ defmodule Mail.Renderers.RFC2822 do |> Enum.join("\r\n") end - # As stated at https://datatracker.ietf.org/doc/html/rfc2047#section-2, encoded words must be - # split in 76 chars including its surroundings and delimmiters. - # Since enclosing starts with =?UTF-8?Q? and ends with ?=, max length should be 64 - defp encode_header_value(header_value, :quoted_printable) do - case Mail.Encoders.QuotedPrintable.encode(header_value, 64) do - ^header_value -> header_value - encoded -> wrap_encoded_words(encoded) + defp encode_header_value(header_value, header \\ "") do + if ascii_string?(header_value) do + header_value + else + # From RFC2047 §2 https://datatracker.ietf.org/doc/html/rfc2047#section-2 + # An 'encoded-word' may not be more than 75 characters long, including + # 'charset', 'encoding', 'encoded-text', and delimiters. If it is + # desirable to encode more text than will fit in an 'encoded-word' of + # 75 characters, multiple 'encoded-word's (separated by CRLF SPACE) may + # be used. + + # From RFC2047 §5 https://datatracker.ietf.org/doc/html/rfc2047#section-5 + # ... an 'encoded-word' that appears in a + # header field defined as '*text' MUST be separated from any adjacent + # 'encoded-word' or 'text' by 'linear-white-space'. 
+ + header_value + |> Mail.Encoders.QuotedPrintable.encode( + # 75 is maximum length, subtract wrapping, add trailing "=" we strip out + 75 - byte_size("=?UTF-8?Q?") - byte_size("?=") + byte_size("="), + <<>>, + byte_size(header) + byte_size(": ") + ) + |> :binary.split("=\r\n", [:global]) + |> Enum.map(fn chunk -> + # SPACE must be encoded as "_" and then everything wrapped + # to indicate an 'encoded-word' + chunk = String.replace(chunk, " ", "_") + <<"=?UTF-8?Q?", chunk::binary, "?=">> + end) + |> Enum.join(" ") end end - defp wrap_encoded_words(value) do - :binary.split(value, "=\r\n", [:global]) - |> Enum.map(fn chunk -> <<"=?UTF-8?Q?", chunk::binary, "?=">> end) - |> Enum.join() + # Returns `true` if string only contains 7-bit characters or is empty + defp ascii_string?(value) when is_binary(value), do: is_nil(Regex.run(~r/[^\x00-\x7F]+/, value)) + + defp fold_header_value(header_value, header) do + # This _should_ handle most cases of header folding, but the RFC mentions for + # structured headers that contain email addresses, that folding should occur + # after commas (so avoiding folding in the middle of the name/email-address pair, + # even if there's foldable spaces there). As such, this is currently not + # used on fields that are known to have that structure. + + # desired header line limit is 78 characters + limit = 78 + + # Split on SPACE or HTAB but only if followed by non-whitespace, so each + # subsequent part starts with a whitespace we can potentially fold on. + # Trailing whitespace removed to prevent case where final line is only whitespace. 
+ [first_part | remaining_parts] = + header_value + |> String.trim_trailing() + |> then(&Regex.split(~r/[ \t]+[^ \t]+/, &1, include_captures: true, trim: true)) + + {lines, current, _prefix_length} = + Enum.reduce( + remaining_parts, + {[], first_part, byte_size(header) + byte_size(": ")}, + fn part, {lines, current, prefix_length} -> + if prefix_length + byte_size(current) + byte_size(part) <= limit do + {lines, current <> part, prefix_length} + else + # Adding the chunks together would be too long, so put `current` part into `lines` + # and `part` in the accumulator for the next iteration. + # Note: also includes case where `current` is too long on its own (because + # it can't be divided) + {[current | lines], part, 0} + end + end + ) + + # add final line and then join with CRLF + [current | lines] + |> Enum.reverse() + |> Enum.join("\r\n") end @doc """ diff --git a/test/mail/message_test.exs b/test/mail/message_test.exs index 3cf9344..7840d94 100644 --- a/test/mail/message_test.exs +++ b/test/mail/message_test.exs @@ -219,7 +219,7 @@ defmodule Mail.MessageTest do |> Mail.put_subject(subject) |> Mail.render() - encoded_subject = "=?UTF-8?Q?" <> Mail.Encoders.QuotedPrintable.encode(subject) <> "?=" + encoded_subject = "=?UTF-8?Q?" 
<> String.replace(Mail.Encoders.QuotedPrintable.encode(subject), " ", "_") <> "?=" assert String.contains?(txt, encoded_subject) assert %Mail.Message{headers: %{"subject" => ^subject}} = Mail.Parsers.RFC2822.parse(txt) @@ -245,10 +245,10 @@ defmodule Mail.MessageTest do |> Mail.render() encoded_from = - ~s(From: =?UTF-8?Q?"#{Mail.Encoders.QuotedPrintable.encode(elem(from, 0))}"?= <#{elem(from, 1)}>) + ~s(From: =?UTF-8?Q?"#{elem(from, 0) |> Mail.Encoders.QuotedPrintable.encode() |> String.replace(" ", "_")}"?= <#{elem(from, 1)}>) encoded_to = - ~s(To: =?UTF-8?Q?"#{Mail.Encoders.QuotedPrintable.encode(elem(to, 0))}"?= <#{elem(to, 1)}>) + ~s(To: =?UTF-8?Q?"#{elem(to, 0) |> Mail.Encoders.QuotedPrintable.encode() |> String.replace(" ", "_")}"?= <#{elem(to, 1)}>) assert txt =~ encoded_from assert txt =~ encoded_to @@ -273,18 +273,119 @@ defmodule Mail.MessageTest do end test "long UTF-8 in subject" do + # begin value with simple ASCII so each character is encoded into a single character subject = - "über alles\nnew ?= line some очень-очень-очень-очень-очень-очень-очень-очень-очень-очень-очень-очень long line" + "123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890 über alles\nnew ?= line some очень-очень-очень-очень-очень-очень-очень-очень-очень-очень-очень-очень long line" txt = Mail.build() |> Mail.put_subject(subject) |> Mail.render() + # Each encoded word has a maximum length of 75 characters, minus the wrapping + # 12 characters for the wrapping, that leaves 63 characters internally. 
The + # first line's header is 7 characters plus 1 space and 1 colon, so that leaves + # 54 internal characters within the encoded word encoded_subject = - "=?UTF-8?Q?=C3=BCber alles=0Anew =3F=3D line some =D0=BE=D1=87=D0=B5=D0=BD?==?UTF-8?Q?=D1=8C-=D0=BE=D1=87=D0=B5=D0=BD=D1=8C-=D0=BE=D1=87=D0=B5=D0=BD?==?UTF-8?Q?=D1=8C-=D0=BE=D1=87=D0=B5=D0=BD=D1=8C-=D0=BE=D1=87=D0=B5=D0=BD?==?UTF-8?Q?=D1=8C-=D0=BE=D1=87=D0=B5=D0=BD=D1=8C-=D0=BE=D1=87=D0=B5=D0=BD?==?UTF-8?Q?=D1=8C-=D0=BE=D1=87=D0=B5=D0=BD=D1=8C-=D0=BE=D1=87=D0=B5=D0=BD?==?UTF-8?Q?=D1=8C-=D0=BE=D1=87=D0=B5=D0=BD=D1=8C-=D0=BE=D1=87=D0=B5=D0=BD?==?UTF-8?Q?=D1=8C-=D0=BE=D1=87=D0=B5=D0=BD=D1=8C long line?=" + """ + Subject: =?UTF-8?Q?123456789012345678901234567890123456789012345678901234?= + =?UTF-8?Q?567890123456789012345678901234567890123456789012345678901234567?= + =?UTF-8?Q?890_=C3=BCber_alles=0Anew_=3F=3D_line_some_=D0=BE=D1=87=D0=B5?= + =?UTF-8?Q?=D0=BD=D1=8C-=D0=BE=D1=87=D0=B5=D0=BD=D1=8C-=D0=BE=D1=87=D0=B5?= + =?UTF-8?Q?=D0=BD=D1=8C-=D0=BE=D1=87=D0=B5=D0=BD=D1=8C-=D0=BE=D1=87=D0=B5?= + =?UTF-8?Q?=D0=BD=D1=8C-=D0=BE=D1=87=D0=B5=D0=BD=D1=8C-=D0=BE=D1=87=D0=B5?= + =?UTF-8?Q?=D0=BD=D1=8C-=D0=BE=D1=87=D0=B5=D0=BD=D1=8C-=D0=BE=D1=87=D0=B5?= + =?UTF-8?Q?=D0=BD=D1=8C-=D0=BE=D1=87=D0=B5=D0=BD=D1=8C-=D0=BE=D1=87=D0=B5?= + =?UTF-8?Q?=D0=BD=D1=8C-=D0=BE=D1=87=D0=B5=D0=BD=D1=8C_long_line?= + """ + |> String.replace("\n", "\r\n") assert String.contains?(txt, encoded_subject) assert %Mail.Message{headers: %{"subject" => ^subject}} = Mail.Parsers.RFC2822.parse(txt) end + + test "UTF-8 in header with extremely long name" do + header_name = "x-this-is-a-ridiculously-long-header-value-that-should-never-happen-in-practice" + value = "123太长了" + + txt = + Mail.build() + |> Mail.Message.put_header(header_name, value) + |> Mail.render() + + # Header is too long, so initial value is an empty encoded word of "=?UTF-8?Q??=". 
+ encoded_header = + """ + X-This-Is-A-Ridiculously-Long-Header-Value-That-Should-Never-Happen-In-Practice: =?UTF-8?Q??= + =?UTF-8?Q?123=E5=A4=AA=E9=95=BF=E4=BA=86?= + """ + |> String.replace("\n", "\r\n") + + assert String.contains?(txt, encoded_header) + assert %Mail.Message{headers: %{^header_name => ^value}} = Mail.Parsers.RFC2822.parse(txt) + end + + test "simple ASCII in subject that's folded" do + subject = + "Here's some regular text that contains enough characters that the header should be wrapped." + + txt = + Mail.build() + |> Mail.put_subject(subject) + |> Mail.render() + + # Each line has a maximum desired length of 78 characters (excluding trailing CRLF). + encoded_subject = + """ + Subject: Here's some regular text that contains enough characters that the + header should be wrapped. + """ + |> String.replace("\n", "\r\n") + + assert String.contains?(txt, encoded_subject) + assert %Mail.Message{headers: %{"subject" => ^subject}} = Mail.Parsers.RFC2822.parse(txt) + end + + test "long ASCII chunk in subject" do + subject = + "12345678901234567890123456789012345678901234567890123456789012345678901234567890 1234567890123456789012345678901234567890 Here's some regular text that continue to yet another line" + + txt = + Mail.build() + |> Mail.put_subject(subject) + |> Mail.render() + + # Each line has a maximum desired length of 78 characters (excluding trailing CRLF). 
+ encoded_subject = + """ + Subject: 12345678901234567890123456789012345678901234567890123456789012345678901234567890 + 1234567890123456789012345678901234567890 Here's some regular text that + continue to yet another line + """ + |> String.replace("\n", "\r\n") + + assert String.contains?(txt, encoded_subject) + assert %Mail.Message{headers: %{"subject" => ^subject}} = Mail.Parsers.RFC2822.parse(txt) + end + + test "ASCII in header with extremely long name" do + header_name = "x-this-is-a-ridiculously-long-header-value-that-should-never-happen-in-practice" + value = "123 too long" + + txt = + Mail.build() + |> Mail.Message.put_header(header_name, value) + |> Mail.render() + + # header is too long, so header is folded on the next available foldable whitespace + encoded_header = + """ + X-This-Is-A-Ridiculously-Long-Header-Value-That-Should-Never-Happen-In-Practice: 123 + too long + """ + |> String.replace("\n", "\r\n") + + assert String.contains?(txt, encoded_header) + assert %Mail.Message{headers: %{^header_name => ^value}} = Mail.Parsers.RFC2822.parse(txt) + end end