From af6f48999aeb7d8651a336b2825e4eaddecc3c7a Mon Sep 17 00:00:00 2001 From: Aaron Janse Date: Sun, 30 Jun 2019 18:33:43 -0700 Subject: [PATCH 1/8] implement fancier char escaping --- README.md | 10 ++-- src/compiler/Error.elm | 5 ++ src/compiler/Stage/Parse/Parser.elm | 73 ++++++++++++++++++++++++----- tests/ParserTest.elm | 30 ++++++++++++ 4 files changed, 101 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 5e44845f..c8ebe777 100644 --- a/README.md +++ b/README.md @@ -72,8 +72,8 @@ Oh God please yes! :heart: Feel free to look around the [help wanted] | ----------------- | -------------------- | -------------------- | ------------------ | ------------------ | -------------------- | ------------------ | ------------------ | | integers | :heavy_check_mark: | :warning: [[1]](#f1) | :heavy_check_mark: | :heavy_check_mark: | :warning: [[2]](#f2) | :heavy_check_mark: | :heavy_check_mark: | | floats | :x: [[3]](#f3) | :x: [[4]](#f4) | :x: [[3]](#f3) | :x: [[3]](#f3) | :x: [[5]](#f5) | :x: [[3]](#f3) | :x: [[3]](#f3) | -| characters | :warning: [[6]](#f6) | :warning: [[7]](#f7) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | -| strings | :warning: [[8]](#f8) | :warning: [[9]](#f9) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | +| characters | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | +| strings | :warning: [[6]](#f6) | :warning: [[7]](#f7) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | booleans | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | variables | :warning: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :warning: | :heavy_check_mark: | :heavy_check_mark: | | lists | :x: | :x: | :x: | :x: | :x: | :x: | :x: | @@ -99,10 +99,8 @@ Oh God please yes! :heart: Feel free to look around the [help wanted] 3. Not implemented; tracked in [#17](https://github.com/elm-in-elm/compiler/issues/17) 4. Not implemented; not tracked yet 5. To be optimized the same way Ints are; not tracked yet -6. Comprehensive tests missing; will be fixed in [#15](https://github.com/elm-in-elm/compiler/pull/15) -7. Escape sequences not implemented; not tracked yet -8. Comprehensive tests missing; not tracked yet -9. Multiline strings (and maybe more) missing; not tracked yet +6. Comprehensive tests missing; not tracked yet +7. Multiline strings (and maybe more) missing; not tracked yet ## Prerequisites diff --git a/src/compiler/Error.elm b/src/compiler/Error.elm index ef177f8e..30c90d1b 100644 --- a/src/compiler/Error.elm +++ b/src/compiler/Error.elm @@ -81,6 +81,11 @@ type ParseProblem | ExpectingHexadecimals | ExpectingSingleQuote | ExpectingChar + | ExpectingEscapeBackslash + | ExpectingEscapeCharacter + | ExpectingUnicodeEscapeLeftBrace + | ExpectingUnicodeEscapeRightBrace + | InvalidUnicodeCodePoint | ExpectingDoubleQuote | ExpectingPlusOperator | ExpectingModuleDot -- `import Foo>. P.inContext InLiteralInt -{-| TODO escapes -TODO Unicode escapes --} +-- for literalChar and, in the future, literalString +stringHelp = + P.oneOf + [ P.succeed (identity) + |. P.token (P.Token "\\" ExpectingEscapeBackslash) + |= P.oneOf + [ P.map (\_ -> '\"') (P.token (P.Token "\"" ExpectingEscapeCharacter)) + , P.map (\_ -> '\'') (P.token (P.Token "'" ExpectingEscapeCharacter)) + , P.map (\_ -> '\n') (P.token (P.Token "n" ExpectingEscapeCharacter)) + , P.map (\_ -> '\t') (P.token (P.Token "t" ExpectingEscapeCharacter)) + , P.map (\_ -> '\r') (P.token (P.Token "r" ExpectingEscapeCharacter)) + , P.succeed identity + |. P.token (P.Token "u" ExpectingEscapeCharacter) + |. P.token (P.Token "{" ExpectingUnicodeEscapeLeftBrace) + |= unicode + |. P.token (P.Token "}" ExpectingUnicodeEscapeRightBrace) + ] + , P.succeed identity + |= P.getChompedString (P.chompIf (always True) ExpectingChar) + |> P.andThen (\string -> + string + |> String.uncons + |> Maybe.map (Tuple.first >> P.succeed) + |> Maybe.withDefault (P.problem (CompilerBug "Multiple characters chomped in `literalChar`")) + ) + ] + + literalChar : Parser_ Literal literalChar = (P.succeed identity |. P.symbol (P.Token "'" ExpectingSingleQuote) - |= P.getChompedString (P.chompIf (always True) ExpectingChar) + |= stringHelp |. P.symbol (P.Token "'" ExpectingSingleQuote) ) - |> P.andThen - (\string -> - string - |> String.uncons - |> Maybe.map (Tuple.first >> Char >> P.succeed) - |> Maybe.withDefault (P.problem (CompilerBug "Multiple characters chomped in `literalChar`")) - ) + |> P.map (\n -> Char n) + +unicode : Parser_ Char +unicode = + P.getChompedString (P.chompWhile Char.isHexDigit) + |> P.andThen codeToChar + + +codeToChar : String -> Parser_ Char +codeToChar str = + let + length = String.length str + code = String.foldl addHex 0 str + in + if length < 4 || length > 6 then + P.problem InvalidUnicodeCodePoint + else if 0 <= code && code <= 0x10FFFF then + P.succeed (Char.fromCode code) + else + P.problem InvalidUnicodeCodePoint + + +addHex : Char -> Int -> Int +addHex char total = + let + code = Char.toCode char + in + if 0x30 <= code && code <= 0x39 then + 16 * total + (code - 0x30) + else if 0x41 <= code && code <= 0x46 then + 16 * total + (10 + code - 0x41) + else + 16 * total + (10 + code - 0x61) {-| TODO escapes diff --git a/tests/ParserTest.elm b/tests/ParserTest.elm index cba5c4ea..64aaf13b 100644 --- a/tests/ParserTest.elm +++ b/tests/ParserTest.elm @@ -551,6 +551,36 @@ expr = , "'A'" , Ok (Literal (Char 'A')) ) + -- https://github.com/elm/compiler/blob/dcbe51fa22879f83b5d94642e117440cb5249bb1/compiler/src/Parse/String.hs#L279-L285 + , ( "escape n" + , "'\\n'" + , Ok (Literal (Char '\n')) + ) + , ( "escape r" + , "'\\r'" + , Ok (Literal (Char '\r')) + ) + , ( "escape t" + , "'\\t'" + , Ok (Literal (Char '\t')) + ) + , ( "double quote" + , "'\\\"'" + , Ok (Literal (Char '"')) -- " + ) -- ^ workaround for official elm + -- vscode syntax highlighter + , ( "single quote" + , "'\\\''" + , Ok (Literal (Char '\'')) + ) + , ( "emoji" + , "'😃'" + , Ok (Literal (Char '😃')) + ) + , ( "escaped unicode code point" + , "'\\u{1F648}'" + , Ok (Literal (Char '🙈')) + ) ] ) , ( "literal string" From 458a99941cec8ddbe505f2d63861ca9e0f30c054 Mon Sep 17 00:00:00 2001 From: Aaron Janse Date: Sun, 30 Jun 2019 18:52:44 -0700 Subject: [PATCH 2/8] add aaronjanse to README --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index c8ebe777..a62775cd 100644 --- a/README.md +++ b/README.md @@ -226,6 +226,12 @@ Runs `elm-test` on the test suite (gasp!)
Maxime Dantec + + +
+ Aaron Janse + From 657fa28321055d477fe637fcf45d8212664cefc6 Mon Sep 17 00:00:00 2001 From: Aaron Janse Date: Wed, 3 Jul 2019 21:01:20 -0700 Subject: [PATCH 3/8] remove unnecessary parenthesis --- src/compiler/Stage/Parse/Parser.elm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/Stage/Parse/Parser.elm b/src/compiler/Stage/Parse/Parser.elm index 9bca48e9..da146404 100644 --- a/src/compiler/Stage/Parse/Parser.elm +++ b/src/compiler/Stage/Parse/Parser.elm @@ -413,7 +413,7 @@ literalInt = -- for literalChar and, in the future, literalString stringHelp = P.oneOf - [ P.succeed (identity) + [ P.succeed identity |. P.token (P.Token "\\" ExpectingEscapeBackslash) |= P.oneOf [ P.map (\_ -> '\"') (P.token (P.Token "\"" ExpectingEscapeCharacter)) From 483bf8a270f7a5bb677bb0e09091de2f58655e78 Mon Sep 17 00:00:00 2001 From: Aaron Janse Date: Wed, 3 Jul 2019 21:03:15 -0700 Subject: [PATCH 4/8] rename `stringHelp` to `character` --- src/compiler/Stage/Parse/Parser.elm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/compiler/Stage/Parse/Parser.elm b/src/compiler/Stage/Parse/Parser.elm index da146404..91297060 100644 --- a/src/compiler/Stage/Parse/Parser.elm +++ b/src/compiler/Stage/Parse/Parser.elm @@ -411,7 +411,7 @@ literalInt = -- for literalChar and, in the future, literalString -stringHelp = +character = P.oneOf [ P.succeed identity |. P.token (P.Token "\\" ExpectingEscapeBackslash) @@ -433,7 +433,7 @@ stringHelp = string |> String.uncons |> Maybe.map (Tuple.first >> P.succeed) - |> Maybe.withDefault (P.problem (CompilerBug "Multiple characters chomped in `literalChar`")) + |> Maybe.withDefault (P.problem (CompilerBug "Multiple characters chomped in `character`")) ) ] @@ -442,7 +442,7 @@ literalChar : Parser_ Literal literalChar = (P.succeed identity |. P.symbol (P.Token "'" ExpectingSingleQuote) - |= stringHelp + |= character |. P.symbol (P.Token "'" ExpectingSingleQuote) ) |> P.map (\n -> Char n) From b7a3cf40afbfd735403bd6129e49426ac531887e Mon Sep 17 00:00:00 2001 From: Aaron Janse Date: Wed, 3 Jul 2019 21:05:06 -0700 Subject: [PATCH 5/8] remove unnecessary lambda --- src/compiler/Stage/Parse/Parser.elm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/Stage/Parse/Parser.elm b/src/compiler/Stage/Parse/Parser.elm index 91297060..b55c2c3a 100644 --- a/src/compiler/Stage/Parse/Parser.elm +++ b/src/compiler/Stage/Parse/Parser.elm @@ -445,7 +445,7 @@ literalChar = |= character |. P.symbol (P.Token "'" ExpectingSingleQuote) ) - |> P.map (\n -> Char n) + |> P.map Char unicode : Parser_ Char unicode = From 09fa46c23d95826016cbe894793732729001da4b Mon Sep 17 00:00:00 2001 From: Aaron Janse Date: Wed, 3 Jul 2019 21:30:01 -0700 Subject: [PATCH 6/8] use Hex.fromString --- src/compiler/Stage/Parse/Parser.elm | 42 ++++++++++------------------- 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/src/compiler/Stage/Parse/Parser.elm b/src/compiler/Stage/Parse/Parser.elm index 0cb252dc..3bd77bdc 100644 --- a/src/compiler/Stage/Parse/Parser.elm +++ b/src/compiler/Stage/Parse/Parser.elm @@ -468,34 +468,20 @@ literalChar = unicode : Parser_ Char unicode = P.getChompedString (P.chompWhile Char.isHexDigit) - |> P.andThen codeToChar - - -codeToChar : String -> Parser_ Char -codeToChar str = - let - length = String.length str - code = String.foldl addHex 0 str - in - if length < 4 || length > 6 then - P.problem InvalidUnicodeCodePoint - else if 0 <= code && code <= 0x10FFFF then - P.succeed (Char.fromCode code) - else - P.problem InvalidUnicodeCodePoint - - -addHex : Char -> Int -> Int -addHex char total = - let - code = Char.toCode char - in - if 0x30 <= code && code <= 0x39 then - 16 * total + (code - 0x30) - else if 0x41 <= code && code <= 0x46 then - 16 * total + (10 + code - 0x41) - else - 16 * total + (10 + code - 0x61) + |> P.andThen (\str -> + let + len = String.length str + in + if len < 4 || len > 6 then + P.problem InvalidUnicodeCodePoint + else + str + |> String.toLower + |> Hex.fromString + |> Result.map Char.fromCode + |> Result.map P.succeed + |> Result.withDefault (P.problem InvalidUnicodeCodePoint) + ) {-| TODO escapes From 48e9ae414278004aebef425ad11970aec35bdc6b Mon Sep 17 00:00:00 2001 From: Aaron Janse Date: Wed, 3 Jul 2019 21:33:11 -0700 Subject: [PATCH 7/8] make elm-format happy --- src/compiler/Stage/Parse/Parser.elm | 78 ++++++++++++++++------------- tests/ParserTest.elm | 13 ++--- 2 files changed, 50 insertions(+), 41 deletions(-) diff --git a/src/compiler/Stage/Parse/Parser.elm b/src/compiler/Stage/Parse/Parser.elm index 3bd77bdc..dafac11b 100644 --- a/src/compiler/Stage/Parse/Parser.elm +++ b/src/compiler/Stage/Parse/Parser.elm @@ -428,32 +428,36 @@ literalInt = |> P.inContext InLiteralInt + -- for literalChar and, in the future, literalString -character = + + +character = P.oneOf - [ P.succeed identity - |. P.token (P.Token "\\" ExpectingEscapeBackslash) - |= P.oneOf - [ P.map (\_ -> '\"') (P.token (P.Token "\"" ExpectingEscapeCharacter)) - , P.map (\_ -> '\'') (P.token (P.Token "'" ExpectingEscapeCharacter)) - , P.map (\_ -> '\n') (P.token (P.Token "n" ExpectingEscapeCharacter)) - , P.map (\_ -> '\t') (P.token (P.Token "t" ExpectingEscapeCharacter)) - , P.map (\_ -> '\r') (P.token (P.Token "r" ExpectingEscapeCharacter)) - , P.succeed identity - |. P.token (P.Token "u" ExpectingEscapeCharacter) - |. P.token (P.Token "{" ExpectingUnicodeEscapeLeftBrace) - |= unicode - |. P.token (P.Token "}" ExpectingUnicodeEscapeRightBrace) - ] - , P.succeed identity - |= P.getChompedString (P.chompIf (always True) ExpectingChar) - |> P.andThen (\string -> + [ P.succeed identity + |. P.token (P.Token "\\" ExpectingEscapeBackslash) + |= P.oneOf + [ P.map (\_ -> '"') (P.token (P.Token "\"" ExpectingEscapeCharacter)) + , P.map (\_ -> '\'') (P.token (P.Token "'" ExpectingEscapeCharacter)) + , P.map (\_ -> '\n') (P.token (P.Token "n" ExpectingEscapeCharacter)) + , P.map (\_ -> '\t') (P.token (P.Token "t" ExpectingEscapeCharacter)) + , P.map (\_ -> '\u{000D}') (P.token (P.Token "r" ExpectingEscapeCharacter)) + , P.succeed identity + |. P.token (P.Token "u" ExpectingEscapeCharacter) + |. P.token (P.Token "{" ExpectingUnicodeEscapeLeftBrace) + |= unicode + |. P.token (P.Token "}" ExpectingUnicodeEscapeRightBrace) + ] + , P.succeed identity + |= P.getChompedString (P.chompIf (always True) ExpectingChar) + |> P.andThen + (\string -> string |> String.uncons |> Maybe.map (Tuple.first >> P.succeed) |> Maybe.withDefault (P.problem (CompilerBug "Multiple characters chomped in `character`")) ) - ] + ] literalChar : Parser_ Literal @@ -463,25 +467,29 @@ literalChar = |= character |. P.symbol (P.Token "'" ExpectingSingleQuote) ) - |> P.map Char + |> P.map Char + unicode : Parser_ Char unicode = - P.getChompedString (P.chompWhile Char.isHexDigit) - |> P.andThen (\str -> - let - len = String.length str - in - if len < 4 || len > 6 then - P.problem InvalidUnicodeCodePoint - else - str - |> String.toLower - |> Hex.fromString - |> Result.map Char.fromCode - |> Result.map P.succeed - |> Result.withDefault (P.problem InvalidUnicodeCodePoint) - ) + P.getChompedString (P.chompWhile Char.isHexDigit) + |> P.andThen + (\str -> + let + len = + String.length str + in + if len < 4 || len > 6 then + P.problem InvalidUnicodeCodePoint + + else + str + |> String.toLower + |> Hex.fromString + |> Result.map Char.fromCode + |> Result.map P.succeed + |> Result.withDefault (P.problem InvalidUnicodeCodePoint) + ) {-| TODO escapes diff --git a/tests/ParserTest.elm b/tests/ParserTest.elm index ac55023a..efd1928a 100644 --- a/tests/ParserTest.elm +++ b/tests/ParserTest.elm @@ -555,14 +555,15 @@ expr = , "'A'" , Ok (Literal (Char 'A')) ) - -- https://github.com/elm/compiler/blob/dcbe51fa22879f83b5d94642e117440cb5249bb1/compiler/src/Parse/String.hs#L279-L285 + + -- https://github.com/elm/compiler/blob/dcbe51fa22879f83b5d94642e117440cb5249bb1/compiler/src/Parse/String.hs#L279-L285 , ( "escape n" , "'\\n'" , Ok (Literal (Char '\n')) ) , ( "escape r" , "'\\r'" - , Ok (Literal (Char '\r')) + , Ok (Literal (Char '\u{000D}')) ) , ( "escape t" , "'\\t'" @@ -570,11 +571,11 @@ expr = ) , ( "double quote" , "'\\\"'" - , Ok (Literal (Char '"')) -- " - ) -- ^ workaround for official elm - -- vscode syntax highlighter + , Ok (Literal (Char '"')) + -- " (for vscode-elm bug) + ) , ( "single quote" - , "'\\\''" + , "'\\''" , Ok (Literal (Char '\'')) ) , ( "emoji" From 7c495f7661bc8a5641eb8b9a962872d81dce7f05 Mon Sep 17 00:00:00 2001 From: Aaron Janse Date: Thu, 4 Jul 2019 11:01:41 -0700 Subject: [PATCH 8/8] rework ExpectingEscapeCharacter --- src/compiler/Error.elm | 2 +- src/compiler/Stage/Parse/Parser.elm | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/compiler/Error.elm b/src/compiler/Error.elm index be4d0a42..49b73035 100644 --- a/src/compiler/Error.elm +++ b/src/compiler/Error.elm @@ -81,7 +81,7 @@ type ParseProblem | ExpectingSingleQuote | ExpectingChar | ExpectingEscapeBackslash - | ExpectingEscapeCharacter + | ExpectingEscapeCharacter Char | ExpectingUnicodeEscapeLeftBrace | ExpectingUnicodeEscapeRightBrace | InvalidUnicodeCodePoint diff --git a/src/compiler/Stage/Parse/Parser.elm b/src/compiler/Stage/Parse/Parser.elm index dafac11b..ed8b218f 100644 --- a/src/compiler/Stage/Parse/Parser.elm +++ b/src/compiler/Stage/Parse/Parser.elm @@ -437,13 +437,13 @@ character = [ P.succeed identity |. P.token (P.Token "\\" ExpectingEscapeBackslash) |= P.oneOf - [ P.map (\_ -> '"') (P.token (P.Token "\"" ExpectingEscapeCharacter)) - , P.map (\_ -> '\'') (P.token (P.Token "'" ExpectingEscapeCharacter)) - , P.map (\_ -> '\n') (P.token (P.Token "n" ExpectingEscapeCharacter)) - , P.map (\_ -> '\t') (P.token (P.Token "t" ExpectingEscapeCharacter)) - , P.map (\_ -> '\u{000D}') (P.token (P.Token "r" ExpectingEscapeCharacter)) + [ P.map (\_ -> '"') (P.token (P.Token "\"" (ExpectingEscapeCharacter '"'))) -- " (elm-vscode workaround) + , P.map (\_ -> '\'') (P.token (P.Token "'" (ExpectingEscapeCharacter '\''))) + , P.map (\_ -> '\n') (P.token (P.Token "n" (ExpectingEscapeCharacter 'n'))) + , P.map (\_ -> '\t') (P.token (P.Token "t" (ExpectingEscapeCharacter 't'))) + , P.map (\_ -> '\u{000D}') (P.token (P.Token "r" (ExpectingEscapeCharacter 'r'))) , P.succeed identity - |. P.token (P.Token "u" ExpectingEscapeCharacter) + |. P.token (P.Token "u" (ExpectingEscapeCharacter 'u')) |. P.token (P.Token "{" ExpectingUnicodeEscapeLeftBrace) |= unicode |. P.token (P.Token "}" ExpectingUnicodeEscapeRightBrace)