diff --git a/CHANGES.md b/CHANGES.md index c0fc84a43e..f3af883c99 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -41,7 +41,8 @@ Core Grammars: - fix(swift) - Fixed syntax highlighting for class func/var declarations [guuido] - fix(yaml) - Fixed wrong escaping behavior in single quoted strings [guuido] - enh(nim) - Add `concept` and `defer` to list of Nim keywords [Jake Leahy] - +- fix(ruby) - Fix non-interpolable Ruby strings [Boris Verkhovskiy][] + New Grammars: - added 3rd party TTCN-3 grammar to SUPPORTED_LANGUAGES [Osmocom][] @@ -85,7 +86,7 @@ CONTRIBUTORS [guuido]: https://github.com/guuido [clsource]: https://github.com/clsource [Jake Leahy]: https://github.com/ire4ever1190 - +[Boris Verkhovskiy]: https://github.com/verhovsky ## Version 11.10.0 diff --git a/src/highlight.js b/src/highlight.js index ac44a1e115..e8aa0c60e2 100644 --- a/src/highlight.js +++ b/src/highlight.js @@ -1014,6 +1014,7 @@ const HLJS = function(hljs) { hljs.regex = { concat: regex.concat, lookahead: regex.lookahead, + escape: regex.escape, either: regex.either, optional: regex.optional, anyNumberOfTimes: regex.anyNumberOfTimes diff --git a/src/languages/ruby.js b/src/languages/ruby.js index 91b2cb527d..d4039d24d1 100644 --- a/src/languages/ruby.js +++ b/src/languages/ruby.js @@ -15,8 +15,7 @@ export default function(hljs) { /\b([A-Z]+[a-z0-9]+)+/, // ends in caps /\b([A-Z]+[a-z0-9]+)+[A-Z]+/, - ) - ; + ); const CLASS_NAME_WITH_NAMESPACE_RE = regex.concat(CLASS_NAME_RE, /(::\w+)*/) // very popular ruby built-ins that one might even assume // are actual keywords (despite that not being the case) @@ -122,56 +121,128 @@ export default function(hljs) { end: /\}/, keywords: RUBY_KEYWORDS }; - const STRING = { + + function string_variants(prefix, delimiters) { + return delimiters.map((d) => { + return { + begin: regex.concat(prefix, regex.escape(d.charAt(0))), + end: regex.escape(d.charAt(1)) + } + }) + } + + const STRING_DELIMITERS = [ + "()", + "[]", + "{}", + "<>", + "\\/\\/", + "%%", + "--" 
]; + + const SINGLE_QUOTED_STRING = { className: 'string', - contains: [ - hljs.BACKSLASH_ESCAPE, - SUBST - ], + contains: [ hljs.BACKSLASH_ESCAPE ], variants: [ { begin: /'/, end: /'/ }, + ...string_variants("%q", STRING_DELIMITERS) + ] + } + + const DOUBLE_QUOTED_STRING = { + className: 'string', + contains: [ + hljs.BACKSLASH_ESCAPE, + SUBST + ], + variants: [ { begin: /"/, end: /"/ }, + ...string_variants("%Q", STRING_DELIMITERS) + ] + } + + // TODO: continue to break these out into smaller more discrete modes + const OLD_STRINGS_TOO_MANY_VARIANTS = { + className: 'string', + contains: [ hljs.BACKSLASH_ESCAPE ], + variants: [ { begin: /`/, - end: /`/ + end: /`/, + contains: [ + SUBST + ] }, { - begin: /%[qQwWx]?\(/, - end: /\)/ + begin: /%[wWx]?\(/, + end: /\)/, + contains: [ + hljs.BACKSLASH_ESCAPE, + SUBST + ] }, { - begin: /%[qQwWx]?\[/, - end: /\]/ + begin: /%[wWx]?\[/, + end: /\]/, + contains: [ + hljs.BACKSLASH_ESCAPE, + SUBST + ] }, { - begin: /%[qQwWx]?\{/, - end: /\}/ + begin: /%[wWx]?\{/, + end: /\}/, + contains: [ + hljs.BACKSLASH_ESCAPE, + SUBST + ] }, { - begin: /%[qQwWx]?/ + begin: /%[wWx]?/, + contains: [ + hljs.BACKSLASH_ESCAPE, + SUBST + ] }, { - begin: /%[qQwWx]?\//, - end: /\// + begin: /%[wWx]?\//, + end: /\//, + contains: [ + hljs.BACKSLASH_ESCAPE, + SUBST + ] }, { - begin: /%[qQwWx]?%/, - end: /%/ + begin: /%[wWx]?%/, + end: /%/, + contains: [ + hljs.BACKSLASH_ESCAPE, + SUBST + ] }, { - begin: /%[qQwWx]?-/, - end: /-/ + begin: /%[wWx]?-/, + end: /-/, + contains: [ + hljs.BACKSLASH_ESCAPE, + SUBST + ] }, { - begin: /%[qQwWx]?\|/, - end: /\|/ + begin: /%[wWx]?\|/, + end: /\|/, + contains: [ + hljs.BACKSLASH_ESCAPE, + SUBST + ] }, // in the following expressions, \B in the beginning suppresses recognition of ?-sequences // where ? 
is the last character of a preceding identifier, as in: `func?4` @@ -181,27 +252,35 @@ export default function(hljs) { { begin: /\B\?(\\M-\\C-|\\M-\\c|\\c\\M-|\\M-|\\C-\\M-)[\x20-\x7e]/ }, { begin: /\B\?\\(c|C-)[\x20-\x7e]/ }, { begin: /\B\?\\?\S/ }, - // heredocs - { - // this guard makes sure that we have an entire heredoc and not a false - // positive (auto-detect, etc.) - begin: regex.concat( - /<<[-~]?'?/, - regex.lookahead(/(\w+)(?=\W)[^\n]*\n(?:[^\n]*\n)*?\s*\1\b/) - ), + ] + }; + + const HEREDOC = { + scope: "string", + // this guard makes sure that we have an entire heredoc and not a false + // positive (auto-detect, etc.) + begin: regex.concat( + /<<[-~]?'?/, + regex.lookahead(/(\w+)(?=\W)[^\n]*\n(?:[^\n]*\n)*?\s*\1\b/) + ), + contains: [ + hljs.END_SAME_AS_BEGIN({ + begin: /(\w+)/, + end: /(\w+)/, contains: [ - hljs.END_SAME_AS_BEGIN({ - begin: /(\w+)/, - end: /(\w+)/, - contains: [ - hljs.BACKSLASH_ESCAPE, - SUBST - ] - }) + hljs.BACKSLASH_ESCAPE, + SUBST ] - } + }) ] - }; + } + + const STRINGS = [ + SINGLE_QUOTED_STRING, + DOUBLE_QUOTED_STRING, + HEREDOC, + OLD_STRINGS_TOO_MANY_VARIANTS + ] // Ruby syntax is underdocumented, but this grammar seems to be accurate // as of version 2.7.2 (confirmed with (irb and `Ripper.sexp(...)`) @@ -316,8 +395,34 @@ export default function(hljs) { scope: "title.class" }; + const SYMBOL = { + className: 'symbol', + variants: [ + { + begin: regex.concat(/:/, RUBY_METHOD_RE) + }, + { + begin: /:"/, + end: /"/, + contains: [ + hljs.BACKSLASH_ESCAPE, + SUBST + ] + }, + { + begin: /:'/, + end: /'/, + contains: [ + hljs.BACKSLASH_ESCAPE + ] + } + ], + relevance: 0 + }; + const RUBY_DEFAULT_CONTAINS = [ - STRING, + SYMBOL, + ...STRINGS, CLASS_DEFINITION, INCLUDE_EXTEND, OBJECT_CREATION, @@ -332,15 +437,6 @@ export default function(hljs) { begin: hljs.UNDERSCORE_IDENT_RE + '(!|\\?)?:', relevance: 0 }, - { - className: 'symbol', - begin: ':(?!\\s)', - contains: [ - STRING, - { begin: RUBY_METHOD_RE } - ], - relevance: 0 - }, 
NUMBER, { // negative-look forward attempts to prevent false matches like: diff --git a/test/markup/erb/default.expect.txt b/test/markup/erb/default.expect.txt index 2473ac1b61..2de768b933 100644 --- a/test/markup/erb/default.expect.txt +++ b/test/markup/erb/default.expect.txt @@ -6,6 +6,6 @@ <%- available_things = things.select(&:available?) -%> <%%- x = 1 + 2 -%%> -<%% value = 'real string #{@value}' %%> +<%% value = "real string #{@value}" %%> <%%= available_things.inspect %%> \ No newline at end of file diff --git a/test/markup/erb/default.txt b/test/markup/erb/default.txt index f7ea6203f5..e173dc16ae 100644 --- a/test/markup/erb/default.txt +++ b/test/markup/erb/default.txt @@ -6,5 +6,5 @@ <%- available_things = things.select(&:available?) -%> <%%- x = 1 + 2 -%%> -<%% value = 'real string #{@value}' %%> +<%% value = "real string #{@value}" %%> <%%= available_things.inspect %%> diff --git a/test/markup/ruby/strings.expect.txt b/test/markup/ruby/strings.expect.txt index 07b8c51197..7915798cbd 100644 --- a/test/markup/ruby/strings.expect.txt +++ b/test/markup/ruby/strings.expect.txt @@ -22,9 +22,21 @@ c = ?\c\M-x # me c = ?\c? # delete, ASCII 7Fh (DEL) c = ?\C-? # delete, ASCII 7Fh (DEL) +# symbols +c = :booger #=> :booger +c = :"booger" #=> :booger +c = :'booger' #=> :booger +c = :"b#{yum}ger" #=> :burger + # Unicode character(s) of type \u{nnnn ....}, where each nnnn is 1-6 hexadecimal digits ([0-9a-fA-F]) c = ?\u{00AF09} c = ?\u{0AF09} c = ?\u{AF9} c = ?\u{F9} -c = ?\u{F} \ No newline at end of file +c = ?\u{F} + +# Interpolation +c = 'a#{1}b' #=> "a\#{1}b" +c = "a#{1}b" #=> "a1b" +c = %q(a#{1}b) #=> "a\#{1}b" +c = %Q{a#{1}b} #=> "a1b" diff --git a/test/markup/ruby/strings.txt b/test/markup/ruby/strings.txt index 43d35d656b..a2c89921fe 100644 --- a/test/markup/ruby/strings.txt +++ b/test/markup/ruby/strings.txt @@ -22,9 +22,21 @@ c = ?\c\M-x # meta control character, where x is an ASCII printable characte c = ?\c? # delete, ASCII 7Fh (DEL) c = ?\C-? 
# delete, ASCII 7Fh (DEL) +# symbols +c = :booger #=> :booger +c = :"booger" #=> :booger +c = :'booger' #=> :booger +c = :"b#{yum}ger" #=> :burger + # Unicode character(s) of type \u{nnnn ....}, where each nnnn is 1-6 hexadecimal digits ([0-9a-fA-F]) c = ?\u{00AF09} c = ?\u{0AF09} c = ?\u{AF9} c = ?\u{F9} -c = ?\u{F} \ No newline at end of file +c = ?\u{F} + +# Interpolation +c = 'a#{1}b' #=> "a\#{1}b" +c = "a#{1}b" #=> "a1b" +c = %q(a#{1}b) #=> "a\#{1}b" +c = %Q{a#{1}b} #=> "a1b"