diff --git a/pom.xml b/pom.xml index 7d6d23e1..d6f2da6f 100644 --- a/pom.xml +++ b/pom.xml @@ -77,7 +77,7 @@ com.shapesecurity shape-functional-java - 2.3.3 + 2.5.1 com.google.code.findbugs diff --git a/src/main/java/com/shapesecurity/shift/es2016/parser/GenericParser.java b/src/main/java/com/shapesecurity/shift/es2016/parser/GenericParser.java index de7003cb..636654bd 100644 --- a/src/main/java/com/shapesecurity/shift/es2016/parser/GenericParser.java +++ b/src/main/java/com/shapesecurity/shift/es2016/parser/GenericParser.java @@ -1702,6 +1702,9 @@ protected Either3 parsePrimaryEx throw this.createErrorWithLocation(this.getLocation(), "Invalid regular expression flags"); } } + if (!PatternAcceptor.acceptRegex(pattern, gFlag, iFlag, mFlag, yFlag, uFlag)) { + throw this.createErrorWithLocation(this.getLocation(), "Invalid regular expression"); + } return Either3.left(this.finishNode(startState, new LiteralRegExpExpression(pattern, gFlag, iFlag, mFlag, yFlag, uFlag))); default: throw this.createUnexpected(this.lookahead); diff --git a/src/main/java/com/shapesecurity/shift/es2016/parser/PatternAcceptor.java b/src/main/java/com/shapesecurity/shift/es2016/parser/PatternAcceptor.java new file mode 100644 index 00000000..eef60387 --- /dev/null +++ b/src/main/java/com/shapesecurity/shift/es2016/parser/PatternAcceptor.java @@ -0,0 +1,852 @@ +/** + * Copyright 2018 Shape Security, Inc.

Licensed under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a + * copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by + * applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See + * the License for the specific language governing permissions and limitations under the License. + */ +package com.shapesecurity.shift.es2016.parser; + +import com.shapesecurity.functional.F; +import com.shapesecurity.functional.data.ImmutableSet; +import com.shapesecurity.functional.data.Maybe; + +import javax.annotation.Nonnull; + +import java.util.*; + +import static com.shapesecurity.shift.es2016.utils.Utils.isIdentifierPart; +import static com.shapesecurity.shift.es2016.utils.Utils.isIdentifierStart; + +public class PatternAcceptor { + + public final String pattern; + public final boolean gFlag; + public final boolean iFlag; + public final boolean mFlag; + public final boolean yFlag; + public final boolean uFlag; + + private static final String[] decimalDigits = new String[]{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}; + private static final String[] octalDigits = new String[]{"0", "1", "2", "3", "4", "5", "6", "7"}; + private static final String[] hexDigits = new String[]{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", "A", "B", "C", "D", "E", "F"}; + private static final String syntaxCharacters = "^$\\.*+?()[]{}|"; + private static final String[] syntaxCharacterArray = "^$\\.*+?()[]{}|".split(""); + private static final String extendedSyntaxCharacters = "^$\\.*+?()[|"; + private static final String[] controlCharacters = new String[]{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"}; + + private static final HashSet utf16GeneralCategoryValues = new HashSet<>(Arrays.asList("Cased_Letter", "LC", 
"Close_Punctuation", "Pe", "Connector_Punctuation", "Pc", "Control", "Cc", "cntrl", "Currency_Symbol", "Sc", "Dash_Punctuation", "Pd", "Decimal_Number", "Nd", "digit", "Enclosing_Mark", "Me", "Final_Punctuation", "Pf", "Format", "Cf", "Initial_Punctuation", "Pi", "Letter", "L", "Letter_Number", "Nl", "Line_Separator", "Zl", "Lowercase_Letter", "Ll", "Mark", "M", "Combining_Mark", "Math_Symbol", "Sm", "Modifier_Letter", "Lm", "Modifier_Symbol", "Sk", "Nonspacing_Mark", "Mn", "Number", "N", "Open_Punctuation", "Ps", "Other", "C", "Other_Letter", "Lo", "Other_Number", "No", "Other_Punctuation", "Po", "Other_Symbol", "So", "Paragraph_Separator", "Zp", "Private_Use", "Co", "Punctuation", "P", "punct", "Separator", "Z", "Space_Separator", "Zs", "Spacing_Mark", "Mc", "Surrogate", "Cs", "Symbol", "S", "Titlecase_Letter", "Lt", "Unassigned", "Cn", "Uppercase_Letter", "Lu")); + private static final HashSet utf16ScriptCategoryValues = new HashSet<>(Arrays.asList("Adlam", "Adlm", "Ahom", "Anatolian_Hieroglyphs", "Hluw", "Arabic", "Arab", "Armenian", "Armn", "Avestan", "Avst", "Balinese", "Bali", "Bamum", "Bamu", "Bassa_Vah", "Bass", "Batak", "Batk", "Bengali", "Beng", "Bhaiksuki", "Bhks", "Bopomofo", "Bopo", "Brahmi", "Brah", "Braille", "Brai", "Buginese", "Bugi", "Buhid", "Buhd", "Canadian_Aboriginal", "Cans", "Carian", "Cari", "Caucasian_Albanian", "Aghb", "Chakma", "Cakm", "Cham", "Cherokee", "Cher", "Common", "Zyyy", "Coptic", "Copt", "Qaac", "Cuneiform", "Xsux", "Cypriot", "Cprt", "Cyrillic", "Cyrl", "Deseret", "Dsrt", "Devanagari", "Deva", "Dogra", "Dogr", "Duployan", "Dupl", "Egyptian_Hieroglyphs", "Egyp", "Elbasan", "Elba", "Ethiopic", "Ethi", "Georgian", "Geor", "Glagolitic", "Glag", "Gothic", "Goth", "Grantha", "Gran", "Greek", "Grek", "Gujarati", "Gujr", "Gunjala_Gondi", "Gong", "Gurmukhi", "Guru", "Han", "Hani", "Hangul", "Hang", "Hanifi_Rohingya", "Rohg", "Hanunoo", "Hano", "Hatran", "Hatr", "Hebrew", "Hebr", "Hiragana", "Hira", "Imperial_Aramaic", "Armi", 
"Inherited", "Zinh", "Qaai", "Inscriptional_Pahlavi", "Phli", "Inscriptional_Parthian", "Prti", "Javanese", "Java", "Kaithi", "Kthi", "Kannada", "Knda", "Katakana", "Kana", "Kayah_Li", "Kali", "Kharoshthi", "Khar", "Khmer", "Khmr", "Khojki", "Khoj", "Khudawadi", "Sind", "Lao", "Laoo", "Latin", "Latn", "Lepcha", "Lepc", "Limbu", "Limb", "Linear_A", "Lina", "Linear_B", "Linb", "Lisu", "Lycian", "Lyci", "Lydian", "Lydi", "Mahajani", "Mahj", "Makasar", "Maka", "Malayalam", "Mlym", "Mandaic", "Mand", "Manichaean", "Mani", "Marchen", "Marc", "Medefaidrin", "Medf", "Masaram_Gondi", "Gonm", "Meetei_Mayek", "Mtei", "Mende_Kikakui", "Mend", "Meroitic_Cursive", "Merc", "Meroitic_Hieroglyphs", "Mero", "Miao", "Plrd", "Modi", "Mongolian", "Mong", "Mro", "Mroo", "Multani", "Mult", "Myanmar", "Mymr", "Nabataean", "Nbat", "New_Tai_Lue", "Talu", "Newa", "Nko", "Nkoo", "Nushu", "Nshu", "Ogham", "Ogam", "Ol_Chiki", "Olck", "Old_Hungarian", "Hung", "Old_Italic", "Ital", "Old_North_Arabian", "Narb", "Old_Permic", "Perm", "Old_Persian", "Xpeo", "Old_Sogdian", "Sogo", "Old_South_Arabian", "Sarb", "Old_Turkic", "Orkh", "Oriya", "Orya", "Osage", "Osge", "Osmanya", "Osma", "Pahawh_Hmong", "Hmng", "Palmyrene", "Palm", "Pau_Cin_Hau", "Pauc", "Phags_Pa", "Phag", "Phoenician", "Phnx", "Psalter_Pahlavi", "Phlp", "Rejang", "Rjng", "Runic", "Runr", "Samaritan", "Samr", "Saurashtra", "Saur", "Sharada", "Shrd", "Shavian", "Shaw", "Siddham", "Sidd", "SignWriting", "Sgnw", "Sinhala", "Sinh", "Sogdian", "Sogd", "Sora_Sompeng", "Sora", "Soyombo", "Soyo", "Sundanese", "Sund", "Syloti_Nagri", "Sylo", "Syriac", "Syrc", "Tagalog", "Tglg", "Tagbanwa", "Tagb", "Tai_Le", "Tale", "Tai_Tham", "Lana", "Tai_Viet", "Tavt", "Takri", "Takr", "Tamil", "Taml", "Tangut", "Tang", "Telugu", "Telu", "Thaana", "Thaa", "Thai", "Tibetan", "Tibt", "Tifinagh", "Tfng", "Tirhuta", "Tirh", "Ugaritic", "Ugar", "Vai", "Vaii", "Warang_Citi", "Wara", "Yi", "Yiii", "Zanabazar_Square", "Zanb")); + + private static HashSet 
constructUtf16LonePropertyValues() { + HashSet set = new HashSet<>(Arrays.asList("ASCII", "ASCII_Hex_Digit", "AHex", "Alphabetic", "Alpha", "Any", "Assigned", "Bidi_Control", "Bidi_C", "Bidi_Mirrored", "Bidi_M", "Case_Ignorable", "CI", "Cased", "Changes_When_Casefolded", "CWCF", "Changes_When_Casemapped", "CWCM", "Changes_When_Lowercased", "CWL", "Changes_When_NFKC_Casefolded", "CWKCF", "Changes_When_Titlecased", "CWT", "Changes_When_Uppercased", "CWU", "Dash", "Default_Ignorable_Code_Point", "DI", "Deprecated", "Dep", "Diacritic", "Dia", "Emoji", "Emoji_Component", "Emoji_Modifier", "Emoji_Modifier_Base", "Emoji_Presentation", "Extended_Pictographic", "Extender", "Ext", "Grapheme_Base", "Gr_Base", "Grapheme_Extend", "Gr_Ext", "Hex_Digit", "Hex", "IDS_Binary_Operator", "IDSB", "IDS_Trinary_Operator", "IDST", "ID_Continue", "IDC", "ID_Start", "IDS", "Ideographic", "Ideo", "Join_Control", "Join_C", "Logical_Order_Exception", "LOE", "Lowercase", "Lower", "Math", "Noncharacter_Code_Point", "NChar", "Pattern_Syntax", "Pat_Syn", "Pattern_White_Space", "Pat_WS", "Quotation_Mark", "QMark", "Radical", "Regional_Indicator", "RI", "Sentence_Terminal", "STerm", "Soft_Dotted", "SD", "Terminal_Punctuation", "Term", "Unified_Ideograph", "UIdeo", "Uppercase", "Upper", "Variation_Selector", "VS", "White_Space", "space", "XID_Continue", "XIDC", "XID_Start", "XIDS")); + set.addAll(utf16GeneralCategoryValues); + return set; + } + + private static final HashSet utf16LonePropertyValues = constructUtf16LonePropertyValues(); + + /** + * Builds the table mapping each control-escape letter (f, n, r, t, v) to the code point it denotes. + */ + private static HashMap constructControlEscapeCharacterValues() { + HashMap map = new HashMap<>(); + map.put("f", (int) '\f'); + map.put("n", (int) '\n'); + map.put("r", (int) '\r'); + map.put("t", (int) '\t'); + // Java has no '\v' literal. JavaScript's \v is VERTICAL TAB, U+000B (decimal 11, not 0x11). + map.put("v", 0x0B); // \v in javascript + return map; + } + + private static final HashMap controlEscapeCharacterValues = constructControlEscapeCharacterValues(); + + private static final String[] controlEscapeCharacters = 
controlEscapeCharacterValues.keySet().toArray(new String[0]); + + private static HashMap> constructUtf16NonBinaryPropertyNames() { + HashMap> map = new HashMap<>(); + map.put("General_Category", utf16GeneralCategoryValues); + map.put("gc", utf16GeneralCategoryValues); + map.put("Script", utf16ScriptCategoryValues); + map.put("sc", utf16ScriptCategoryValues); + map.put("Script_Extensions", utf16ScriptCategoryValues); + map.put("scx", utf16ScriptCategoryValues); + return map; + } + + private static final HashMap> utf16NonBinaryPropertyNames = constructUtf16NonBinaryPropertyNames(); + + private static class RegexException extends RuntimeException { + public RegexException(String message) { + super(message); + } + } + + private class Context { + private int index; + private ImmutableSet backreferenceNames; + private ImmutableSet groupingNames; + private ImmutableSet backreferences; + private int nParenthesis; + + private Context(@Nonnull Context context) { + this.index = context.index; + this.backreferenceNames = context.backreferenceNames; + this.groupingNames = context.groupingNames; + this.backreferences = context.backreferences; + this.nParenthesis = context.nParenthesis; + } + + public Context() { + this.index = 0; + this.backreferenceNames = ImmutableSet.emptyUsingEquality(); + this.groupingNames = ImmutableSet.emptyUsingEquality(); + this.backreferences = ImmutableSet.emptyUsingEquality(); + this.nParenthesis = 0; + } + + /** + * Records one more capturing group and, when it is named, its name. + * + * @param name the group name, or Nothing for an unnamed group + * @return false (recording nothing) when the name is already in use, true otherwise + */ + public boolean addGrouping(@Nonnull Maybe name) { + if (name.isJust()) { + if (this.groupingNames.contains(name.fromJust())) { + return false; + } + this.groupingNames = this.groupingNames.put(name.fromJust()); + } + this.nParenthesis++; + return true; + } + + public void backreferenceName(@Nonnull String name) { + this.backreferenceNames = this.backreferenceNames.put(name); + } + + public void backreference(int num) { + this.backreferences = this.backreferences.put(num); + } + + 
public boolean verifyBackreferences() { + if (uFlag) { + for (Integer backreference : this.backreferences) { + if (backreference > nParenthesis) { + return false; + } + } + } + for (String backreferenceName : this.backreferenceNames) { + if (!groupingNames.contains(backreferenceName)) { + return false; + } + } + return true; + } + + public Context goDeeper() { + return new Context(this); + } + + public boolean goDeeper(F predicate) { + try { + Context context = this.goDeeper(); + boolean accepted = predicate.apply(context); + if (accepted) { + this.absorb(context); + } + return accepted; + } catch (RegexException e) { + return false; + } + } + + public Maybe goDeeperExtended(F> predicate) { + try { + Context context = this.goDeeper(); + Maybe accepted = predicate.apply(context); + if (accepted.isJust()) { + this.absorb(context); + } + return accepted; + } catch (RegexException e) { + return Maybe.empty(); + } + } + + private void absorb(Context otherContext) { + this.index = otherContext.index; + this.backreferenceNames = otherContext.backreferenceNames; + this.backreferences = otherContext.backreferences; + this.groupingNames = otherContext.groupingNames; + this.nParenthesis = otherContext.nParenthesis; + } + + public Maybe nextCodePoint() { + return this.index >= pattern.length() ? 
Maybe.empty() : Maybe.of(new String(Character.toChars(pattern.codePointAt(this.index)))); + } + + public void skip(int n) { + for (int i = 0; i < n && this.index < pattern.length(); i++) { + this.index += this.nextCodePoint().fromJust().length(); + } + if (this.index > pattern.length()) { + this.index = pattern.length(); + } + } + + public boolean eat(String str) { + if (this.index + str.length() > pattern.length() || !pattern.startsWith(str, this.index)) { + return false; + } + this.index += str.length(); + return true; + } + + /** + * Consumes one identifier-start code point, written literally or as a unicode escape. + * + * @return the code point, or Nothing (consuming nothing) if the input does not start an identifier + */ + public Maybe eatIdentifierStart() { + if (this.index >= pattern.length()) { + return Maybe.empty(); + } + return this.goDeeperExtended(context -> { + int characterValue = 0; + if (context.match("\\u")) { + context.skip(1); + // A malformed escape yields Nothing; reject it rather than calling fromJust() on Nothing. + characterValue = acceptUnicodeEscape(context).orJust(-1); + if (characterValue < 0) { + return Maybe.empty(); + } + } else { + characterValue = pattern.codePointAt(context.index); + context.index += Character.toChars(characterValue).length; + } + String character = new String(Character.toChars(characterValue)); + if (character.equals("_") || character.equals("$") || isIdentifierStart(characterValue)) { + return Maybe.of(characterValue); + } + return Maybe.empty(); + }); + } + + /** + * Consumes one identifier-part code point, written literally or as a unicode escape. + * + * @return the code point, or Nothing (consuming nothing) if the input does not continue an identifier + */ + public Maybe eatIdentifierPart() { + if (this.index >= pattern.length()) { + return Maybe.empty(); + } + return this.goDeeperExtended(context -> { + int characterValue = 0; + if (context.match("\\u")) { + context.skip(1); + // A malformed escape yields Nothing; reject it rather than calling fromJust() on Nothing. + characterValue = acceptUnicodeEscape(context).orJust(-1); + if (characterValue < 0) { + return Maybe.empty(); + } + } else { + characterValue = pattern.codePointAt(context.index); + context.index += Character.toChars(characterValue).length; + } + String character = new String(Character.toChars(characterValue)); + // Compare code points: ZWNJ (U+200C) and ZWJ (U+200D) are single characters, not the text "\u200C". + if (characterValue == 0x200C || characterValue == 0x200D || character.equals("$") || isIdentifierPart(characterValue)) { + return Maybe.of(characterValue); + } + return Maybe.empty(); + }); + } + + public Maybe eatAny(@Nonnull String... 
strings) { + for (String string : strings) { + if (this.eat(string)) { + return Maybe.of(string); + } + } + return Maybe.empty(); + } + + public Maybe eatAny(@Nonnull String[]... stringArrays) { + for (String[] strings : stringArrays) { + for (String string : strings) { + if (this.eat(string)) { + return Maybe.of(string); + } + } + } + return Maybe.empty(); + } + + public String collect(@Nonnull String[]... stringArrays) { + return collect(-1, stringArrays); + } + + public String collect(int limit, @Nonnull String[]... stringArrays) { + StringBuilder stringBuilder = new StringBuilder(); + + masterLoop: + for (int i = 0; limit < 0 || i < limit; i++) { + for (String[] strings : stringArrays) { + for (String string : strings) { + if (this.eat(string)) { + stringBuilder.append(string); + continue masterLoop; + } + } + } + break; + } + return stringBuilder.toString(); + } + + public void expect(@Nonnull String str) { + if (!this.eat(str)) { + throw new RegexException("Expected \"" + str + "\" at index " + this.index + ", not found"); + } + } + + public boolean match(@Nonnull String str) { + return this.index + str.length() <= pattern.length() && pattern.startsWith(str, this.index); + } + + public boolean matchAny(@Nonnull String... 
strings) { + for (String string : strings) { + if (this.match(string)) { + return true; + } + } + return false; + } + + public boolean empty() { + return this.index >= pattern.length(); + } + + } + + private PatternAcceptor(@Nonnull String pattern, boolean gFlag, boolean iFlag, boolean mFlag, boolean yFlag, boolean uFlag) { + this.pattern = pattern; + this.gFlag = gFlag; + this.iFlag = iFlag; + this.mFlag = mFlag; + this.yFlag = yFlag; + this.uFlag = uFlag; + } + + public static boolean acceptRegex(@Nonnull String pattern, boolean gFlag, boolean iFlag, boolean mFlag, boolean yFlag, boolean uFlag) { + PatternAcceptor acceptor = new PatternAcceptor(pattern, gFlag, iFlag, mFlag, yFlag, uFlag); + return acceptor.acceptRegex(); + } + + private boolean acceptRegex() { + Context context = new Context(); + try { + if (!acceptDisjunction(context, Maybe.empty())) { + return false; + } + } catch (RegexException e) { + return false; + } + return context.verifyBackreferences(); + } + + private F> maybeLogicalOr(F>... 
expressions) { + return context -> { + for (F> expression : expressions) { + Maybe value = expression.apply(context); + if (value.isJust()) { + return value; + } + } + return Maybe.empty(); + }; + } + + private boolean acceptDisjunction(Context context, Maybe terminator) { + do { + if (terminator.isJust() && context.eat(terminator.fromJust())) { + return true; + } else if (context.match("|")) { + continue; + } + if (!acceptAlternative(context, terminator)) { + return false; + } + } while(context.eat("|")); + if (terminator.isJust()) { + context.expect(terminator.fromJust()); + } + return true; + } + + private boolean acceptAlternative(Context context, Maybe terminator) { + while (!context.match("|") && !context.empty() && (terminator.isNothing() || !context.match(terminator.fromJust()))) { + if (!acceptTerm(context)) { + return false; + } + } + return true; + } + + private boolean acceptTerm(Context context) { + // non-quantified references are rolled into quantified accepts to improve performance significantly. + if (this.uFlag) { + return acceptAssertion(context) || + acceptQuantified(this::acceptAtom).apply(context); + } + return acceptQuantified(this::acceptQuantifiableAssertion).apply(context) || + acceptAssertion(context) || + acceptQuantified(this::acceptAtom).apply(context); + } + + private F acceptLabeledGroup(F predicate) { + return currentContext -> currentContext.goDeeper(context -> { + context.expect("("); + if (predicate.apply(context)) { + return acceptDisjunction(context, Maybe.of(")")); + } + return false; + }); + } + + private boolean acceptAssertion(Context context) { + return context.eatAny("^", "$", "\\b", "\\B").isJust() || + acceptLabeledGroup(subContext -> { + if (uFlag) { + return subContext.eatAny("?=", "?!", "?<=", "? 
subContext.eatAny("?=", "?!").isJust()).apply(context); + } + + private boolean acceptDecimal(Context context) { + return context.collect(decimalDigits).length() > 0; + } + + /** + * Wraps an acceptor so that it also consumes an optional quantifier (*, +, ?, {n}, {n,}, {n,m}, each optionally lazy). + * A malformed braced quantifier is an error only when the u flag is set. + */ + private F acceptQuantified(F acceptor) { + return superContext -> superContext.goDeeper(context ->{ + if (!acceptor.apply(context)) { + return false; + } + if (context.match("{")) { + return context.goDeeper(subContext -> { + subContext.expect("{"); + String decimal1 = subContext.collect(decimalDigits); + if (decimal1.length() == 0) { + return false; + } + if (subContext.eat(",") && subContext.matchAny(decimalDigits)) { + String decimal2 = subContext.collect(decimalDigits); + // Bounds may exceed the int range; compare with arbitrary precision instead of Integer.parseInt. + if (new java.math.BigInteger(decimal1).compareTo(new java.math.BigInteger(decimal2)) > 0) { + return false; + } + } + subContext.expect("}"); + subContext.eat("?"); + return true; + }) || !uFlag; + } else if (context.eatAny("*", "+", "?").isJust()) { + context.eat("?"); + } + return true; + }); + } + + private boolean acceptPatternCharacter(Context context) { + Maybe nextCodePoint = context.nextCodePoint(); + if (nextCodePoint.isNothing() || syntaxCharacters.contains(nextCodePoint.fromJust())) { + return false; + } + // skip() counts code points, not UTF-16 units, so exactly one is consumed. + context.skip(1); + return true; + } + + private boolean acceptExtendedPatternCharacter(Context context) { + Maybe nextCodePoint = context.nextCodePoint(); + if (nextCodePoint.isNothing() || extendedSyntaxCharacters.contains(nextCodePoint.fromJust())) { + return false; + } + // skip() counts code points, not UTF-16 units, so exactly one is consumed. + context.skip(1); + return true; + } + + private boolean acceptInvalidBracedQuantifier(Context context) { + return context.goDeeper(subContext -> { + subContext.expect("{"); + if (!acceptDecimal(subContext)) { + return false; + } + if (subContext.eat(",") && subContext.matchAny(decimalDigits) && !acceptDecimal(subContext)) { + return false; + } + subContext.expect("}"); + return true; + }); + } + + private boolean acceptAtom(Context context) { + if (this.uFlag) { + return acceptPatternCharacter(context) || 
+ context.eat(".") || + context.goDeeper(subContext -> { + subContext.expect("\\"); + return acceptAtomEscape(subContext); + }) || + acceptCharacterClass(context) || + acceptLabeledGroup(subContext -> subContext.eat("?:")).apply(context) || + acceptGrouping(context); + } + boolean matched = context.eat(".") || + context.goDeeper(subContext -> { + subContext.expect("\\"); + if (subContext.match("c")) { + return true; + } + return acceptAtomEscape(subContext); + }) || + acceptCharacterClass(context) || + acceptLabeledGroup(subContext -> subContext.eat("?:")).apply(context) || + acceptGrouping(context); + if (!matched && acceptInvalidBracedQuantifier(context)) { + return false; + } + return matched || acceptExtendedPatternCharacter(context); + } + + private boolean acceptGrouping(Context superContext) { + return superContext.goDeeper(context -> { + context.expect("("); + String[] groupName = new String[1]; + context.goDeeper(subContext -> { + subContext.expect("?"); + Maybe maybeGroupName = acceptGroupName(subContext); + if (maybeGroupName.isJust()) { + groupName[0] = maybeGroupName.fromJust(); + return true; + } + return false; + }); + if (!acceptDisjunction(context, Maybe.of(")"))) { + return false; + } + return context.addGrouping(Maybe.fromNullable(groupName[0])); + }); + } + + private boolean acceptAtomEscape(Context context) { + return acceptDecimalEscape(context) || + acceptCharacterClassEscape(context) || + acceptCharacterEscape(context).map(i -> true).orJust(false) || + acceptGroupNameBackreference(context); + } + + private boolean acceptDecimalEscape(Context superContext) { + return superContext.goDeeper(context -> { + StringBuilder digits = new StringBuilder(); + Maybe firstDecimal = context.eatAny(decimalDigits); + if (firstDecimal.isNothing() || firstDecimal.fromJust().equals("0")) { + return false; + } + digits.append(firstDecimal.fromJust()); + Maybe digit = Maybe.empty(); + while ((digit = context.eatAny(decimalDigits)).isJust()) { + 
digits.append(digit.fromJust()); + } + // A reference too large for an int cannot name an existing group; clamp instead of throwing. + int groupNumber; + try { + groupNumber = Integer.parseInt(digits.toString()); + } catch (NumberFormatException ignored) { + groupNumber = Integer.MAX_VALUE; + } + context.backreference(groupNumber); + return true; + }); + } + + /** + * Accepts the body of a character class escape: d, D, s, S, w, W, or (u flag only) p{...} / P{...}. + */ + private boolean acceptCharacterClassEscape(Context context) { + if (context.eatAny("d", "D", "s", "S", "w", "W").isJust()) { + return true; + } + return this.uFlag && context.goDeeper(subContext -> { + if(!(subContext.eat("p{") || subContext.eat("P{"))) { + return false; + } + if (!acceptUnicodePropertyValueExpression(subContext)) { + return false; + } + // The closing brace must be consumed from the sub-context that has advanced past the property expression. + return subContext.eat("}"); + }); + } + + private String acceptUnicodePropertyName(Context context) { + return context.collect(controlCharacters, new String[]{"_"}); + } + + private String acceptUnicodePropertyValue(Context context) { + return context.collect(controlCharacters, decimalDigits, new String[]{"_"}); + } + + private boolean acceptLoneUnicodePropertyNameOrValue(Context context) { + return utf16LonePropertyValues.contains(acceptUnicodePropertyValue(context)); + } + + /** + * Accepts either name=value for a known non-binary property, or a lone binary property name / general category value. + */ + private boolean acceptUnicodePropertyValueExpression(Context superContext) { + return superContext.goDeeper(context -> { + String name = acceptUnicodePropertyName(context); + if (name.length() == 0) { + return false; + } + context.expect("="); + String value = acceptUnicodePropertyValue(context); + if (value.length() == 0) { + return false; + } + // Unknown property names are simply rejected; the map lookup would otherwise return null. + return utf16NonBinaryPropertyNames.containsKey(name) && utf16NonBinaryPropertyNames.get(name).contains(value); + }) || superContext.goDeeper(this::acceptLoneUnicodePropertyNameOrValue); + } + + private Maybe acceptUnicodeEscape(Context superContext) { + return superContext.goDeeperExtended(context -> { + context.expect("u"); + if (uFlag && context.eat("{")) { + String hex = context.collect(hexDigits); + context.expect("}"); + // An empty body or a value beyond the int range is not a valid code point escape. + int value; + try { + value = Integer.parseInt(hex, 16); + } catch (NumberFormatException ignored) { + return Maybe.empty(); + } + return value > 0x10FFFF ? 
Maybe.empty() : Maybe.of(value); + } + String hex = context.collect(4, hexDigits); + if (hex.length() != 4) { + return Maybe.empty(); + } + int value = Integer.parseInt(hex, 16); + + if (value >= 0xD800 && value <= 0xDBFF) { + Maybe surrogatePairValue = context.goDeeperExtended(subContext -> { + subContext.expect("\\u"); + String hex2 = subContext.collect(4, hexDigits); + if (hex2.length() != 4) { + return Maybe.empty(); + } + int value2 = Integer.parseInt(hex2, 16); + if (value2 < 0xDC00 || value2 >= 0xE000) { + return Maybe.empty(); + } + return Maybe.of(0x10000 + ((value & 0x3FF) << 10) + (value2 & 0x03FF)); + }); + if (surrogatePairValue.isJust()) { + return surrogatePairValue; + } + } + return Maybe.of(value); + }); + } + + /** + * Tries each character-escape form in turn and returns the escaped character's value, or Nothing if none applies. + */ + private Maybe acceptCharacterEscape(Context superContext) { + return maybeLogicalOr( + context -> { + Maybe escaped = context.eatAny(controlEscapeCharacters); + if (escaped.isNothing() || !controlEscapeCharacterValues.containsKey(escaped.fromJust())) { + return Maybe.empty(); + } + return Maybe.of(controlEscapeCharacterValues.get(escaped.fromJust())); + }, + context -> context.goDeeperExtended(subContext -> { + subContext.expect("c"); + Maybe character = subContext.eatAny(controlCharacters); + if (character.isNothing()) { + return Maybe.empty(); + } + return Maybe.of(character.fromJust().codePointAt(0) % 32); + }), + context -> context.goDeeperExtended(subContext -> { + subContext.expect("0"); + if (subContext.eatAny(decimalDigits).isJust()) { + return Maybe.empty(); + } + return Maybe.of(0); + }), + context -> context.goDeeperExtended(subContext -> { + subContext.expect("x"); + String hex = subContext.collect(2, hexDigits); + if (hex.length() != 2) { + return Maybe.empty(); + } + return Maybe.of(Integer.parseInt(hex, 16)); + }), + this::acceptUnicodeEscape, + context -> context.goDeeperExtended(subContext -> { + if (uFlag) { + return Maybe.empty(); + } + F> acceptOctalDigit = subContext2 -> subContext2.goDeeperExtended(subContext3 -> { + 
Maybe octal2 = subContext3.eatAny(octalDigits); + if (octal2.isNothing()) { + return Maybe.empty(); + } + return Maybe.of(Integer.parseInt(octal2.fromJust(), 8)); + }); + Maybe octal1 = acceptOctalDigit.apply(subContext); + if (octal1.isNothing()) { + return Maybe.empty(); + } + Maybe octal2 = acceptOctalDigit.apply(subContext); + if (octal2.isNothing()) { + return octal1; + } else if (octal1.fromJust() < 4) { + Maybe octal3 = acceptOctalDigit.apply(subContext); + if (octal3.isNothing()) { + return Maybe.of(octal1.fromJust() << 3 | octal2.fromJust()); + } + // Three-digit octal escape: the lowest three bits come from the third digit. + return Maybe.of(octal1.fromJust() << 6 | octal2.fromJust() << 3 | octal3.fromJust()); + } else { + return Maybe.of(octal1.fromJust() << 3 | octal2.fromJust()); + } + }), + context -> context.goDeeperExtended(subContext -> { + if (!uFlag) { + return Maybe.empty(); + } + Maybe maybeCharacter = subContext.eatAny(syntaxCharacterArray); + return maybeCharacter.map(character -> character.codePointAt(0)); + }), + context -> { + if(uFlag && context.eat("/")) { + return Maybe.of((int) "/".charAt(0)); + } + return Maybe.empty(); + }, + context -> context.goDeeperExtended(subContext -> { + if (uFlag) { + return Maybe.empty(); + } + Maybe maybeCharacter = subContext.nextCodePoint(); + if (maybeCharacter.isJust() && !maybeCharacter.fromJust().equals("c") && !maybeCharacter.fromJust().equals("k")) { + subContext.skip(1); + return Maybe.of(maybeCharacter.fromJust().codePointAt(0)); + } + return Maybe.empty(); + }) + ).apply(superContext); + } + + private boolean acceptGroupNameBackreference(Context superContext) { + return superContext.goDeeper(context -> { + context.expect("k"); + Maybe name = acceptGroupName(context); + if (name.isNothing()) { + return false; + } + context.backreferenceName(name.fromJust()); + return true; + }); + } + + private Maybe acceptGroupName(Context superContext) { + return superContext.goDeeperExtended(context -> { + context.expect("<"); + Maybe start = context.eatIdentifierStart().map(i -> new 
String(Character.toChars(i))); + if (start.isNothing()) { + return Maybe.empty(); + } + StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append(start.fromJust()); + Maybe part; + while ((part = context.eatIdentifierPart().map(i -> new String(Character.toChars(i)))).isJust()) { + stringBuilder.append(part.fromJust()); + } + context.expect(">"); + return Maybe.of(stringBuilder.toString()); + }); + } + + private Maybe acceptClassEscape(Context superContext) { + return this.maybeLogicalOr( + context -> context.goDeeperExtended(subContext -> { + subContext.expect("b"); + return Maybe.of(0x0008); // backspace + }), + context -> { + if (uFlag && context.eat("-")) { + return Maybe.of((int)"-".charAt(0)); + } + return Maybe.empty(); + }, + context -> context.goDeeperExtended(subContext -> { + if (uFlag || !subContext.eat("c")) { + return Maybe.empty(); + } + return subContext.eatAny(decimalDigits, new String[]{"_"}).map(str -> str.codePointAt(0) % 32); + }), + context -> acceptCharacterClassEscape(context) ? 
Maybe.of(-1) : Maybe.empty(), + this::acceptCharacterEscape + ).apply(superContext); + } + + /** + * Accepts one class atom other than a bare "-": either an escape or any code point except "]" and "-". + * + * @return the atom's value (-1 for a character class escape such as \d), or Nothing if no atom is present + */ + private Maybe acceptClassAtomNoDash(Context context) { + if (context.eat("\\")) { + return this.maybeLogicalOr( + this::acceptClassEscape, + subContext -> subContext.goDeeperExtended(subContext2 -> { + if (subContext2.match("c")) { + return Maybe.of(0x005C); // reverse solidus + } + return Maybe.empty(); + }) + ).apply(context); + } + Maybe nextCodePoint = context.nextCodePoint(); + if (nextCodePoint.isNothing() || nextCodePoint.fromJust().equals("]") || nextCodePoint.fromJust().equals("-")) { + return Maybe.empty(); + } + // skip() counts code points, not UTF-16 units, so exactly one is consumed. + context.skip(1); + return Maybe.of(nextCodePoint.fromJust().codePointAt(0)); + } + + private Maybe acceptClassAtom(Context context) { + if (context.eat("-")) { + return Maybe.of((int)"-".charAt(0)); + } + return acceptClassAtomNoDash(context); + } + + private Maybe finishClassRange(Context context, int atom) { + if (context.eat("-")) { + if (context.match("]")) { + return Maybe.of(-1); // termination sentinel + } + Maybe otherAtom = acceptClassAtom(context); + if (otherAtom.isNothing()) { + return Maybe.empty(); + } + if (this.uFlag && (atom == -1 || otherAtom.fromJust() == -1)) { + return Maybe.empty(); + } else if (!(!this.uFlag && (atom == -1 || otherAtom.fromJust() == -1)) && atom > otherAtom.fromJust()) { + return Maybe.empty(); + } else if (context.match("]")) { + return Maybe.of(-1); + } + return acceptNonEmptyClassRanges(context); + } + if (context.match("]")) { + return Maybe.of(-1); + } + return acceptNonEmptyClassRangesNoDash(context); + } + + private Maybe acceptNonEmptyClassRanges(Context context) { + Maybe atom = acceptClassAtom(context); + if (atom.isNothing()) { + return Maybe.empty(); + } + return finishClassRange(context, atom.fromJust()); + } + + private Maybe acceptNonEmptyClassRangesNoDash(Context context) { + if (context.eat("-") && !context.match("]")) { + return Maybe.empty(); + } + Maybe atom = 
acceptClassAtomNoDash(context); + if (atom.isNothing()) { + return Maybe.empty(); + } + return finishClassRange(context, atom.fromJust()); + } + + private boolean acceptCharacterClass(Context superContext) { + return superContext.goDeeper(context -> { + context.expect("["); + context.eat("^"); + if (context.eat("]")) { + return true; + } + if (acceptNonEmptyClassRanges(context).isJust()) { + context.expect("]"); + return true; + } + return false; + }); + } + +} diff --git a/src/test/java/com/shapesecurity/shift/es2016/Serialization/DeserializerTest.java b/src/test/java/com/shapesecurity/shift/es2016/Serialization/DeserializerTest.java index 4d545014..fd8790e3 100644 --- a/src/test/java/com/shapesecurity/shift/es2016/Serialization/DeserializerTest.java +++ b/src/test/java/com/shapesecurity/shift/es2016/Serialization/DeserializerTest.java @@ -136,7 +136,7 @@ public void testLiterals() throws IllegalAccessException, NoSuchMethodException, testHelperFromScriptCode("0"); testHelperFromScriptCode("1.5"); testHelperFromScriptCode("/[a-z]/i"); - testHelperFromScriptCode("/(?!.){0,}?/u"); + testHelperFromScriptCode("/(?!.){0,}?/"); testHelperFromScriptCode("('x')"); testHelperFromScriptCode("('\\\n')"); } diff --git a/src/test/java/com/shapesecurity/shift/es2016/Test262/PassTest.java b/src/test/java/com/shapesecurity/shift/es2016/Test262/PassTest.java index 63139a6d..c754851c 100644 --- a/src/test/java/com/shapesecurity/shift/es2016/Test262/PassTest.java +++ b/src/test/java/com/shapesecurity/shift/es2016/Test262/PassTest.java @@ -53,7 +53,12 @@ public class PassTest { static final String expectationsDir = "src/test/resources/shift-parser-expectations/expectations/"; static final Set xfail = new HashSet<>(Arrays.asList( - "" // empty line to make git diffs nicer + // Invalid tests + // https://github.com/tc39/test262-parser-tests/issues/20 + "e4a43066905a597b.js", + "78c215fabdf13bae.js", + "bf49ec8d96884562.js", + "66e383bfd18e66ab.js" )); static void 
assertTreesEqual(Program expected, Program actual) { diff --git a/src/test/java/com/shapesecurity/shift/es2016/parser/ParserTestCase.java b/src/test/java/com/shapesecurity/shift/es2016/parser/ParserTestCase.java index 65b0710e..9e92b97c 100644 --- a/src/test/java/com/shapesecurity/shift/es2016/parser/ParserTestCase.java +++ b/src/test/java/com/shapesecurity/shift/es2016/parser/ParserTestCase.java @@ -117,6 +117,16 @@ public static void testScriptFailureML(@Nonnull String source, int line, int col fail("Parsing error not found"); } + /** Asserts that parsing source as a script throws a JsError whose description equals error, and fails the test when no JsError is thrown. This overload compares only the description; it takes no position argument. */ public static void testScriptFailure(@Nonnull String source, @Nonnull String error) { + try { + Parser.parseScript(source); + } catch (JsError jsError) { + assertEquals(error, jsError.getDescription()); + return; + } + fail("Parsing error not found"); + } + public static void testScriptFailure(@Nonnull String source, int index, @Nonnull String error) { testScriptFailureML(source, 1, index, index, error); } diff --git a/src/test/java/com/shapesecurity/shift/es2016/parser/expressions/literals/LiteralRegExpExpressionTest.java b/src/test/java/com/shapesecurity/shift/es2016/parser/expressions/literals/LiteralRegExpExpressionTest.java index 0772e0ba..60401d07 100644 --- a/src/test/java/com/shapesecurity/shift/es2016/parser/expressions/literals/LiteralRegExpExpressionTest.java +++ b/src/test/java/com/shapesecurity/shift/es2016/parser/expressions/literals/LiteralRegExpExpressionTest.java @@ -5,14 +5,200 @@ import com.shapesecurity.shift.es2016.parser.ParserTestCase; import com.shapesecurity.shift.es2016.parser.JsError; +import com.shapesecurity.shift.es2016.parser.PatternAcceptor; import org.junit.Test; +import javax.annotation.Nonnull; + public class LiteralRegExpExpressionTest extends ParserTestCase { + + /** Regular expression literals that testLiteralRegExpExpressionTest passes to testScript one by one, so each is expected to parse. */ private static final String[] expectedToPass = new String[] { + "/./", + "/.|./", + "/.||./", + "/|/", + "/|.||.|/", + "/^$\\b\\B/", + "/^X/", + "/X$/", + "/\\bX/", + "/\\BX/", + "/(?=t|v|X|.|$||)/", + "/(?!t|v|X|.|$||)/", +
"/(?<=t|v|X|.|$||)/", + "/(?)\\k/", + "/\\ud800\\u1000/u", + "/\\u{10}/u", + "/[\\1]/", + "/[\\7]/", + "/[\\15]/", + "/[\\153]/", + "/[\\72]/" + }; + + /** Regular expression literals that testLiteralRegExpExpressionTest passes to testScriptFailure one by one, so each is expected to be rejected with the description Invalid regular expression. */ private static final String[] expectedToFail = new String[] { + "/(?=t|v|X|.|$||)*/u", + "/(?!t|v|X|.|$||)*/u", + "/(?<=t|v|X|.|$||)*/", + "/(?)/", + "/(?)(?)/", + "/\\k<\">/", + "/\\k/", + "/\\xZZ/u", + "/\\ud800\\uZZ/u", + "/\\uZZ/u", + "/\\u{ZZ}/u", + "/5{5,1G}/u" + }; + @Test public void testLiteralRegExpExpressionTest() throws JsError { testScript("/a/", new LiteralRegExpExpression("a", false, false, false, false, false)); testScript("/\\0/", new LiteralRegExpExpression("\\0", false, false, false, false, false)); - testScript("/\\1/u", new LiteralRegExpExpression("\\1", false, false, false, false, true)); + testScript("/\\1()/u", new LiteralRegExpExpression("\\1()", false, false, false, false, true)); testScript("/a/;", new LiteralRegExpExpression("a", false, false, false, false, false)); testScript("/a/i", new LiteralRegExpExpression("a", false, true, false, false, false)); testScript("/a/i;", new LiteralRegExpExpression("a", false, true, false, false, false)); @@ -39,14 +225,25 @@ public void testLiteralRegExpExpressionTest() throws JsError { testScript("/{/;", new LiteralRegExpExpression("{", false, false, false, false, false)); testScript("/}/;", new LiteralRegExpExpression("}", false, false, false, false, false)); - testScript("/}?/u;", new LiteralRegExpExpression("}?", false, false, false, false, true)); - testScript("/{*/u;", new LiteralRegExpExpression("{*", false, false, false, false, true)); + testScriptFailure("/}?/u;", 5, "Invalid regular expression"); + testScriptFailure("/{*/u;", 5, "Invalid regular expression"); testScript("/{}/;", new LiteralRegExpExpression("{}", false, false, false, false, false)); testScript("/.{.}/;", new LiteralRegExpExpression(".{.}", false, false, false, false, false)); testScript("/[\\w-\\s]/;", new LiteralRegExpExpression("[\\w-\\s]", false, false, false, false,
false)); testScript("/[\\s-\\w]/;", new LiteralRegExpExpression("[\\s-\\w]", false, false, false, false, false)); testScript("/(?=.)*/;", new LiteralRegExpExpression("(?=.)*", false, false, false, false, false)); testScript("/(?!.){0,}?/;", new LiteralRegExpExpression("(?!.){0,}?", false, false, false, false, false)); - testScript("/(?!.){0,}?/u", new LiteralRegExpExpression("(?!.){0,}?", false, false, false, false, true)); + testScriptFailure("/(?!.){0,}?/u", 13, "Invalid regular expression"); + + /* a lone closing bracket must be accepted as a pattern when all flags, including u, are off */ assertTrue(PatternAcceptor.acceptRegex("]", false, false, false, false, false)); + + + /* bulk check: every literal in expectedToPass must parse as a script */ for (String regex : expectedToPass) { + testScript(regex); + } + + /* bulk check: every literal in expectedToFail must raise a JsError with exactly this description */ for (String regex : expectedToFail) { + testScriptFailure(regex, "Invalid regular expression"); + } } } diff --git a/src/test/java/com/shapesecurity/shift/es2016/reducer/CloneReducerTest.java b/src/test/java/com/shapesecurity/shift/es2016/reducer/CloneReducerTest.java index 1e2fcf94..559e2c9d 100644 --- a/src/test/java/com/shapesecurity/shift/es2016/reducer/CloneReducerTest.java +++ b/src/test/java/com/shapesecurity/shift/es2016/reducer/CloneReducerTest.java @@ -114,7 +114,9 @@ public void testNumericExpression() throws JsError{ public void testLiteralRegExpExpression() throws JsError{ cloneTestScript("/a/"); cloneTestScript("/\\0/"); - cloneTestScript("/\\1/u"); + cloneTestScript("/\\0/u"); + cloneTestScript("/\\1/"); + cloneTestScript("/\\1()/u"); cloneTestScript("/a/;"); cloneTestScript("/a/i"); cloneTestScript("/a/i;"); @@ -139,15 +141,12 @@ public void testLiteralRegExpExpression() throws JsError{ cloneTestScript("/0/g.test"); cloneTestScript("/{/;"); cloneTestScript("/}/;"); - cloneTestScript("/}?/u;"); - cloneTestScript("/{*/u;"); cloneTestScript("/{}/;"); cloneTestScript("/.{.}/;"); cloneTestScript("/[\\w-\\s]/;"); cloneTestScript("/[\\s-\\w]/;"); cloneTestScript("/(?=.)*/;"); cloneTestScript("/(?!.){0,}?/;"); - cloneTestScript("/(?!.){0,}?/u"); } @Test