From f5e12996ed70f39927e294f66c3162ec0678d5f3 Mon Sep 17 00:00:00 2001 From: Napalys Date: Thu, 21 Nov 2024 13:44:55 +0100 Subject: [PATCH 1/5] JS: Added ecma2021, thus extractor now can deal with RegExp v flag --- javascript/extractor/src/com/semmle/jcorn/Parser.java | 1 + .../src/com/semmle/js/extractor/ExtractorConfig.java | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/javascript/extractor/src/com/semmle/jcorn/Parser.java b/javascript/extractor/src/com/semmle/jcorn/Parser.java index 79a63e9a1217..e55567123f94 100644 --- a/javascript/extractor/src/com/semmle/jcorn/Parser.java +++ b/javascript/extractor/src/com/semmle/jcorn/Parser.java @@ -788,6 +788,7 @@ private Token readRegexp() { String validFlags = "gim"; if (this.options.ecmaVersion() >= 6) validFlags = "gimuy"; if (this.options.ecmaVersion() >= 9) validFlags = "gimsuy"; + if (this.options.ecmaVersion() >= 12) validFlags = "gimsuyv"; if (!mods.matches("^[" + validFlags + "]*$")) this.raise(start, "Invalid regular expression flag"); if (mods.indexOf('u') >= 0) { diff --git a/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java b/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java index cb04f3074bfe..95057467a791 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java +++ b/javascript/extractor/src/com/semmle/js/extractor/ExtractorConfig.java @@ -41,7 +41,8 @@ public static enum ECMAVersion { ECMA2017(2017, 8), ECMA2018(2018, 9), ECMA2019(2019, 10), - ECMA2020(2020, 11); + ECMA2020(2020, 11), + ECMA2021(2021, 12); private final int version; public final int legacyVersion; @@ -232,7 +233,7 @@ public Set getPredefinedGlobals() { private VirtualSourceRoot virtualSourceRoot; public ExtractorConfig(boolean experimental) { - this.ecmaVersion = experimental ? ECMAVersion.ECMA2020 : ECMAVersion.ECMA2019; + this.ecmaVersion = experimental ? 
ECMAVersion.ECMA2021 : ECMAVersion.ECMA2019; this.platform = Platform.AUTO; this.jsx = true; this.sourceType = SourceType.AUTO; From 2bcb7cfde406941d0bc98ccca844da55fbd644ca Mon Sep 17 00:00:00 2001 From: Napalys Date: Thu, 21 Nov 2024 13:48:56 +0100 Subject: [PATCH 2/5] JS: Added predicate to check if v flag is used on regular expression --- javascript/ql/lib/semmle/javascript/Expr.qll | 3 +++ javascript/ql/lib/semmle/javascript/Regexp.qll | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/javascript/ql/lib/semmle/javascript/Expr.qll b/javascript/ql/lib/semmle/javascript/Expr.qll index 0049c5f5aca7..e8ec55f0174d 100644 --- a/javascript/ql/lib/semmle/javascript/Expr.qll +++ b/javascript/ql/lib/semmle/javascript/Expr.qll @@ -481,6 +481,9 @@ class RegExpLiteral extends @regexp_literal, Literal, RegExpParent { /** Holds if this regular expression has an `s` flag. */ predicate isDotAll() { RegExp::isDotAll(this.getFlags()) } + /** Holds if this regular expression has an `v` flag. */ + predicate isUnicodeSets() { RegExp::isUnicodeSets(this.getFlags()) } + override string getAPrimaryQlClass() { result = "RegExpLiteral" } } diff --git a/javascript/ql/lib/semmle/javascript/Regexp.qll b/javascript/ql/lib/semmle/javascript/Regexp.qll index dc7b0190c916..acfc888756e8 100644 --- a/javascript/ql/lib/semmle/javascript/Regexp.qll +++ b/javascript/ql/lib/semmle/javascript/Regexp.qll @@ -1162,6 +1162,10 @@ module RegExp { bindingset[flags] predicate isDotAll(string flags) { flags.matches("%s%") } + /** Holds if `flags` includes the `v` flag. */ + bindingset[flags] + predicate isUnicodeSets(string flags) { flags.matches("%v%") } + /** Holds if `flags` includes the `m` flag or is the unknown flag `?`. 
*/ bindingset[flags] predicate maybeMultiline(string flags) { flags = unknownFlag() or isMultiline(flags) } From 9bd3dc74e6e5d7cc6b56770dbabe98e304fa796c Mon Sep 17 00:00:00 2001 From: Napalys Date: Fri, 22 Nov 2024 14:37:17 +0100 Subject: [PATCH 3/5] JS: MissingUnicodeFlag now uses flags from RegExpLiteral, upgraded to accept v flag as well. --- javascript/ql/lib/semmle/javascript/Expr.qll | 3 +++ javascript/ql/lib/semmle/javascript/Regexp.qll | 8 ++++++++ .../RegExp/MissingUnicodeFlag/MissingUnicodeFlag.ql | 2 +- .../test/library-tests/RegExp/MissingUnicodeFlag/tst.js | 1 + 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/javascript/ql/lib/semmle/javascript/Expr.qll b/javascript/ql/lib/semmle/javascript/Expr.qll index e8ec55f0174d..e7028fdaac32 100644 --- a/javascript/ql/lib/semmle/javascript/Expr.qll +++ b/javascript/ql/lib/semmle/javascript/Expr.qll @@ -481,6 +481,9 @@ class RegExpLiteral extends @regexp_literal, Literal, RegExpParent { /** Holds if this regular expression has an `s` flag. */ predicate isDotAll() { RegExp::isDotAll(this.getFlags()) } + /** Holds if this regular expression has a `u` flag. */ + predicate isUnicode() { RegExp::isUnicode(this.getFlags()) } + /** Holds if this regular expression has an `v` flag. */ predicate isUnicodeSets() { RegExp::isUnicodeSets(this.getFlags()) } diff --git a/javascript/ql/lib/semmle/javascript/Regexp.qll b/javascript/ql/lib/semmle/javascript/Regexp.qll index acfc888756e8..81c5a7e6bdf6 100644 --- a/javascript/ql/lib/semmle/javascript/Regexp.qll +++ b/javascript/ql/lib/semmle/javascript/Regexp.qll @@ -1162,6 +1162,10 @@ module RegExp { bindingset[flags] predicate isDotAll(string flags) { flags.matches("%s%") } + /** Holds if `flags` includes the `u` flag. */ + bindingset[flags] + predicate isUnicode(string flags) { flags.matches("%u%") } + /** Holds if `flags` includes the `v` flag. 
*/ bindingset[flags] predicate isUnicodeSets(string flags) { flags.matches("%v%") } @@ -1182,6 +1186,10 @@ module RegExp { bindingset[flags] predicate maybeDotAll(string flags) { flags = unknownFlag() or isDotAll(flags) } + /** Holds if `flags` includes the `v` flag or is the unknown flag `?`. */ + bindingset[flags] + predicate maybeUnicodeSets(string flags) { flags = unknownFlag() or isUnicodeSets(flags) } + /** Holds if `term` and all of its disjuncts are anchored on both ends. */ predicate isFullyAnchoredTerm(RegExpTerm term) { exists(RegExpSequence seq | term = seq | diff --git a/javascript/ql/test/library-tests/RegExp/MissingUnicodeFlag/MissingUnicodeFlag.ql b/javascript/ql/test/library-tests/RegExp/MissingUnicodeFlag/MissingUnicodeFlag.ql index e8d47d789f3d..53e98b662fb1 100644 --- a/javascript/ql/test/library-tests/RegExp/MissingUnicodeFlag/MissingUnicodeFlag.ql +++ b/javascript/ql/test/library-tests/RegExp/MissingUnicodeFlag/MissingUnicodeFlag.ql @@ -3,7 +3,7 @@ import javascript from RegExpLiteral literal, RegExpConstant wideConstant where wideConstant.getLiteral() = literal and - not literal.getFlags().matches("%u%") and + not (literal.isUnicode() or literal.isUnicodeSets()) and wideConstant.getValue().length() > 1 and ( wideConstant.getParent() instanceof RegExpCharacterClass diff --git a/javascript/ql/test/library-tests/RegExp/MissingUnicodeFlag/tst.js b/javascript/ql/test/library-tests/RegExp/MissingUnicodeFlag/tst.js index 0de632174a14..10dd0ed9f8a7 100644 --- a/javascript/ql/test/library-tests/RegExp/MissingUnicodeFlag/tst.js +++ b/javascript/ql/test/library-tests/RegExp/MissingUnicodeFlag/tst.js @@ -2,4 +2,5 @@ /[𒍀-𒍅]/u; // OK /𒍀+/; // NOT OK /𒍀+/u; // OK +/𒍀+/v; // OK /(𒍀)+/; // OK From 71eda9d0e407aad1208f2669566d1d03d0461630 Mon Sep 17 00:00:00 2001 From: Napalys Date: Thu, 28 Nov 2024 12:41:24 +0100 Subject: [PATCH 4/5] JS: Added small test case printing ast for RegExp with intersection op --- .../RegExp/Intersection/intersection.js | 2 ++ 
.../RegExp/Intersection/printAst.expected | 35 +++++++++++++++++++ .../RegExp/Intersection/printAst.ql | 2 ++ 3 files changed, 39 insertions(+) create mode 100644 javascript/ql/test/library-tests/RegExp/Intersection/intersection.js create mode 100644 javascript/ql/test/library-tests/RegExp/Intersection/printAst.expected create mode 100644 javascript/ql/test/library-tests/RegExp/Intersection/printAst.ql diff --git a/javascript/ql/test/library-tests/RegExp/Intersection/intersection.js b/javascript/ql/test/library-tests/RegExp/Intersection/intersection.js new file mode 100644 index 000000000000..97e3223f12de --- /dev/null +++ b/javascript/ql/test/library-tests/RegExp/Intersection/intersection.js @@ -0,0 +1,2 @@ +/\p{Script_Extensions=Greek}|\p{Letter}/v; +/\p{Script_Extensions=Greek}&&\p{Letter}/v; diff --git a/javascript/ql/test/library-tests/RegExp/Intersection/printAst.expected b/javascript/ql/test/library-tests/RegExp/Intersection/printAst.expected new file mode 100644 index 000000000000..6ecd40092142 --- /dev/null +++ b/javascript/ql/test/library-tests/RegExp/Intersection/printAst.expected @@ -0,0 +1,35 @@ +nodes +| intersection.js:1:1:1:41 | [RegExpLiteral] /\\p{Script_Extensions=Greek}\|\\p{Letter}/v | semmle.label | [RegExpLiteral] /\\p{Script_Extensions=Greek}\|\\p{Letter}/v | +| intersection.js:1:1:1:42 | [ExprStmt] /\\p{Scr ... ter}/v; | semmle.label | [ExprStmt] /\\p{Scr ... ter}/v; | +| intersection.js:1:1:1:42 | [ExprStmt] /\\p{Scr ... 
ter}/v; | semmle.order | 1 | +| intersection.js:1:2:1:28 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.label | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | +| intersection.js:1:2:1:39 | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} | semmle.label | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} | +| intersection.js:1:30:1:39 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.label | [RegExpUnicodePropertyEscape] \\p{Letter} | +| intersection.js:2:1:2:42 | [RegExpLiteral] /\\p{Script_Extensions=Greek}&&\\p{Letter}/v | semmle.label | [RegExpLiteral] /\\p{Script_Extensions=Greek}&&\\p{Letter}/v | +| intersection.js:2:1:2:43 | [ExprStmt] /\\p{Scr ... ter}/v; | semmle.label | [ExprStmt] /\\p{Scr ... ter}/v; | +| intersection.js:2:1:2:43 | [ExprStmt] /\\p{Scr ... ter}/v; | semmle.order | 2 | +| intersection.js:2:2:2:28 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.label | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | +| intersection.js:2:2:2:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | semmle.label | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | +| intersection.js:2:29:2:30 | [RegExpNormalConstant] && | semmle.label | [RegExpNormalConstant] && | +| intersection.js:2:31:2:40 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.label | [RegExpUnicodePropertyEscape] \\p{Letter} | +edges +| intersection.js:1:1:1:41 | [RegExpLiteral] /\\p{Script_Extensions=Greek}\|\\p{Letter}/v | intersection.js:1:2:1:39 | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} | semmle.label | 0 | +| intersection.js:1:1:1:41 | [RegExpLiteral] /\\p{Script_Extensions=Greek}\|\\p{Letter}/v | intersection.js:1:2:1:39 | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} | semmle.order | 0 | +| intersection.js:1:1:1:42 | [ExprStmt] /\\p{Scr ... 
ter}/v; | intersection.js:1:1:1:41 | [RegExpLiteral] /\\p{Script_Extensions=Greek}\|\\p{Letter}/v | semmle.label | 1 | +| intersection.js:1:1:1:42 | [ExprStmt] /\\p{Scr ... ter}/v; | intersection.js:1:1:1:41 | [RegExpLiteral] /\\p{Script_Extensions=Greek}\|\\p{Letter}/v | semmle.order | 1 | +| intersection.js:1:2:1:39 | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} | intersection.js:1:2:1:28 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.label | 0 | +| intersection.js:1:2:1:39 | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} | intersection.js:1:2:1:28 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.order | 0 | +| intersection.js:1:2:1:39 | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} | intersection.js:1:30:1:39 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.label | 1 | +| intersection.js:1:2:1:39 | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} | intersection.js:1:30:1:39 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.order | 1 | +| intersection.js:2:1:2:42 | [RegExpLiteral] /\\p{Script_Extensions=Greek}&&\\p{Letter}/v | intersection.js:2:2:2:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | semmle.label | 0 | +| intersection.js:2:1:2:42 | [RegExpLiteral] /\\p{Script_Extensions=Greek}&&\\p{Letter}/v | intersection.js:2:2:2:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | semmle.order | 0 | +| intersection.js:2:1:2:43 | [ExprStmt] /\\p{Scr ... ter}/v; | intersection.js:2:1:2:42 | [RegExpLiteral] /\\p{Script_Extensions=Greek}&&\\p{Letter}/v | semmle.label | 1 | +| intersection.js:2:1:2:43 | [ExprStmt] /\\p{Scr ... 
ter}/v; | intersection.js:2:1:2:42 | [RegExpLiteral] /\\p{Script_Extensions=Greek}&&\\p{Letter}/v | semmle.order | 1 | +| intersection.js:2:2:2:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | intersection.js:2:2:2:28 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.label | 0 | +| intersection.js:2:2:2:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | intersection.js:2:2:2:28 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.order | 0 | +| intersection.js:2:2:2:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | intersection.js:2:29:2:30 | [RegExpNormalConstant] && | semmle.label | 1 | +| intersection.js:2:2:2:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | intersection.js:2:29:2:30 | [RegExpNormalConstant] && | semmle.order | 1 | +| intersection.js:2:2:2:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | intersection.js:2:31:2:40 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.label | 2 | +| intersection.js:2:2:2:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | intersection.js:2:31:2:40 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.order | 2 | +graphProperties +| semmle.graphKind | tree | diff --git a/javascript/ql/test/library-tests/RegExp/Intersection/printAst.ql b/javascript/ql/test/library-tests/RegExp/Intersection/printAst.ql new file mode 100644 index 000000000000..248ea7ad396b --- /dev/null +++ b/javascript/ql/test/library-tests/RegExp/Intersection/printAst.ql @@ -0,0 +1,2 @@ +import javascript +import semmle.javascript.PrintAst From 2c8d44c9c216d1f8485dc13b594d71ec8e186206 Mon Sep 17 00:00:00 2001 From: Napalys Date: Wed, 4 Dec 2024 08:48:56 +0100 Subject: [PATCH 5/5] JS: Added intersection suppoort - more of stash commit --- .../semmle/js/ast/regexp/Intersection.java | 23 ++++++ .../src/com/semmle/js/ast/regexp/Visitor.java | 2 + .../semmle/js/extractor/RegExpExtractor.java | 38 +++++---- 
.../com/semmle/js/parser/RegExpParser.java | 22 +++++- .../tests/es2021/input/regexpIntersection.js | 1 + .../output/trap/regexpIntersection.js.trap | 78 +++++++++++++++++++ .../javascript/MembershipCandidates.qll | 1 + .../ql/lib/semmle/javascript/Regexp.qll | 23 ++++++ .../ql/lib/semmlecode.javascript.dbscheme | 3 +- .../RegExpIntersection/PrintAst.expected | 35 +++++++++ .../RegExp/RegExpIntersection/PrintAst.ql | 1 + .../RegExp/RegExpIntersection/intersection.js | 2 + 12 files changed, 210 insertions(+), 19 deletions(-) create mode 100644 javascript/extractor/src/com/semmle/js/ast/regexp/Intersection.java create mode 100644 javascript/extractor/tests/es2021/input/regexpIntersection.js create mode 100644 javascript/extractor/tests/es2021/output/trap/regexpIntersection.js.trap create mode 100644 javascript/ql/test/library-tests/RegExp/RegExpIntersection/PrintAst.expected create mode 100644 javascript/ql/test/library-tests/RegExp/RegExpIntersection/PrintAst.ql create mode 100644 javascript/ql/test/library-tests/RegExp/RegExpIntersection/intersection.js diff --git a/javascript/extractor/src/com/semmle/js/ast/regexp/Intersection.java b/javascript/extractor/src/com/semmle/js/ast/regexp/Intersection.java new file mode 100644 index 000000000000..657242d1905d --- /dev/null +++ b/javascript/extractor/src/com/semmle/js/ast/regexp/Intersection.java @@ -0,0 +1,23 @@ +package com.semmle.js.ast.regexp; + +import com.semmle.js.ast.SourceLocation; +import java.util.List; + +public class Intersection extends RegExpTerm { + private final List intersections; + + public Intersection(SourceLocation loc, List intersections) { + super(loc, "Intersection"); + this.intersections = intersections; + } + + @Override + public void accept(Visitor v) { + v.visit(this); + } + + /** The individual elements of the intersections. 
*/ + public List getIntersections() { + return intersections; + } +} diff --git a/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java b/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java index 3671a55694be..95a0000fe7f9 100644 --- a/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java +++ b/javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java @@ -61,4 +61,6 @@ public interface Visitor { public void visit(ZeroWidthNegativeLookbehind nd); public void visit(UnicodePropertyEscape nd); + + public void visit(Intersection nd); } diff --git a/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java index 41d7d446cfe3..1287894a5a57 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java @@ -22,6 +22,7 @@ import com.semmle.js.ast.regexp.Group; import com.semmle.js.ast.regexp.HexEscapeSequence; import com.semmle.js.ast.regexp.IdentityEscape; +import com.semmle.js.ast.regexp.Intersection; import com.semmle.js.ast.regexp.Literal; import com.semmle.js.ast.regexp.NamedBackReference; import com.semmle.js.ast.regexp.NonWordBoundary; @@ -92,24 +93,25 @@ public RegExpExtractor(TrapWriter trapwriter, LocationManager locationManager) { termkinds.put("ZeroWidthPositiveLookbehind", 25); termkinds.put("ZeroWidthNegativeLookbehind", 26); termkinds.put("UnicodePropertyEscape", 27); + termkinds.put("Intersection", 28); } private static final String[] errmsgs = new String[] { - "unexpected end of regular expression", - "unexpected character", - "expected digit", - "expected hexadecimal digit", - "expected control letter", - "expected ')'", - "expected '}'", - "trailing characters", - "octal escape sequence", - "invalid back reference", - "expected ']'", - "expected identifier", - "expected '>'" - }; + "unexpected end of regular expression", + "unexpected character", 
+ "expected digit", + "expected hexadecimal digit", + "expected control letter", + "expected ')'", + "expected '}'", + "trailing characters", + "octal escape sequence", + "invalid back reference", + "expected ']'", + "expected identifier", + "expected '>'" + }; private Label extractTerm(RegExpTerm term, Label parent, int idx) { Label lbl = trapwriter.localID(term); @@ -344,6 +346,14 @@ public void visit(CharacterClassRange nd) { visit(nd.getLeft(), lbl, 0); visit(nd.getRight(), lbl, 1); } + + @Override + public void visit(Intersection nd) { + Label lbl = extractTerm(nd, parent, idx); + int i = 0; + for (RegExpTerm element : nd.getIntersections()) + visit(element, lbl, i++); + } } public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing) { diff --git a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java index 5b2177f3fffa..164e64321a0b 100644 --- a/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java +++ b/javascript/extractor/src/com/semmle/js/parser/RegExpParser.java @@ -18,6 +18,7 @@ import com.semmle.js.ast.regexp.Group; import com.semmle.js.ast.regexp.HexEscapeSequence; import com.semmle.js.ast.regexp.IdentityEscape; +import com.semmle.js.ast.regexp.Intersection; import com.semmle.js.ast.regexp.NamedBackReference; import com.semmle.js.ast.regexp.NonWordBoundary; import com.semmle.js.ast.regexp.OctalEscape; @@ -225,20 +226,33 @@ private T finishTerm(T term) { private RegExpTerm parseDisjunction() { SourceLocation loc = new SourceLocation(pos()); List disjuncts = new ArrayList<>(); - disjuncts.add(this.parseAlternative()); - while (this.match("|")) disjuncts.add(this.parseAlternative()); + disjuncts.add(this.parseIntersection()); + while (this.match("|")) { + disjuncts.add(this.parseIntersection()); + } if (disjuncts.size() == 1) return disjuncts.get(0); return this.finishTerm(new Disjunction(loc, disjuncts)); - } +} private RegExpTerm 
parseAlternative() { SourceLocation loc = new SourceLocation(pos()); List elements = new ArrayList<>(); - while (!this.lookahead(null, "|", ")")) elements.add(this.parseTerm()); + while (!this.lookahead(null, "|", "&&", ")")) elements.add(this.parseTerm()); if (elements.size() == 1) return elements.get(0); return this.finishTerm(new Sequence(loc, elements)); } + private RegExpTerm parseIntersection() { + SourceLocation loc = new SourceLocation(pos()); + List intersections = new ArrayList<>(); + intersections.add(this.parseAlternative()); + while (this.match("&&")) { + intersections.add(this.parseAlternative()); + } + if (intersections.size() == 1) return intersections.get(0); + return this.finishTerm(new Intersection(loc, intersections)); +} + private RegExpTerm parseTerm() { SourceLocation loc = new SourceLocation(pos()); diff --git a/javascript/extractor/tests/es2021/input/regexpIntersection.js b/javascript/extractor/tests/es2021/input/regexpIntersection.js new file mode 100644 index 000000000000..78370b0505cc --- /dev/null +++ b/javascript/extractor/tests/es2021/input/regexpIntersection.js @@ -0,0 +1 @@ +/\p{Script_Extensions=Greek}&&\p{Letter}/v; diff --git a/javascript/extractor/tests/es2021/output/trap/regexpIntersection.js.trap b/javascript/extractor/tests/es2021/output/trap/regexpIntersection.js.trap new file mode 100644 index 000000000000..ee4c0618c20e --- /dev/null +++ b/javascript/extractor/tests/es2021/output/trap/regexpIntersection.js.trap @@ -0,0 +1,78 @@ +#10000=@"/regexpIntersection.js;sourcefile" +files(#10000,"/regexpIntersection.js") +#10001=@"/;folder" +folders(#10001,"/") +containerparent(#10001,#10000) +#10002=@"loc,{#10000},0,0,0,0" +locations_default(#10002,#10000,0,0,0,0) +hasLocation(#10000,#10002) +#20000=@"global_scope" +scopes(#20000,0) +#20001=@"script;{#10000},1,1" +#20002=* +lines(#20002,#20001,"/\p{Script_Extensions=Greek}&&\p{Letter}/v;"," +") +#20003=@"loc,{#10000},1,1,1,43" +locations_default(#20003,#10000,1,1,1,43) 
+hasLocation(#20002,#20003) +numlines(#20001,1,1,0) +#20004=* +tokeninfo(#20004,5,#20001,0,"/\p{Script_Extensions=Greek}&&\p{Letter}/v") +#20005=@"loc,{#10000},1,1,1,42" +locations_default(#20005,#10000,1,1,1,42) +hasLocation(#20004,#20005) +#20006=* +tokeninfo(#20006,8,#20001,1,";") +#20007=@"loc,{#10000},1,43,1,43" +locations_default(#20007,#10000,1,43,1,43) +hasLocation(#20006,#20007) +#20008=* +tokeninfo(#20008,0,#20001,2,"") +#20009=@"loc,{#10000},2,1,2,0" +locations_default(#20009,#10000,2,1,2,0) +hasLocation(#20008,#20009) +toplevels(#20001,0) +#20010=@"loc,{#10000},1,1,2,0" +locations_default(#20010,#10000,1,1,2,0) +hasLocation(#20001,#20010) +#20011=* +stmts(#20011,2,#20001,0,"/\p{Scr ... ter}/v;") +hasLocation(#20011,#20003) +stmt_containers(#20011,#20001) +#20012=* +exprs(#20012,5,#20011,0,"/\p{Scr ... tter}/v") +hasLocation(#20012,#20005) +enclosing_stmt(#20012,#20011) +expr_containers(#20012,#20001) +literals("/\p{Script_Extensions=Greek}&&\p{Letter}/v","/\p{Script_Extensions=Greek}&&\p{Letter}/v",#20012) +#20013=* +regexpterm(#20013,28,#20012,0,"\p{Script_Extensions=Greek}&&\p{Letter}") +#20014=@"loc,{#10000},1,2,1,40" +locations_default(#20014,#10000,1,2,1,40) +hasLocation(#20013,#20014) +#20015=* +regexpterm(#20015,27,#20013,0,"\p{Script_Extensions=Greek}") +#20016=@"loc,{#10000},1,2,1,28" +locations_default(#20016,#10000,1,2,1,28) +hasLocation(#20015,#20016) +unicode_property_escapename(#20015,"Script_Extensions") +unicode_property_escapevalue(#20015,"Greek") +#20017=* +regexpterm(#20017,27,#20013,1,"\p{Letter}") +#20018=@"loc,{#10000},1,31,1,40" +locations_default(#20018,#10000,1,31,1,40) +hasLocation(#20017,#20018) +unicode_property_escapename(#20017,"Letter") +#20019=* +entry_cfg_node(#20019,#20001) +#20020=@"loc,{#10000},1,1,1,0" +locations_default(#20020,#10000,1,1,1,0) +hasLocation(#20019,#20020) +#20021=* +exit_cfg_node(#20021,#20001) +hasLocation(#20021,#20009) +successor(#20011,#20012) +successor(#20012,#20021) +successor(#20019,#20011) 
+numlines(#10000,1,1,0) +filetype(#10000,"javascript") diff --git a/javascript/ql/lib/semmle/javascript/MembershipCandidates.qll b/javascript/ql/lib/semmle/javascript/MembershipCandidates.qll index da9e90744ef0..45680c0d49f1 100644 --- a/javascript/ql/lib/semmle/javascript/MembershipCandidates.qll +++ b/javascript/ql/lib/semmle/javascript/MembershipCandidates.qll @@ -146,6 +146,7 @@ module MembershipCandidate { child instanceof RegExpDollar or child instanceof RegExpConstant or child instanceof RegExpAlt or + child instanceof RegExpIntersection or child instanceof RegExpGroup ) and // exclude "length matches" that match every string diff --git a/javascript/ql/lib/semmle/javascript/Regexp.qll b/javascript/ql/lib/semmle/javascript/Regexp.qll index 81c5a7e6bdf6..36a993de1e8c 100644 --- a/javascript/ql/lib/semmle/javascript/Regexp.qll +++ b/javascript/ql/lib/semmle/javascript/Regexp.qll @@ -301,6 +301,29 @@ class RegExpAlt extends RegExpTerm, @regexp_alt { override string getAPrimaryQlClass() { result = "RegExpAlt" } } +/** + * An intersection term, that is, a term of the form `a&&b`. + * + * Example: + * + * ``` + * /\p{Script_Extensions=Greek}&&\p{Letter}/v + * ``` + */ +class RegExpIntersection extends RegExpTerm, @regexp_intersection { + /** Gets an intersected term of this term. */ + RegExpTerm getIntersectedTerm() { result = this.getAChild() } + + /** Gets the number of intersected terms of this term. */ + int getNumIntersectedTerm() { result = this.getNumChild() } + + override predicate isNullable() { this.getIntersectedTerm().isNullable() } + + override string getAMatchedString() { result = this.getIntersectedTerm().getAMatchedString() } + + override string getAPrimaryQlClass() { result = "RegExpIntersection" } +} + /** * A sequence term. 
* diff --git a/javascript/ql/lib/semmlecode.javascript.dbscheme b/javascript/ql/lib/semmlecode.javascript.dbscheme index c88c69174bd0..eda7d3dc42ba 100644 --- a/javascript/ql/lib/semmlecode.javascript.dbscheme +++ b/javascript/ql/lib/semmlecode.javascript.dbscheme @@ -859,7 +859,8 @@ case @regexpterm.kind of | 24 = @regexp_char_range | 25 = @regexp_positive_lookbehind | 26 = @regexp_negative_lookbehind -| 27 = @regexp_unicode_property_escape; +| 27 = @regexp_unicode_property_escape +| 28 = @regexp_intersection; regexp_parse_errors (unique int id: @regexp_parse_error, int regexp: @regexpterm ref, diff --git a/javascript/ql/test/library-tests/RegExp/RegExpIntersection/PrintAst.expected b/javascript/ql/test/library-tests/RegExp/RegExpIntersection/PrintAst.expected new file mode 100644 index 000000000000..e58b2bf95e92 --- /dev/null +++ b/javascript/ql/test/library-tests/RegExp/RegExpIntersection/PrintAst.expected @@ -0,0 +1,35 @@ +nodes +| intersection.js:1:1:1:42 | [RegExpLiteral] /\\p{Script_Extensions=Greek}&&\\p{Letter}/v | semmle.label | [RegExpLiteral] /\\p{Script_Extensions=Greek}&&\\p{Letter}/v | +| intersection.js:1:1:1:43 | [ExprStmt] /\\p{Scr ... ter}/v; | semmle.label | [ExprStmt] /\\p{Scr ... ter}/v; | +| intersection.js:1:1:1:43 | [ExprStmt] /\\p{Scr ... 
ter}/v; | semmle.order | 1 | +| intersection.js:1:2:1:28 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.label | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | +| intersection.js:1:2:1:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | semmle.label | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | +| intersection.js:1:29:1:30 | [RegExpNormalConstant] && | semmle.label | [RegExpNormalConstant] && | +| intersection.js:1:31:1:40 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.label | [RegExpUnicodePropertyEscape] \\p{Letter} | +| intersection.js:2:1:2:41 | [RegExpLiteral] /\\p{Script_Extensions=Greek}\|\\p{Letter}/v | semmle.label | [RegExpLiteral] /\\p{Script_Extensions=Greek}\|\\p{Letter}/v | +| intersection.js:2:1:2:42 | [ExprStmt] /\\p{Scr ... ter}/v; | semmle.label | [ExprStmt] /\\p{Scr ... ter}/v; | +| intersection.js:2:1:2:42 | [ExprStmt] /\\p{Scr ... ter}/v; | semmle.order | 2 | +| intersection.js:2:2:2:28 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.label | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | +| intersection.js:2:2:2:39 | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} | semmle.label | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} | +| intersection.js:2:30:2:39 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.label | [RegExpUnicodePropertyEscape] \\p{Letter} | +edges +| intersection.js:1:1:1:42 | [RegExpLiteral] /\\p{Script_Extensions=Greek}&&\\p{Letter}/v | intersection.js:1:2:1:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | semmle.label | 0 | +| intersection.js:1:1:1:42 | [RegExpLiteral] /\\p{Script_Extensions=Greek}&&\\p{Letter}/v | intersection.js:1:2:1:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | semmle.order | 0 | +| intersection.js:1:1:1:43 | [ExprStmt] /\\p{Scr ... 
ter}/v; | intersection.js:1:1:1:42 | [RegExpLiteral] /\\p{Script_Extensions=Greek}&&\\p{Letter}/v | semmle.label | 1 | +| intersection.js:1:1:1:43 | [ExprStmt] /\\p{Scr ... ter}/v; | intersection.js:1:1:1:42 | [RegExpLiteral] /\\p{Script_Extensions=Greek}&&\\p{Letter}/v | semmle.order | 1 | +| intersection.js:1:2:1:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | intersection.js:1:2:1:28 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.label | 0 | +| intersection.js:1:2:1:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | intersection.js:1:2:1:28 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.order | 0 | +| intersection.js:1:2:1:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | intersection.js:1:29:1:30 | [RegExpNormalConstant] && | semmle.label | 1 | +| intersection.js:1:2:1:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | intersection.js:1:29:1:30 | [RegExpNormalConstant] && | semmle.order | 1 | +| intersection.js:1:2:1:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | intersection.js:1:31:1:40 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.label | 2 | +| intersection.js:1:2:1:40 | [RegExpSequence] \\p{Script_Extensions=Greek}&&\\p{Letter} | intersection.js:1:31:1:40 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.order | 2 | +| intersection.js:2:1:2:41 | [RegExpLiteral] /\\p{Script_Extensions=Greek}\|\\p{Letter}/v | intersection.js:2:2:2:39 | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} | semmle.label | 0 | +| intersection.js:2:1:2:41 | [RegExpLiteral] /\\p{Script_Extensions=Greek}\|\\p{Letter}/v | intersection.js:2:2:2:39 | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} | semmle.order | 0 | +| intersection.js:2:1:2:42 | [ExprStmt] /\\p{Scr ... 
ter}/v; | intersection.js:2:1:2:41 | [RegExpLiteral] /\\p{Script_Extensions=Greek}\|\\p{Letter}/v | semmle.label | 1 | +| intersection.js:2:1:2:42 | [ExprStmt] /\\p{Scr ... ter}/v; | intersection.js:2:1:2:41 | [RegExpLiteral] /\\p{Script_Extensions=Greek}\|\\p{Letter}/v | semmle.order | 1 | +| intersection.js:2:2:2:39 | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} | intersection.js:2:2:2:28 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.label | 0 | +| intersection.js:2:2:2:39 | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} | intersection.js:2:2:2:28 | [RegExpUnicodePropertyEscape] \\p{Script_Extensions=Greek} | semmle.order | 0 | +| intersection.js:2:2:2:39 | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} | intersection.js:2:30:2:39 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.label | 1 | +| intersection.js:2:2:2:39 | [RegExpAlt] \\p{Script_Extensions=Greek}\|\\p{Letter} | intersection.js:2:30:2:39 | [RegExpUnicodePropertyEscape] \\p{Letter} | semmle.order | 1 | +graphProperties +| semmle.graphKind | tree | diff --git a/javascript/ql/test/library-tests/RegExp/RegExpIntersection/PrintAst.ql b/javascript/ql/test/library-tests/RegExp/RegExpIntersection/PrintAst.ql new file mode 100644 index 000000000000..8ceaf83964a3 --- /dev/null +++ b/javascript/ql/test/library-tests/RegExp/RegExpIntersection/PrintAst.ql @@ -0,0 +1 @@ +import semmle.javascript.PrintAst diff --git a/javascript/ql/test/library-tests/RegExp/RegExpIntersection/intersection.js b/javascript/ql/test/library-tests/RegExp/RegExpIntersection/intersection.js new file mode 100644 index 000000000000..298da6d1e59e --- /dev/null +++ b/javascript/ql/test/library-tests/RegExp/RegExpIntersection/intersection.js @@ -0,0 +1,2 @@ +/\p{Script_Extensions=Greek}&&\p{Letter}/v; +/\p{Script_Extensions=Greek}|\p{Letter}/v;