Skip to content

Commit

Permalink
Merge pull request #123 from nicolo-ribaudo/v-implies-u
Browse files Browse the repository at this point in the history
Accept the different `u` features in `v` mode
  • Loading branch information
jviereck authored Dec 23, 2021
2 parents 9ce48a9 + b3456d7 commit 75e5d31
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 15 deletions.
31 changes: 16 additions & 15 deletions parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@
function createCharacter(matches) {
var _char = matches[0];
var first = _char.charCodeAt(0);
if (hasUnicodeFlag) {
if (isUnicodeMode) {
var second;
if (_char.length === 1 && first >= 0xD800 && first <= 0xDBFF) {
second = lookahead().charCodeAt(0);
Expand Down Expand Up @@ -573,7 +573,7 @@

// If no unicode flag, then try to parse ExtendedAtom -> ExtendedPatternCharacter.
// ExtendedPatternCharacter
if (!hasUnicodeFlag && (res = matchReg(/^{/))) {
if (!isUnicodeMode && (res = matchReg(/^{/))) {
atom = createCharacter(res);
} else {
bail('Expected atom');
Expand Down Expand Up @@ -729,7 +729,7 @@
// PatternCharacter
return createCharacter(res);
}
else if (!hasUnicodeFlag && (res = matchReg(/^(?:]|})/))) {
else if (!isUnicodeMode && (res = matchReg(/^(?:]|})/))) {
// ExtendedPatternCharacter, first part. See parseTerm.
return createCharacter(res);
}
Expand All @@ -741,7 +741,7 @@
// \ AtomEscape
res = parseAtomEscape();
if (!res) {
if (!hasUnicodeFlag && lookahead() == 'c') {
if (!isUnicodeMode && lookahead() == 'c') {
// B.1.4 ExtendedAtom
// \[lookahead = c]
return createValue('symbol', 92, pos - 1, pos);
Expand Down Expand Up @@ -771,7 +771,7 @@
}

function parseUnicodeSurrogatePairEscape(firstEscape) {
if (hasUnicodeFlag) {
if (isUnicodeMode) {
var first, second;
if (firstEscape.kind == 'unicodeEscape' &&
(first = firstEscape.codePoint) >= 0xD800 && first <= 0xDBFF &&
Expand Down Expand Up @@ -824,17 +824,17 @@
return createEscaped('singleEscape', 0x0008, '\\b');
} else if (match('B')) {
bail('\\B not possible inside of CharacterClass', '', from);
} else if (!hasUnicodeFlag && (res = matchReg(/^c([0-9])/))) {
} else if (!isUnicodeMode && (res = matchReg(/^c([0-9])/))) {
// B.1.4
// c ClassControlLetter, ClassControlLetter = DecimalDigit
return createEscaped('controlLetter', res[1] + 16, res[1], 2);
} else if (!hasUnicodeFlag && (res = matchReg(/^c_/))) {
} else if (!isUnicodeMode && (res = matchReg(/^c_/))) {
// B.1.4
// c ClassControlLetter, ClassControlLetter = _
return createEscaped('controlLetter', 31, '_', 2);
}
// [+U] -
if (hasUnicodeFlag && match('-')) {
if (isUnicodeMode && match('-')) {
return createEscaped('singleEscape', 0x002d, '\\-');
}
}
Expand Down Expand Up @@ -918,7 +918,7 @@
}

function bailOctalEscapeIfUnicode(from, pos) {
if (hasUnicodeFlag || hasUnicodeSetFlag) {
if (isUnicodeMode) {
bail("Invalid decimal escape in unicode mode", null, from, pos);
}
}
Expand All @@ -928,7 +928,7 @@
var res;
if (res = matchReg(/^[dDsSwW]/)) {
return createCharacterClassEscape(res[0]);
} else if (features.unicodePropertyEscape && (hasUnicodeFlag || hasUnicodeSetFlag) && (res = matchReg(/^([pP])\{([^\}]+)\}/))) {
} else if (features.unicodePropertyEscape && isUnicodeMode && (res = matchReg(/^([pP])\{([^\}]+)\}/))) {
// https://github.com/jviereck/regjsparser/issues/77
return addRaw({
type: 'unicodePropertyEscape',
Expand Down Expand Up @@ -958,7 +958,7 @@
return parseUnicodeSurrogatePairEscape(
createEscaped('unicodeEscape', parseInt(res[1], 16), res[1], 2)
);
} else if (hasUnicodeFlag && (res = matchReg(/^u\{([0-9a-fA-F]+)\}/))) {
} else if (isUnicodeMode && (res = matchReg(/^u\{([0-9a-fA-F]+)\}/))) {
// RegExpUnicodeEscapeSequence (ES6 Unicode code point escape)
return createEscaped('unicodeCodePointEscape', parseInt(res[1], 16), res[1], 4);
}
Expand Down Expand Up @@ -1098,8 +1098,8 @@
var tmp;
var l = lookahead();
if (
(hasUnicodeFlag && /[\^\$\.\*\+\?\(\)\\\[\]\{\}\|\/]/.test(l)) ||
(!hasUnicodeFlag && l !== "c")
(isUnicodeMode && /[\^\$\.\*\+\?\(\)\\\[\]\{\}\|\/]/.test(l)) ||
(!isUnicodeMode && l !== "c")
) {
if (l === "k" && features.lookbehind) {
return null;
Expand Down Expand Up @@ -1172,7 +1172,7 @@

// Check if both the from and atomTo have codePoints.
if (!('codePoint' in atom) || !('codePoint' in atomTo)) {
if (!hasUnicodeFlag) {
if (!isUnicodeMode) {
// If not, don't create a range but treat them as
// `atom` `-` `atom` instead.
//
Expand Down Expand Up @@ -1365,7 +1365,7 @@
// NestedClass ::
// ...
// \ CharacterClassEscape[+U, +V]
if (res = parseCharacterClassEscape()) {
if (res = parseClassEscape()) {
start = res;
} else if (res = parseClassCharacterEscapedHelper()) {
return res;
Expand Down Expand Up @@ -1512,6 +1512,7 @@
var shouldReparse = false;
var hasUnicodeFlag = (flags || "").indexOf("u") !== -1;
var hasUnicodeSetFlag = (flags || "").indexOf("v") !== -1;
var isUnicodeMode = hasUnicodeFlag || hasUnicodeSetFlag;
var pos = 0;

if (hasUnicodeSetFlag && !features.unicodeSet) {
Expand Down
62 changes: 62 additions & 0 deletions test/test-data-unicode-set.json
Original file line number Diff line number Diff line change
Expand Up @@ -1499,5 +1499,67 @@
"name": "SyntaxError",
"message": "Invalid decimal escape in unicode mode at position 1\n \\1\n ^",
"input": "\\1"
},
"[\\u{14630}]": {
"type": "characterClass",
"kind": "union",
"body": [
{
"type": "value",
"kind": "unicodeCodePointEscape",
"codePoint": 83504,
"range": [
1,
10
],
"raw": "\\u{14630}"
}
],
"negative": false,
"range": [
0,
11
],
"raw": "[\\u{14630}]"
},
"[\\u{14630}-\\u{14633}]": {
"type": "characterClass",
"kind": "union",
"body": [
{
"type": "characterClassRange",
"min": {
"type": "value",
"kind": "unicodeCodePointEscape",
"codePoint": 83504,
"range": [
1,
10
],
"raw": "\\u{14630}"
},
"max": {
"type": "value",
"kind": "unicodeCodePointEscape",
"codePoint": 83507,
"range": [
11,
20
],
"raw": "\\u{14633}"
},
"range": [
1,
20
],
"raw": "\\u{14630}-\\u{14633}"
}
],
"negative": false,
"range": [
0,
21
],
"raw": "[\\u{14630}-\\u{14633}]"
}
}

0 comments on commit 75e5d31

Please sign in to comment.