Harden regexp parsing a bit

This commit is contained in:
dcodeIO 2018-01-07 18:15:21 +01:00
parent 7c8670ac35
commit 0de05b543b
7 changed files with 54 additions and 18 deletions

View File

@ -240,11 +240,11 @@ export abstract class Node {
return expr; return expr;
} }
static createRegexpLiteralExpression(pattern: string, modifiers: string, range: Range): RegexpLiteralExpression { static createRegexpLiteralExpression(pattern: string, flags: string, range: Range): RegexpLiteralExpression {
var expr = new RegexpLiteralExpression(); var expr = new RegexpLiteralExpression();
expr.range = range; expr.range = range;
expr.pattern = pattern; expr.pattern = pattern;
expr.modifiers = modifiers; expr.patternFlags = flags;
return expr; return expr;
} }
@ -1048,14 +1048,14 @@ export class RegexpLiteralExpression extends LiteralExpression {
/** Regular expression pattern. */ /** Regular expression pattern. */
pattern: string; pattern: string;
/** Regular expression modifiers. */ /** Regular expression flags. */
modifiers: string; patternFlags: string;
serialize(sb: string[]): void { serialize(sb: string[]): void {
sb.push("/"); sb.push("/");
sb.push(this.pattern); sb.push(this.pattern);
sb.push("/"); sb.push("/");
sb.push(this.modifiers); sb.push(this.patternFlags);
} }
} }

View File

@ -10,6 +10,7 @@ export enum DiagnosticCode {
Type_0_cannot_be_changed_to_type_1 = 106, Type_0_cannot_be_changed_to_type_1 = 106,
Structs_cannot_extend_classes_and_vice_versa = 107, Structs_cannot_extend_classes_and_vice_versa = 107,
Structs_cannot_implement_interfaces = 108, Structs_cannot_implement_interfaces = 108,
Invalid_regular_expression_flags = 109,
Unterminated_string_literal = 1002, Unterminated_string_literal = 1002,
Identifier_expected = 1003, Identifier_expected = 1003,
_0_expected = 1005, _0_expected = 1005,
@ -95,6 +96,7 @@ export function diagnosticCodeToString(code: DiagnosticCode): string {
case 106: return "Type '{0}' cannot be changed to type '{1}'."; case 106: return "Type '{0}' cannot be changed to type '{1}'.";
case 107: return "Structs cannot extend classes and vice-versa."; case 107: return "Structs cannot extend classes and vice-versa.";
case 108: return "Structs cannot implement interfaces."; case 108: return "Structs cannot implement interfaces.";
case 109: return "Invalid regular expression flags.";
case 1002: return "Unterminated string literal."; case 1002: return "Unterminated string literal.";
case 1003: return "Identifier expected."; case 1003: return "Identifier expected.";
case 1005: return "'{0}' expected."; case 1005: return "'{0}' expected.";

View File

@ -8,6 +8,7 @@
"Type '{0}' cannot be changed to type '{1}'.": 106, "Type '{0}' cannot be changed to type '{1}'.": 106,
"Structs cannot extend classes and vice-versa.": 107, "Structs cannot extend classes and vice-versa.": 107,
"Structs cannot implement interfaces.": 108, "Structs cannot implement interfaces.": 108,
"Invalid regular expression flags.": 109,
"Unterminated string literal.": 1002, "Unterminated string literal.": 1002,
"Identifier expected.": 1003, "Identifier expected.": 1003,

View File

@ -1426,8 +1426,11 @@ export class Parser extends DiagnosticEmitter {
return Node.createNewExpression((<CallExpression>operand).expression, (<CallExpression>operand).typeArguments, (<CallExpression>operand).arguments, tn.range(startPos, tn.pos)); return Node.createNewExpression((<CallExpression>operand).expression, (<CallExpression>operand).typeArguments, (<CallExpression>operand).arguments, tn.range(startPos, tn.pos));
this.error(DiagnosticCode.Operation_not_supported, tn.range()); this.error(DiagnosticCode.Operation_not_supported, tn.range());
return null; return null;
} else } else {
operand = this.parseExpression(tn, p); operand = this.parseExpression(tn, p);
if (!operand)
return null;
}
// UnaryPrefixExpression // UnaryPrefixExpression
if (token == Token.PLUS_PLUS || token == Token.MINUS_MINUS) if (token == Token.PLUS_PLUS || token == Token.MINUS_MINUS)
@ -1513,15 +1516,14 @@ export class Parser extends DiagnosticEmitter {
return Node.createFloatLiteralExpression(tn.readFloat(), tn.range(startPos, tn.pos)); return Node.createFloatLiteralExpression(tn.readFloat(), tn.range(startPos, tn.pos));
// RegexpLiteralExpression // RegexpLiteralExpression
// note that this also continues on invalid ones so the surrounding AST remains intact
case Token.SLASH: case Token.SLASH:
var regexpPattern = tn.readRegexpPattern(); var regexpPattern = tn.readRegexpPattern(); // also reports
if (regexpPattern == null)
return null;
if (!tn.skip(Token.SLASH)) { if (!tn.skip(Token.SLASH)) {
this.error(DiagnosticCode._0_expected, tn.range(), "/"); this.error(DiagnosticCode._0_expected, tn.range(), "/");
return null; return null;
} }
return Node.createRegexpLiteralExpression(regexpPattern, tn.readRegexpModifiers(), tn.range(startPos, tn.pos)); return Node.createRegexpLiteralExpression(regexpPattern, tn.readRegexpFlags() /* also reports */, tn.range(startPos, tn.pos));
default: default:
this.error(DiagnosticCode.Expression_expected, tn.range()); this.error(DiagnosticCode.Expression_expected, tn.range());

View File

@ -901,14 +901,14 @@ export class Tokenizer extends DiagnosticEmitter {
} }
} }
readRegexpPattern(): string | null { readRegexpPattern(): string {
var text = this.source.text; var text = this.source.text;
var start = this.pos; var start = this.pos;
var escaped = false; var escaped = false;
while (true) { while (true) {
if (this.pos >= this.end) { if (this.pos >= this.end) {
this.error(DiagnosticCode.Unterminated_regular_expression_literal, this.range(start, this.end)); this.error(DiagnosticCode.Unterminated_regular_expression_literal, this.range(start, this.end));
return null; break;
} }
if (text.charCodeAt(this.pos) == CharCode.BACKSLASH) { if (text.charCodeAt(this.pos) == CharCode.BACKSLASH) {
++this.pos; ++this.pos;
@ -920,7 +920,7 @@ export class Tokenizer extends DiagnosticEmitter {
break; break;
if (isLineBreak(c)) { if (isLineBreak(c)) {
this.error(DiagnosticCode.Unterminated_regular_expression_literal, this.range(start, this.pos)); this.error(DiagnosticCode.Unterminated_regular_expression_literal, this.range(start, this.pos));
return null; break;
} }
++this.pos; ++this.pos;
escaped = false; escaped = false;
@ -928,23 +928,35 @@ export class Tokenizer extends DiagnosticEmitter {
return text.substring(start, this.pos); return text.substring(start, this.pos);
} }
readRegexpModifiers(): string { readRegexpFlags(): string {
var text = this.source.text; var text = this.source.text;
var start = this.pos; var start = this.pos;
/a/ var flags = 0;
while (this.pos < this.end) { while (this.pos < this.end) {
switch (text.charCodeAt(this.pos)) { var c: i32 = text.charCodeAt(this.pos);
if (!isIdentifierPart(c))
case CharCode.g: break;
case CharCode.i:
case CharCode.m:
++this.pos; ++this.pos;
switch (c) {
// make sure each supported flag is unique
case CharCode.g:
flags |= select<i32>(1, -1, !(flags & 1));
break;
case CharCode.i:
flags |= select<i32>(2, -1, !(flags & 2));
break;
case CharCode.m:
flags |= select<i32>(4, -1, !(flags & 4));
break; break;
default: default:
return text.substring(start, this.pos); flags = -1;
break;
} }
} }
if (flags == -1)
this.error(DiagnosticCode.Invalid_regular_expression_flags, this.range(start, this.pos));
return text.substring(start, this.pos); return text.substring(start, this.pos);
} }

View File

@ -16,3 +16,15 @@ b/ig;
// just a comment // just a comment
//ig; //ig;
// duplicate flags
/(abc)\//iig;
// invalid flags
/(abc)\//iX;
// surrounding AST remains intact
false && /abc/gX.test(someString) || true;

View File

@ -3,4 +3,11 @@
let re = /(abc)\//ig; let re = /(abc)\//ig;
let noRe = !/(abc)\//i; let noRe = !/(abc)\//i;
b / ig; b / ig;
/(abc)\//iig;
/(abc)\//iX;
false && /abc/gX.test(someString) || true;
// ERROR 1161: "Unterminated regular expression literal." in regexp.ts @ 75,76 // ERROR 1161: "Unterminated regular expression literal." in regexp.ts @ 75,76
// ERROR 1005: "'/' expected." in regexp.ts @ 74,76
// ERROR 109: "Invalid regular expression flags." in regexp.ts @ 95,98
// ERROR 109: "Invalid regular expression flags." in regexp.ts @ 111,113
// ERROR 109: "Invalid regular expression flags." in regexp.ts @ 131,133