Harden regexp parsing a bit

This commit is contained in:
dcodeIO 2018-01-07 18:15:21 +01:00
parent 7c8670ac35
commit 0de05b543b
7 changed files with 54 additions and 18 deletions

View File

@ -240,11 +240,11 @@ export abstract class Node {
return expr;
}
/**
 * Creates a RegexpLiteralExpression and assigns its source range, its pattern
 * and the flag string that follows the pattern, then returns the new node.
 *
 * NOTE(review): this span looks like a rendered diff in which the removed
 * lines (`modifiers`) and the added lines (`flags` / `patternFlags`) are
 * interleaved without markers — confirm against the actual file.
 */
static createRegexpLiteralExpression(pattern: string, modifiers: string, range: Range): RegexpLiteralExpression {
static createRegexpLiteralExpression(pattern: string, flags: string, range: Range): RegexpLiteralExpression {
var expr = new RegexpLiteralExpression();
expr.range = range;
expr.pattern = pattern;
// presumably the old (`modifiers`) and the renamed (`patternFlags`) form of the same assignment
expr.modifiers = modifiers;
expr.patternFlags = flags;
return expr;
}
@ -1048,14 +1048,14 @@ export class RegexpLiteralExpression extends LiteralExpression {
/** Regular expression pattern. */
pattern: string;
/** Regular expression modifiers. */
modifiers: string;
/** Regular expression flags. */
patternFlags: string;
/**
 * Appends the textual form of this literal to `sb`: a slash, the pattern,
 * a closing slash and finally the flag string.
 *
 * NOTE(review): the last two pushes look like the old (`modifiers`) and the
 * renamed (`patternFlags`) form of the same statement, interleaved by a diff
 * rendering — confirm against the actual file.
 */
serialize(sb: string[]): void {
sb.push("/");
sb.push(this.pattern);
sb.push("/");
sb.push(this.modifiers);
sb.push(this.patternFlags);
}
}

View File

@ -10,6 +10,7 @@ export enum DiagnosticCode {
Type_0_cannot_be_changed_to_type_1 = 106,
Structs_cannot_extend_classes_and_vice_versa = 107,
Structs_cannot_implement_interfaces = 108,
Invalid_regular_expression_flags = 109,
Unterminated_string_literal = 1002,
Identifier_expected = 1003,
_0_expected = 1005,
@ -95,6 +96,7 @@ export function diagnosticCodeToString(code: DiagnosticCode): string {
case 106: return "Type '{0}' cannot be changed to type '{1}'.";
case 107: return "Structs cannot extend classes and vice-versa.";
case 108: return "Structs cannot implement interfaces.";
case 109: return "Invalid regular expression flags.";
case 1002: return "Unterminated string literal.";
case 1003: return "Identifier expected.";
case 1005: return "'{0}' expected.";

View File

@ -8,6 +8,7 @@
"Type '{0}' cannot be changed to type '{1}'.": 106,
"Structs cannot extend classes and vice-versa.": 107,
"Structs cannot implement interfaces.": 108,
"Invalid regular expression flags.": 109,
"Unterminated string literal.": 1002,
"Identifier expected.": 1003,

View File

@ -1426,8 +1426,11 @@ export class Parser extends DiagnosticEmitter {
return Node.createNewExpression((<CallExpression>operand).expression, (<CallExpression>operand).typeArguments, (<CallExpression>operand).arguments, tn.range(startPos, tn.pos));
this.error(DiagnosticCode.Operation_not_supported, tn.range());
return null;
} else
} else {
operand = this.parseExpression(tn, p);
if (!operand)
return null;
}
// UnaryPrefixExpression
if (token == Token.PLUS_PLUS || token == Token.MINUS_MINUS)
@ -1513,15 +1516,14 @@ export class Parser extends DiagnosticEmitter {
return Node.createFloatLiteralExpression(tn.readFloat(), tn.range(startPos, tn.pos));
// RegexpLiteralExpression
// note that this also continues on invalid ones so the surrounding AST remains intact
case Token.SLASH:
var regexpPattern = tn.readRegexpPattern();
if (regexpPattern == null)
return null;
var regexpPattern = tn.readRegexpPattern(); // also reports
if (!tn.skip(Token.SLASH)) {
this.error(DiagnosticCode._0_expected, tn.range(), "/");
return null;
}
return Node.createRegexpLiteralExpression(regexpPattern, tn.readRegexpModifiers(), tn.range(startPos, tn.pos));
return Node.createRegexpLiteralExpression(regexpPattern, tn.readRegexpFlags() /* also reports */, tn.range(startPos, tn.pos));
default:
this.error(DiagnosticCode.Expression_expected, tn.range());

View File

@ -901,14 +901,14 @@ export class Tokenizer extends DiagnosticEmitter {
}
}
readRegexpPattern(): string | null {
readRegexpPattern(): string {
var text = this.source.text;
var start = this.pos;
var escaped = false;
while (true) {
if (this.pos >= this.end) {
this.error(DiagnosticCode.Unterminated_regular_expression_literal, this.range(start, this.end));
return null;
break;
}
if (text.charCodeAt(this.pos) == CharCode.BACKSLASH) {
++this.pos;
@ -920,7 +920,7 @@ export class Tokenizer extends DiagnosticEmitter {
break;
if (isLineBreak(c)) {
this.error(DiagnosticCode.Unterminated_regular_expression_literal, this.range(start, this.pos));
return null;
break;
}
++this.pos;
escaped = false;
@ -928,23 +928,35 @@ export class Tokenizer extends DiagnosticEmitter {
return text.substring(start, this.pos);
}
/**
 * Reads the flag characters that follow a regular expression literal,
 * starting at the current position, and returns the consumed text
 * (`text.substring(start, this.pos)`).
 *
 * `flags` tracks what has been seen so far: bit 1 for `g`, bit 2 for `i`,
 * bit 4 for `m`; it is set to -1 when an unsupported character is consumed.
 * When it ends up as -1, Invalid_regular_expression_flags is reported for
 * the consumed range, and the text is returned regardless.
 *
 * NOTE(review): this span looks like a rendered diff in which the removed
 * `readRegexpModifiers` lines and the added `readRegexpFlags` lines are
 * interleaved without markers, and the bare `/a/` line below looks like an
 * extraction artifact — confirm against the actual file.
 */
readRegexpModifiers(): string {
readRegexpFlags(): string {
var text = this.source.text;
var start = this.pos;
/a/
var flags = 0;
while (this.pos < this.end) {
switch (text.charCodeAt(this.pos)) {
var c: i32 = text.charCodeAt(this.pos);
// stop at the first character that isIdentifierPart rejects
if (!isIdentifierPart(c))
break;
// consume the character before classifying it, so an unsupported flag
// is still part of the returned text and of the reported range
++this.pos;
switch (c) {
// make sure each supported flag is unique
// presumably select<i32>(a, b, cond) yields a when cond holds and b
// otherwise, so a repeated flag turns `flags` into -1 — TODO confirm
case CharCode.g:
flags |= select<i32>(1, -1, !(flags & 1));
break;
case CharCode.i:
flags |= select<i32>(2, -1, !(flags & 2));
break;
case CharCode.m:
++this.pos;
flags |= select<i32>(4, -1, !(flags & 4));
break;
// any other identifier character is not one of the flags handled above
default:
return text.substring(start, this.pos);
flags = -1;
break;
}
}
// one diagnostic for the whole run of flags; the text is still returned below
if (flags == -1)
this.error(DiagnosticCode.Invalid_regular_expression_flags, this.range(start, this.pos));
return text.substring(start, this.pos);
}

View File

@ -16,3 +16,15 @@ b/ig;
// just a comment
//ig;
// duplicate flags
/(abc)\//iig;
// invalid flags
/(abc)\//iX;
// surrounding AST remains intact
false && /abc/gX.test(someString) || true;

View File

@ -3,4 +3,11 @@
let re = /(abc)\//ig;
let noRe = !/(abc)\//i;
b / ig;
/(abc)\//iig;
/(abc)\//iX;
false && /abc/gX.test(someString) || true;
// ERROR 1161: "Unterminated regular expression literal." in regexp.ts @ 75,76
// ERROR 1005: "'/' expected." in regexp.ts @ 74,76
// ERROR 109: "Invalid regular expression flags." in regexp.ts @ 95,98
// ERROR 109: "Invalid regular expression flags." in regexp.ts @ 111,113
// ERROR 109: "Invalid regular expression flags." in regexp.ts @ 131,133