assemblyscript/src/tokenizer.ts

1159 lines
43 KiB
TypeScript
Raw Normal View History

2017-09-28 13:08:25 +02:00
/*
This is a modified version of TypeScript's scanner that doesn't perform
as much bookkeeping, simply skips over trivia and provides a more general
mark/reset mechanism for the parser to utilize on ambiguous tokens.
next() advances the token
peek() peeks for the next token
skip(token) skips over a token if possible
mark() marks at current token
reset() resets to marked state
range() gets the range of the current token
readFloat() on FLOATLITERAL
readIdentifier() on IDENTIFIER
readInteger() on INTEGERLITERAL
readRegexp() on REGEXPLITERAL // TODO
readString() on STRINGLITERAL
*/
import { DiagnosticCode, DiagnosticMessage, DiagnosticEmitter, formatDiagnosticMessage } from "./diagnostics";
import { Source } from "./reflection";
import { I64, CharCode, isLineBreak } from "./util";
/**
 * All token kinds the tokenizer can report.
 *
 * Members carry no explicit initializers, so their numeric values follow declaration
 * order starting at 0; reordering members changes those values. The tokenizer uses
 * `-1` (outside this range) to mean "no token".
 */
export enum Token {
// keywords
// discarded: ANY, BOOLEAN, NEVER, NUMBER, STRING, SYMBOL, UNDEFINED, LESSTHAN_SLASH
ABSTRACT,
AS,
ASYNC,
AWAIT, // ES2017
BREAK, // ES2017
CASE, // ES2017
CATCH, // ES2017
CLASS, // ES2017
CONST, // ES2017
CONTINUE, // ES2017
CONSTRUCTOR,
DEBUGGER, // ES2017
DECLARE,
DEFAULT, // ES2017
DELETE, // ES2017
DO, // ES2017
ELSE, // ES2017
ENUM, // ES2017 future
EXPORT, // ES2017
EXTENDS, // ES2017
FALSE, // ES
FINALLY, // ES2017
FOR, // ES2017
FROM, // AS possible identifier
FUNCTION, // ES2017
GET,
IF, // ES2017
IMPLEMENTS, // ES2017 non-lexical
IMPORT, // ES2017
IN, // ES2017
INSTANCEOF, // ES2017
INTERFACE, // ES2017 non-lexical
IS,
KEYOF,
LET, // ES2017 non-lexical
MODULE, // AS possible identifier
NAMESPACE, // AS possible identifier
NEW, // ES2017
NULL, // ES
OF,
PACKAGE, // ES2017 non-lexical
PRIVATE, // ES2017 non-lexical
PROTECTED, // ES2017 non-lexical
PUBLIC, // ES2017 non-lexical
READONLY,
RETURN, // ES2017
SET,
STATIC, // ES2017 non-lexical
SUPER, // ES2017
SWITCH, // ES2017
THIS, // ES2017
THROW, // ES2017
TRUE, // ES
TRY, // ES2017
TYPE, // AS possible identifier
TYPEOF, // ES2017
VAR, // ES2017
VOID, // ES2017
WHILE, // ES2017
WITH, // ES2017
YIELD, // ES2017
// punctuation
OPENBRACE,
CLOSEBRACE,
OPENPAREN,
CLOSEPAREN,
OPENBRACKET,
CLOSEBRACKET,
DOT,
DOT_DOT_DOT,
SEMICOLON,
COMMA,
LESSTHAN,
GREATERTHAN,
LESSTHAN_EQUALS,
GREATERTHAN_EQUALS,
EQUALS_EQUALS,
EXCLAMATION_EQUALS,
EQUALS_EQUALS_EQUALS,
EXCLAMATION_EQUALS_EQUALS,
EQUALS_GREATERTHAN,
PLUS,
MINUS,
ASTERISK_ASTERISK,
ASTERISK,
SLASH,
PERCENT,
PLUS_PLUS,
MINUS_MINUS,
LESSTHAN_LESSTHAN,
GREATERTHAN_GREATERTHAN,
GREATERTHAN_GREATERTHAN_GREATERTHAN,
AMPERSAND,
BAR,
CARET,
EXCLAMATION,
TILDE,
AMPERSAND_AMPERSAND,
BAR_BAR,
QUESTION,
COLON,
EQUALS,
PLUS_EQUALS,
MINUS_EQUALS,
ASTERISK_EQUALS,
ASTERISK_ASTERISK_EQUALS,
SLASH_EQUALS,
PERCENT_EQUALS,
LESSTHAN_LESSTHAN_EQUALS,
GREATERTHAN_GREATERTHAN_EQUALS,
GREATERTHAN_GREATERTHAN_GREATERTHAN_EQUALS,
AMPERSAND_EQUALS,
BAR_EQUALS,
CARET_EQUALS,
AT,
// literals (the scanner reports these at the literal's start; a read*() call consumes the text)
IDENTIFIER,
STRINGLITERAL,
INTEGERLITERAL,
FLOATLITERAL,
REGEXPLITERAL,
// meta
INVALID,
ENDOFFILE
}
/**
 * Maps the source spelling of every keyword to its token.
 * Entries are registered one by one in the same order as before, so iteration
 * order of the map is unchanged.
 */
const textToKeywordToken: Map<string,Token> = new Map<string,Token>()
  .set("abstract", Token.ABSTRACT)
  .set("as", Token.AS)
  .set("async", Token.ASYNC)
  .set("await", Token.AWAIT)
  .set("break", Token.BREAK)
  .set("case", Token.CASE)
  .set("catch", Token.CATCH)
  .set("class", Token.CLASS)
  .set("continue", Token.CONTINUE)
  .set("const", Token.CONST)
  .set("constructor", Token.CONSTRUCTOR)
  .set("debugger", Token.DEBUGGER)
  .set("declare", Token.DECLARE)
  .set("default", Token.DEFAULT)
  .set("delete", Token.DELETE)
  .set("do", Token.DO)
  .set("else", Token.ELSE)
  .set("enum", Token.ENUM)
  .set("export", Token.EXPORT)
  .set("extends", Token.EXTENDS)
  .set("false", Token.FALSE)
  .set("finally", Token.FINALLY)
  .set("for", Token.FOR)
  .set("from", Token.FROM)
  .set("function", Token.FUNCTION)
  .set("get", Token.GET)
  .set("if", Token.IF)
  .set("implements", Token.IMPLEMENTS)
  .set("import", Token.IMPORT)
  .set("in", Token.IN)
  .set("instanceof", Token.INSTANCEOF)
  .set("interface", Token.INTERFACE)
  .set("is", Token.IS)
  .set("keyof", Token.KEYOF)
  .set("let", Token.LET)
  .set("module", Token.MODULE)
  .set("namespace", Token.NAMESPACE)
  .set("new", Token.NEW)
  .set("null", Token.NULL)
  .set("of", Token.OF)
  .set("package", Token.PACKAGE)
  .set("private", Token.PRIVATE)
  .set("protected", Token.PROTECTED)
  .set("public", Token.PUBLIC)
  .set("readonly", Token.READONLY)
  .set("return", Token.RETURN)
  .set("set", Token.SET)
  .set("static", Token.STATIC)
  .set("super", Token.SUPER)
  .set("switch", Token.SWITCH)
  .set("this", Token.THIS)
  .set("throw", Token.THROW)
  .set("true", Token.TRUE)
  .set("try", Token.TRY)
  .set("type", Token.TYPE)
  .set("typeof", Token.TYPEOF)
  .set("var", Token.VAR)
  .set("void", Token.VOID)
  .set("while", Token.WHILE)
  .set("with", Token.WITH)
  .set("yield", Token.YIELD);
/**
 * Returns the source spelling of an operator token.
 *
 * @param token Token to spell out.
 * @returns The operator text, or the string "INVALID" for any token not listed here.
 */
export function operatorTokenToString(token: Token): string {
  // Start from the fallback; a matching case overwrites it.
  let text: string = "INVALID";
  switch (token) {
    // keyword operators
    case Token.DELETE: text = "delete"; break;
    case Token.IN: text = "in"; break;
    case Token.INSTANCEOF: text = "instanceof"; break;
    case Token.NEW: text = "new"; break;
    case Token.TYPEOF: text = "typeof"; break;
    case Token.VOID: text = "void"; break;
    case Token.YIELD: text = "yield"; break;
    // spread and sequencing
    case Token.DOT_DOT_DOT: text = "..."; break;
    case Token.COMMA: text = ","; break;
    // comparison
    case Token.LESSTHAN: text = "<"; break;
    case Token.GREATERTHAN: text = ">"; break;
    case Token.LESSTHAN_EQUALS: text = "<="; break;
    case Token.GREATERTHAN_EQUALS: text = ">="; break;
    case Token.EQUALS_EQUALS: text = "=="; break;
    case Token.EXCLAMATION_EQUALS: text = "!="; break;
    case Token.EQUALS_EQUALS_EQUALS: text = "==="; break;
    case Token.EXCLAMATION_EQUALS_EQUALS: text = "!=="; break;
    // arithmetic
    case Token.PLUS: text = "+"; break;
    case Token.MINUS: text = "-"; break;
    case Token.ASTERISK_ASTERISK: text = "**"; break;
    case Token.ASTERISK: text = "*"; break;
    case Token.SLASH: text = "/"; break;
    case Token.PERCENT: text = "%"; break;
    case Token.PLUS_PLUS: text = "++"; break;
    case Token.MINUS_MINUS: text = "--"; break;
    // shifts
    case Token.LESSTHAN_LESSTHAN: text = "<<"; break;
    case Token.GREATERTHAN_GREATERTHAN: text = ">>"; break;
    case Token.GREATERTHAN_GREATERTHAN_GREATERTHAN: text = ">>>"; break;
    // bitwise and logical
    case Token.AMPERSAND: text = "&"; break;
    case Token.BAR: text = "|"; break;
    case Token.CARET: text = "^"; break;
    case Token.EXCLAMATION: text = "!"; break;
    case Token.TILDE: text = "~"; break;
    case Token.AMPERSAND_AMPERSAND: text = "&&"; break;
    case Token.BAR_BAR: text = "||"; break;
    // assignment
    case Token.EQUALS: text = "="; break;
    case Token.PLUS_EQUALS: text = "+="; break;
    case Token.MINUS_EQUALS: text = "-="; break;
    case Token.ASTERISK_EQUALS: text = "*="; break;
    case Token.ASTERISK_ASTERISK_EQUALS: text = "**="; break;
    case Token.SLASH_EQUALS: text = "/="; break;
    case Token.PERCENT_EQUALS: text = "%="; break;
    case Token.LESSTHAN_LESSTHAN_EQUALS: text = "<<="; break;
    case Token.GREATERTHAN_GREATERTHAN_EQUALS: text = ">>="; break;
    case Token.GREATERTHAN_GREATERTHAN_GREATERTHAN_EQUALS: text = ">>>="; break;
    case Token.AMPERSAND_EQUALS: text = "&="; break;
    case Token.BAR_EQUALS: text = "|="; break;
    case Token.CARET_EQUALS: text = "^="; break;
  }
  return text;
}
/**
 * Keyword spellings that the scanner reports as plain identifiers when the
 * caller asks it to prefer identifiers.
 */
const possibleIdentifiers: Set<string> = new Set<string>()
  .add("from")
  .add("module")
  .add("namespace")
  .add("type");
/** A span of text, identified by a source and a pair of start/end offsets. */
export class Range {
  source: Source;
  start: i32;
  end: i32;

  constructor(source: Source, start: i32, end: i32) {
    this.source = source;
    this.start = start;
    this.end = end;
  }

  /**
   * Builds the smallest range covering both `a` and `b`.
   *
   * @throws Error("source mismatch") if the two ranges belong to different sources.
   */
  static join(a: Range, b: Range): Range {
    const source: Source = a.source;
    if (source != b.source)
      throw new Error("source mismatch");
    // take the smaller start ...
    let start: i32 = a.start;
    if (b.start < start)
      start = b.start;
    // ... and the larger end
    let end: i32 = a.end;
    if (b.end > end)
      end = b.end;
    return new Range(source, start, end);
  }
}
export class Tokenizer extends DiagnosticEmitter {
source: Source;
end: i32 = 0;
pos: i32 = 0;
token: Token = -1;
tokenPos: i32 = 0;
markedPos: i32 = 0;
markedToken: Token = -1;
markedTokenPos: i32 = 0;
nextToken: Token = -1;
nextTokenOnNewLine: bool = false;
/**
 * Creates a tokenizer over `source`, positioned after any leading byte order
 * mark and shebang line.
 *
 * @param source Source whose `text` is scanned.
 * @param diagnostics Optional array to report into; a fresh array is used when omitted.
 */
constructor(source: Source, diagnostics: DiagnosticMessage[] | null = null) {
  super(diagnostics);
  const text: string = source.text;
  const length: i32 = text.length;
  this.source = source;
  this.end = length;
  this.diagnostics = diagnostics ? diagnostics : new Array();

  // Work on a local cursor and publish it once at the end.
  let cursor: i32 = 0;

  // a byte order mark is not part of the program text
  if (cursor < length && text.charCodeAt(cursor) == CharCode.BYTEORDERMARK)
    cursor++;

  // "#!..." line: skip up to, but not including, the line feed (or to the end of text)
  if (cursor + 1 < length && text.charCodeAt(cursor) == CharCode.HASH && text.charCodeAt(cursor + 1) == CharCode.EXCLAMATION) {
    cursor += 2;
    while (cursor < length && text.charCodeAt(cursor) != CharCode.LINEFEED)
      cursor++;
  }
  this.pos = cursor;
}
/**
 * Advances to the next token and makes it the current one.
 * Any token cached by a previous lookahead is discarded first.
 *
 * @param preferIdentifier Passed through to the scanner, which then reports the
 *   keywords listed in `possibleIdentifiers` as identifiers.
 * @returns The new current token.
 */
next(preferIdentifier: bool = false): Token {
  this.nextToken = -1;
  const scanned: Token = this.unsafeNext(preferIdentifier);
  this.token = scanned;
  return scanned;
}
/**
 * Scans forward from `pos` and returns the next token. Updates `pos` and `tokenPos`
 * only; `token` and `nextToken` are left to the callers.
 *
 * Whitespace, line breaks and comments are skipped. Punctuation and keywords are
 * consumed in full. For STRINGLITERAL, INTEGERLITERAL, FLOATLITERAL and IDENTIFIER
 * `pos` is left at the first character of the literal so the matching read*() method
 * can consume it.
 *
 * @param preferIdentifier When true, keywords listed in `possibleIdentifiers` are
 *   reported as IDENTIFIER.
 * @returns The scanned token; ENDOFFILE once `pos` has reached `end` (in which case
 *   `tokenPos` is not updated for the end-of-file token itself).
 */
private unsafeNext(preferIdentifier: bool = false): Token {
const text: string = this.source.text;
while (true) {
if (this.pos >= this.end)
return Token.ENDOFFILE;
// remember where this candidate token starts; overwritten on every trivia iteration
this.tokenPos = this.pos;
let c: i32 = text.charCodeAt(this.pos);
switch (c) {
// line breaks and whitespace: advance and loop again (CR LF is consumed as a pair)
case CharCode.CARRIAGERETURN:
if (++this.pos < this.end && text.charCodeAt(this.pos) == CharCode.LINEFEED)
this.pos++;
break;
case CharCode.LINEFEED:
case CharCode.TAB:
case CharCode.VERTICALTAB:
case CharCode.FORMFEED:
case CharCode.SPACE:
this.pos++;
break;
// "!", "!=" or "!=="
case CharCode.EXCLAMATION:
if (++this.pos < this.end && text.charCodeAt(this.pos) == CharCode.EQUALS) {
if (++this.pos < this.end && text.charCodeAt(this.pos) == CharCode.EQUALS) {
this.pos++;
return Token.EXCLAMATION_EQUALS_EQUALS;
}
return Token.EXCLAMATION_EQUALS;
}
return Token.EXCLAMATION;
// string start: pos stays on the opening quote
case CharCode.DOUBLEQUOTE:
case CharCode.SINGLEQUOTE:
case CharCode.BACKTICK: // TODO
return Token.STRINGLITERAL; // expects a call to readString
// "%" or "%="
case CharCode.PERCENT:
if (++this.pos < this.end && text.charCodeAt(this.pos) == CharCode.EQUALS) {
this.pos++;
return Token.PERCENT_EQUALS;
}
return Token.PERCENT;
// "&", "&&" or "&="
case CharCode.AMPERSAND:
if (++this.pos < this.end) {
if (text.charCodeAt(this.pos) == CharCode.AMPERSAND) {
this.pos++;
return Token.AMPERSAND_AMPERSAND;
}
if (text.charCodeAt(this.pos) == CharCode.EQUALS) {
this.pos++;
return Token.AMPERSAND_EQUALS;
}
}
return Token.AMPERSAND;
case CharCode.OPENPAREN:
this.pos++;
return Token.OPENPAREN;
case CharCode.CLOSEPAREN:
this.pos++;
return Token.CLOSEPAREN;
// "*", "*=", "**" or "**="
case CharCode.ASTERISK:
if (++this.pos < this.end) {
if (text.charCodeAt(this.pos) == CharCode.EQUALS) {
this.pos++;
return Token.ASTERISK_EQUALS;
}
if (text.charCodeAt(this.pos) == CharCode.ASTERISK) {
if (++this.pos < this.end && text.charCodeAt(this.pos) == CharCode.EQUALS) {
this.pos++;
return Token.ASTERISK_ASTERISK_EQUALS;
}
return Token.ASTERISK_ASTERISK;
}
}
return Token.ASTERISK;
// "+", "++" or "+="
case CharCode.PLUS:
if (++this.pos < this.end) {
if (text.charCodeAt(this.pos) == CharCode.PLUS) {
this.pos++;
return Token.PLUS_PLUS;
}
if (text.charCodeAt(this.pos) == CharCode.EQUALS) {
this.pos++;
return Token.PLUS_EQUALS;
}
}
return Token.PLUS;
case CharCode.COMMA:
this.pos++;
return Token.COMMA;
// "-", "--" or "-="
case CharCode.MINUS:
if (++this.pos < this.end) {
if (text.charCodeAt(this.pos) == CharCode.MINUS) {
this.pos++;
return Token.MINUS_MINUS;
}
if (text.charCodeAt(this.pos) == CharCode.EQUALS) {
this.pos++;
return Token.MINUS_EQUALS;
}
}
return Token.MINUS;
// ".", "..." or the start of a float such as ".5"
case CharCode.DOT:
if (++this.pos < this.end) {
if (isDecimalDigit(text.charCodeAt(this.pos))) {
// step back onto the dot so readFloat sees the whole literal
this.pos--;
return Token.FLOATLITERAL; // expects a call to readFloat
}
if (text.charCodeAt(this.pos) == CharCode.DOT && this.pos + 1 < this.end && text.charCodeAt(this.pos + 1) == CharCode.DOT) {
this.pos += 2;
return Token.DOT_DOT_DOT;
}
}
return Token.DOT;
// comments, "/=", a regular expression (if testRegexp agrees) or "/"
case CharCode.SLASH:
if (++this.pos < this.end) {
if (text.charCodeAt(this.pos) == CharCode.SLASH) { // single-line comment
// stop on the line break itself; the next iteration skips it as trivia
while (++this.pos < this.end) {
if (isLineBreak(text.charCodeAt(this.pos)))
break;
}
continue;
}
if (text.charCodeAt(this.pos) == CharCode.ASTERISK) { // multi-line comment
let closed: bool = false;
while (++this.pos < this.end) {
c = text.charCodeAt(this.pos);
if (c == CharCode.ASTERISK && this.pos + 1 < this.end && text.charCodeAt(this.pos + 1) == CharCode.SLASH) {
this.pos += 2;
closed = true;
break;
}
}
// ran out of text before finding the closing "*/"
if (!closed)
this.error(DiagnosticCode._0_expected, this.range(this.pos), "*/");
continue;
}
if (text.charCodeAt(this.pos) == CharCode.EQUALS) {
this.pos++;
return Token.SLASH_EQUALS;
}
}
if (this.testRegexp())
return Token.REGEXPLITERAL; // expects a call to readRegexp
return Token.SLASH;
// numeric literal start: pos stays on the first digit
case CharCode._0:
case CharCode._1:
case CharCode._2:
case CharCode._3:
case CharCode._4:
case CharCode._5:
case CharCode._6:
case CharCode._7:
case CharCode._8:
case CharCode._9:
return this.testInteger()
? Token.INTEGERLITERAL // expects a call to readInteger
: Token.FLOATLITERAL; // expects a call to readFloat
case CharCode.COLON:
this.pos++;
return Token.COLON;
case CharCode.SEMICOLON:
this.pos++;
return Token.SEMICOLON;
// "<", "<<", "<<=" or "<="
case CharCode.LESSTHAN:
if (++this.pos < this.end) {
if (text.charCodeAt(this.pos) == CharCode.LESSTHAN) {
this.pos++;
if (this.pos < this.end && text.charCodeAt(this.pos) == CharCode.EQUALS) {
this.pos++;
return Token.LESSTHAN_LESSTHAN_EQUALS;
}
return Token.LESSTHAN_LESSTHAN;
}
if (text.charCodeAt(this.pos) == CharCode.EQUALS) {
this.pos++;
return Token.LESSTHAN_EQUALS;
}
}
return Token.LESSTHAN;
// "=", "==", "===" or "=>"
case CharCode.EQUALS:
if (++this.pos < this.end) {
if (text.charCodeAt(this.pos) == CharCode.EQUALS) {
this.pos++;
if (this.pos < this.end && text.charCodeAt(this.pos) == CharCode.EQUALS) {
this.pos++;
return Token.EQUALS_EQUALS_EQUALS;
}
return Token.EQUALS_EQUALS;
}
if (text.charCodeAt(this.pos) == CharCode.GREATERTHAN) {
this.pos++;
return Token.EQUALS_GREATERTHAN;
}
}
return Token.EQUALS;
// ">", ">>", ">>>", ">>>=", ">>=" or ">="
case CharCode.GREATERTHAN:
if (++this.pos < this.end) {
if (text.charCodeAt(this.pos) == CharCode.GREATERTHAN) {
this.pos++;
if (this.pos < this.end) {
if (text.charCodeAt(this.pos) == CharCode.GREATERTHAN) {
this.pos++;
if (this.pos < this.end && text.charCodeAt(this.pos) == CharCode.EQUALS) {
this.pos++;
return Token.GREATERTHAN_GREATERTHAN_GREATERTHAN_EQUALS;
}
return Token.GREATERTHAN_GREATERTHAN_GREATERTHAN;
}
if (text.charCodeAt(this.pos) == CharCode.EQUALS) {
this.pos++;
return Token.GREATERTHAN_GREATERTHAN_EQUALS;
}
}
return Token.GREATERTHAN_GREATERTHAN;
}
if (text.charCodeAt(this.pos) == CharCode.EQUALS) {
this.pos++;
return Token.GREATERTHAN_EQUALS;
}
}
return Token.GREATERTHAN;
case CharCode.QUESTION:
this.pos++;
return Token.QUESTION;
case CharCode.OPENBRACKET:
this.pos++;
return Token.OPENBRACKET;
case CharCode.CLOSEBRACKET:
this.pos++;
return Token.CLOSEBRACKET;
// "^" or "^="
case CharCode.CARET:
if (++this.pos < this.end && text.charCodeAt(this.pos) == CharCode.EQUALS) {
this.pos++;
return Token.CARET_EQUALS;
}
return Token.CARET;
case CharCode.OPENBRACE:
this.pos++;
return Token.OPENBRACE;
// "|", "||" or "|="
case CharCode.BAR:
if (++this.pos < this.end) {
if (text.charCodeAt(this.pos) == CharCode.BAR) {
this.pos++;
return Token.BAR_BAR;
}
if (text.charCodeAt(this.pos) == CharCode.EQUALS) {
this.pos++;
return Token.BAR_EQUALS;
}
}
return Token.BAR;
case CharCode.CLOSEBRACE:
this.pos++;
return Token.CLOSEBRACE;
case CharCode.TILDE:
this.pos++;
return Token.TILDE;
case CharCode.AT:
this.pos++;
return Token.AT;
default: {
if (isIdentifierStart(c)) {
// only words made up entirely of keyword characters (a-z) can be keywords
if (isKeywordCharacter(c)) {
const posBefore: i32 = this.pos;
while (++this.pos < this.end && isIdentifierPart(c = text.charCodeAt(this.pos))) {
if (!isKeywordCharacter(c)) {
// contains a non-keyword character: rewind and report an identifier
this.pos = posBefore;
return Token.IDENTIFIER;
}
}
const keywordText: string = text.substring(posBefore, this.pos);
// a keyword is consumed here, unless the caller prefers an identifier and this spelling allows it
if (textToKeywordToken.has(keywordText) && !(preferIdentifier && possibleIdentifiers.has(keywordText)))
return <Token>textToKeywordToken.get(keywordText);
this.pos = posBefore;
}
return Token.IDENTIFIER; // expects a call to readIdentifier
} else if (isWhiteSpace(c)) {
// whitespace not covered by the explicit cases above
this.pos++;
break;
}
// anything else: report it, consume one character and hand back INVALID
this.error(DiagnosticCode.Invalid_character, this.range(this.pos, this.pos + 1));
this.pos++;
return Token.INVALID;
}
}
}
}
/**
 * Looks ahead at the next token without advancing.
 *
 * The scanned token is cached in `nextToken`, so repeated calls do not rescan.
 * `nextTokenOnNewLine` is only (re)computed when this call actually scans and
 * `checkOnNewLine` is true; a call that is served from the cache leaves the flag
 * as it was.
 *
 * @param checkOnNewLine When true, also record in `nextTokenOnNewLine` whether a
 *   line break occurs between the current position and the next token.
 * @returns The upcoming token.
 */
peek(checkOnNewLine: bool = false): Token {
  if (this.nextToken < 0) {
    const text: string = this.source.text;
    const posBefore: i32 = this.pos;
    const tokenBefore: Token = this.token;
    const tokenPosBefore: i32 = this.tokenPos;
    this.nextToken = this.unsafeNext();
    if (checkOnNewLine) {
      this.nextTokenOnNewLine = false;
      // The skipped trivia spans [posBefore, start of the next token). The scanner
      // does not update tokenPos for end-of-file, so in that case everything up
      // to the final position was trivia.
      const triviaEnd: i32 = this.nextToken == Token.ENDOFFILE ? this.pos : this.tokenPos;
      // Include posBefore itself: a line break directly after the current token
      // sits exactly there.
      for (let pos: i32 = posBefore; pos < triviaEnd; ++pos) {
        if (isLineBreak(text.charCodeAt(pos))) {
          this.nextTokenOnNewLine = true;
          break;
        }
      }
    }
    // rewind: peeking must not move the tokenizer
    this.pos = posBefore;
    this.token = tokenBefore;
    this.tokenPos = tokenPosBefore;
  }
  return this.nextToken;
}
/**
 * Consumes the next token if, and only if, it is `token`.
 *
 * On success the skipped token becomes the current token. On failure the
 * tokenizer is rewound to where it was and the token that was found instead
 * stays cached in `nextToken`.
 *
 * @param token Token to skip. When this is `Token.IDENTIFIER` the scanner is asked
 *   to prefer identifiers over the keywords in `possibleIdentifiers`.
 * @returns true if the token was consumed, false otherwise.
 */
skip(token: Token): bool {
  const posBefore: i32 = this.pos;
  const tokenBefore: Token = this.token;
  const tokenPosBefore: i32 = this.tokenPos;
  const scanned: Token = this.unsafeNext(token == Token.IDENTIFIER);
  if (scanned == token) {
    // the skipped token is now the current one (previously `token` was left stale)
    this.token = scanned;
    this.nextToken = -1;
    return true;
  }
  // not what the caller wanted: keep it as lookahead and rewind
  this.nextToken = scanned;
  this.pos = posBefore;
  this.token = tokenBefore;
  this.tokenPos = tokenPosBefore;
  return false;
}
/**
 * Snapshots the tokenizer state (current token, its start and the scan position)
 * so that `reset()` can return to it. Only one snapshot is kept; a later call
 * overwrites the earlier one.
 */
mark(): void {
  this.markedToken = this.token;
  this.markedTokenPos = this.tokenPos;
  this.markedPos = this.pos;
}
/**
 * Restores the state captured by the last `mark()` and drops any cached
 * lookahead token, since it may belong to a different position.
 */
reset(): void {
  this.nextToken = -1;
  this.token = this.markedToken;
  this.tokenPos = this.markedTokenPos;
  this.pos = this.markedPos;
}
/**
 * Creates a range within the current source.
 *
 * - `range()` covers the current token: from its start (`tokenPos`) to the
 *   current scan position (`pos`).
 * - `range(start)` covers `start` up to the current scan position.
 * - `range(start, end)` covers exactly the given offsets.
 *
 * Previously `range()` returned an empty range at `tokenPos`, which did not
 * match the documented "range of the current token".
 *
 * @param start Start offset, or a negative value to use the current token's start.
 * @param end End offset, or a negative value to use the current scan position.
 */
range(start: i32 = -1, end: i32 = -1): Range {
  if (start < 0)
    start = this.tokenPos;
  if (end < 0)
    end = this.pos;
  return new Range(this.source, start, end);
}
/**
 * Consumes the identifier starting at the current position and returns its text.
 * The first character is taken unconditionally (the scanner has already classified
 * it); every following identifier-part character is appended.
 */
readIdentifier(): string {
  const text: string = this.source.text;
  const begin: i32 = this.pos;
  let cursor: i32 = begin + 1;
  while (cursor < this.end && isIdentifierPart(text.charCodeAt(cursor)))
    cursor++;
  this.pos = cursor;
  return text.substring(begin, cursor);
}
/**
 * Consumes the string literal starting at the current position and returns its
 * value with escape sequences resolved.
 *
 * The character at the current position is taken as the quote; the literal ends at
 * the next unescaped occurrence of that same character. If the text ends or a line
 * break is reached first, an "unterminated string literal" diagnostic is reported
 * and the text read so far is returned. The diagnostic range always starts at the
 * opening quote (previously it drifted to the last escape sequence).
 */
readString(): string {
  const text: string = this.source.text;
  const literalStart: i32 = this.pos; // offset of the opening quote, for diagnostics
  const quote: i32 = text.charCodeAt(this.pos++);
  let start: i32 = this.pos; // start of the not yet copied chunk
  let result: string = "";
  while (true) {
    if (this.pos >= this.end) {
      result += text.substring(start, this.pos);
      this.error(DiagnosticCode.Unterminated_string_literal, this.range(literalStart, this.end));
      break;
    }
    const c: i32 = text.charCodeAt(this.pos);
    if (c == quote) {
      // copy the pending chunk and step over the closing quote
      result += text.substring(start, this.pos++);
      break;
    }
    if (c == CharCode.BACKSLASH) {
      // flush what precedes the escape, then append its resolved value
      result += text.substring(start, this.pos);
      result += this.readEscapeSequence();
      start = this.pos;
      continue;
    }
    if (isLineBreak(c)) {
      // the line break itself is left for the scanner
      result += text.substring(start, this.pos);
      this.error(DiagnosticCode.Unterminated_string_literal, this.range(literalStart, this.pos));
      break;
    }
    this.pos++;
  }
  return result;
}
/**
 * Consumes the escape sequence whose backslash is at the current position and
 * returns the text it stands for.
 *
 * A backslash followed by a line terminator is a line continuation and yields the
 * empty string. An escape character without a special meaning yields that character
 * itself. If the text ends right after the backslash, a diagnostic is reported and
 * the empty string is returned.
 */
readEscapeSequence(): string {
  // step over the backslash
  if (++this.pos >= this.end) {
    this.error(DiagnosticCode.Unexpected_end_of_text, this.range(this.end));
    return "";
  }
  const text: string = this.source.text;
  const escaped: i32 = text.charCodeAt(this.pos++);

  // unicode escapes: \u{DDDDDDDD} or \uDDDD
  if (escaped == CharCode.u) {
    if (this.pos < this.end && text.charCodeAt(this.pos) == CharCode.OPENBRACE) {
      this.pos++;
      return this.readExtendedUnicodeEscape();
    }
    return this.readUnicodeEscape();
  }

  // line continuation ending in CR or CR LF
  if (escaped == CharCode.CARRIAGERETURN) {
    if (this.pos < this.end && text.charCodeAt(this.pos) == CharCode.LINEFEED)
      this.pos++;
    return "";
  }

  switch (escaped) {
    // remaining line continuations
    case CharCode.LINEFEED:
    case CharCode.LINESEPARATOR:
    case CharCode.PARAGRAPHSEPARATOR:
      return "";
    // single character escapes
    case CharCode._0: return "\0";
    case CharCode.b: return "\b";
    case CharCode.t: return "\t";
    case CharCode.n: return "\n";
    case CharCode.v: return "\v";
    case CharCode.f: return "\f";
    case CharCode.r: return "\r";
    case CharCode.SINGLEQUOTE: return "'";
    case CharCode.DOUBLEQUOTE: return "\"";
  }
  // anything else stands for itself
  return String.fromCharCode(escaped);
}
/**
 * Decides whether the slash that was just scanned starts a regular expression
 * literal. Currently a stub that always answers false.
 */
testRegexp(): bool {
// TODO: this'll require more context
return false;
}
/**
 * Consumes the body of a regular expression literal, starting right after its
 * opening slash, and returns the pattern text without the delimiting slashes.
 *
 * A backslash escapes the character that follows it, so an escaped slash (`\/`)
 * no longer ends the literal. Escapes are returned verbatim as part of the pattern.
 * If the text ends or a line break is reached before the closing slash, an
 * "unterminated regular expression literal" diagnostic is reported and the text
 * read so far is returned. Flags after the closing slash are not consumed here.
 */
readRegexp(): string {
  const text: string = this.source.text;
  const start: i32 = this.pos;
  let result: string = "";
  while (true) {
    if (this.pos >= this.end) {
      result = text.substring(start, this.pos);
      this.error(DiagnosticCode.Unterminated_regular_expression_literal, this.range(start, this.end));
      break;
    }
    const c: i32 = text.charCodeAt(this.pos);
    if (c == CharCode.BACKSLASH) {
      this.pos++;
      // Step over the escaped character too, unless it is a line break or the
      // end of text; those are reported as unterminated on the next iteration.
      if (this.pos < this.end && !isLineBreak(text.charCodeAt(this.pos)))
        this.pos++;
      continue;
    }
    if (c == CharCode.SLASH) {
      result = text.substring(start, this.pos);
      this.pos++; // closing slash
      break;
    }
    if (isLineBreak(c)) {
      result = text.substring(start, this.pos);
      this.error(DiagnosticCode.Unterminated_regular_expression_literal, this.range(start, this.pos));
      break;
    }
    this.pos++;
  }
  return result;
}
/**
 * Tells whether the numeric literal at the current position is an integer
 * (true) or a float (false). Does not consume anything.
 *
 * A literal starting with `0x`, `0b` or `0o` (either case) is an integer.
 * Otherwise the leading run of decimal digits is inspected: if it is followed by
 * `.`, `e` or `E` the literal is a float, anything else makes it an integer.
 */
testInteger(): bool {
  const text: string = this.source.text;
  // The radix letter is the character right after the leading zero, i.e. at
  // pos + 1 (this used to look one character too far, at pos + 2).
  if (this.pos + 1 < this.end && text.charCodeAt(this.pos) == CharCode._0) {
    switch (text.charCodeAt(this.pos + 1)) {
      case CharCode.X:
      case CharCode.x:
      case CharCode.B:
      case CharCode.b:
      case CharCode.O:
      case CharCode.o:
        return true;
    }
  }
  let pos: i32 = this.pos;
  while (pos < this.end) {
    const c: i32 = text.charCodeAt(pos);
    if (c == CharCode.DOT || c == CharCode.E || c == CharCode.e)
      return false;
    if (c < CharCode._0 || c > CharCode._9)
      break;
    pos++;
  }
  return true;
}
/**
 * Consumes the integer literal at the current position and returns its value.
 *
 * Handles the `0x`/`0b`/`0o` prefixes (either case) by delegating to the matching
 * reader. A leading zero directly followed by an octal digit is read as a legacy
 * octal literal and additionally reported as not allowed in strict mode.
 * Everything else is read as a decimal integer.
 */
readInteger(): I64 {
  const text: string = this.source.text;
  // Only one character after the zero is needed to recognize a prefix. The old
  // guard demanded two, so "0x" or "07" at the very end of the text fell through
  // to the decimal reader without any diagnostic.
  if (this.pos + 1 < this.end && text.charCodeAt(this.pos) == CharCode._0) {
    switch (text.charCodeAt(this.pos + 1)) {
      case CharCode.X:
      case CharCode.x:
        this.pos += 2;
        return this.readHexInteger();
      case CharCode.B:
      case CharCode.b:
        this.pos += 2;
        return this.readBinaryInteger();
      case CharCode.O:
      case CharCode.o:
        this.pos += 2;
        return this.readOctalInteger();
    }
    if (isOctalDigit(text.charCodeAt(this.pos + 1))) {
      const start: i32 = this.pos;
      this.pos++; // leading zero
      const value: I64 = this.readOctalInteger();
      this.error(DiagnosticCode.Octal_literals_are_not_allowed_in_strict_mode, this.range(start, this.pos));
      return value;
    }
  }
  return this.readDecimalInteger();
}
/**
 * Consumes a run of hexadecimal digits (0-9, A-F, a-f) at the current position
 * and returns their value. Reports "hexadecimal digit expected" and returns zero
 * if there is no digit at all.
 */
readHexInteger(): I64 {
  const text: string = this.source.text;
  const start: i32 = this.pos;
  const value: I64 = new I64(0, 0);
  while (this.pos < this.end) {
    const c: i32 = text.charCodeAt(this.pos);
    let digit: i32;
    if (c >= CharCode._0 && c <= CharCode._9)
      digit = c - CharCode._0;
    else if (c >= CharCode.A && c <= CharCode.F)
      digit = 10 + c - CharCode.A;
    else if (c >= CharCode.a && c <= CharCode.f)
      digit = 10 + c - CharCode.a;
    else
      break;
    // value = value * 16 + digit
    value.mul32(16);
    value.add32(digit);
    this.pos++;
  }
  if (this.pos == start)
    this.error(DiagnosticCode.Hexadecimal_digit_expected, this.range(start));
  return value;
}
/**
 * Consumes a run of decimal digits at the current position and returns their
 * value. Reports "digit expected" and returns zero if there is no digit at all.
 */
readDecimalInteger(): I64 {
  const text: string = this.source.text;
  const start: i32 = this.pos;
  const value: I64 = new I64(0, 0);
  while (this.pos < this.end) {
    const c: i32 = text.charCodeAt(this.pos);
    if (!isDecimalDigit(c))
      break;
    // value = value * 10 + digit
    value.mul32(10);
    value.add32(c - CharCode._0);
    this.pos++;
  }
  if (this.pos == start)
    this.error(DiagnosticCode.Digit_expected, this.range(start));
  return value;
}
/**
 * Consumes a run of octal digits (0-7) at the current position and returns their
 * value. Reports "octal digit expected" and returns zero if there is no digit at all.
 */
readOctalInteger(): I64 {
  const text: string = this.source.text;
  const start: i32 = this.pos;
  const value: I64 = new I64(0, 0);
  while (this.pos < this.end) {
    const c: i32 = text.charCodeAt(this.pos);
    if (!isOctalDigit(c))
      break;
    // value = value * 8 + digit
    value.mul32(8);
    value.add32(c - CharCode._0);
    this.pos++;
  }
  if (this.pos == start)
    this.error(DiagnosticCode.Octal_digit_expected, this.range(start));
  return value;
}
/**
 * Consumes a run of binary digits (0 or 1) at the current position and returns
 * their value. Reports "binary digit expected" and returns zero if there is no
 * digit at all.
 */
readBinaryInteger(): I64 {
  const text: string = this.source.text;
  const start: i32 = this.pos;
  // start from an explicit zero, as the hex, decimal and octal readers do
  let value: I64 = new I64(0, 0);
  while (this.pos < this.end) {
    const c: i32 = text.charCodeAt(this.pos);
    if (c == CharCode._0) {
      // value = value * 2;
      value.mul32(2);
    } else if (c == CharCode._1) {
      // value = value * 2 + 1;
      value.mul32(2);
      value.add32(1);
    } else
      break;
    this.pos++;
  }
  if (this.pos == start)
    this.error(DiagnosticCode.Binary_digit_expected, this.range(start));
  return value;
}
/**
 * Consumes the floating point literal at the current position and returns its
 * value as parsed by `parseFloat`.
 *
 * Accepted shape: optional integer digits, an optional `.` with fraction digits,
 * and an optional exponent introduced by `e` or `E` with an optional `+` or `-`
 * sign. (A `+` sign used to be rejected, cutting `1e+5` short after the `e`.)
 */
readFloat(): f64 {
  const start: i32 = this.pos;
  const text: string = this.source.text;
  // integer part
  while (this.pos < this.end && isDecimalDigit(text.charCodeAt(this.pos)))
    this.pos++;
  // fraction
  if (this.pos < this.end && text.charCodeAt(this.pos) == CharCode.DOT) {
    this.pos++;
    while (this.pos < this.end && isDecimalDigit(text.charCodeAt(this.pos)))
      this.pos++;
  }
  // exponent
  if (this.pos < this.end) {
    const c: i32 = text.charCodeAt(this.pos);
    if (c == CharCode.E || c == CharCode.e) {
      if (++this.pos < this.end) {
        const sign: i32 = text.charCodeAt(this.pos);
        if (sign == CharCode.MINUS || sign == CharCode.PLUS)
          this.pos++;
      }
      while (this.pos < this.end && isDecimalDigit(text.charCodeAt(this.pos)))
        this.pos++;
    }
  }
  return parseFloat(text.substring(start, this.pos));
}
/**
 * Consumes the four hexadecimal digits of a `\uDDDD` escape (the `\u` has already
 * been consumed) and returns the character they denote.
 *
 * On a non-hexadecimal character a "hexadecimal digit expected" diagnostic is
 * reported and the empty string is returned. The offending character is left in
 * place, so a closing quote that follows a truncated escape still ends the string
 * (it used to be swallowed). If the text ends before four digits were read,
 * "unexpected end of text" is reported and the empty string is returned.
 */
readUnicodeEscape(): string {
  let remain: i32 = 4;
  let value: i32 = 0;
  const text: string = this.source.text;
  while (this.pos < this.end) {
    const c: i32 = text.charCodeAt(this.pos);
    if (c >= CharCode._0 && c <= CharCode._9)
      value = value * 16 + c - CharCode._0;
    else if (c >= CharCode.A && c <= CharCode.F)
      value = value * 16 + 10 + c - CharCode.A;
    else if (c >= CharCode.a && c <= CharCode.f)
      value = value * 16 + 10 + c - CharCode.a;
    else {
      // point at the bad character, but do not consume it
      this.error(DiagnosticCode.Hexadecimal_digit_expected, this.range(this.pos, this.pos + 1));
      return "";
    }
    this.pos++;
    if (--remain == 0)
      break;
  }
  if (remain) {
    this.error(DiagnosticCode.Unexpected_end_of_text, this.range(this.pos));
    return "";
  }
  return String.fromCharCode(value);
}
/**
 * Consumes the remainder of a `\u{...}` escape (the `\u{` has already been
 * consumed) and returns the text for the code point it denotes.
 *
 * Reports a diagnostic and returns the empty string if the value exceeds
 * 0x10FFFF, if the text ends before the closing brace, or if something other than
 * a closing brace follows the digits. Code points of 65536 and above are returned
 * as a pair of UTF-16 surrogates.
 */
private readExtendedUnicodeEscape(): string {
  const start: i32 = this.pos;
  const value: I64 = this.readHexInteger();
  let valid: bool = true;

  if (value.gt32(0x10FFFF)) {
    this.error(DiagnosticCode.An_extended_Unicode_escape_value_must_be_between_0x0_and_0x10FFFF_inclusive, this.range(start, this.pos));
    valid = false;
  }
  const codePoint: i32 = value.toI32();

  // the digits must be followed by the closing brace
  const text: string = this.source.text;
  if (this.pos >= this.end) {
    this.error(DiagnosticCode.Unexpected_end_of_text, this.range(start, this.end));
    valid = false;
  } else if (text.charCodeAt(this.pos) == CharCode.CLOSEBRACE) {
    this.pos++;
  } else {
    this.error(DiagnosticCode.Unterminated_Unicode_escape_sequence, this.range(start, this.pos));
    valid = false;
  }

  if (!valid)
    return "";
  if (codePoint < 65536)
    return String.fromCharCode(codePoint);

  // split into a high and a low surrogate
  const offset: i32 = codePoint - 65536;
  const high: i32 = ((offset / 1024 | 0) + 0xD800) as i32;
  const low: i32 = (offset % 1024 + 0xDC00) as i32;
  return String.fromCharCode(high, low);
}
}
/** Tests whether `c` is a whitespace character other than a line break. */
function isWhiteSpace(c: i32): bool {
  switch (c) {
    case CharCode.SPACE:
    case CharCode.TAB:
    case CharCode.VERTICALTAB:
    case CharCode.FORMFEED:
    case CharCode.NONBREAKINGSPACE:
    case CharCode.NEXTLINE:
    case CharCode.OGHAM:
    case CharCode.NARRINOBREAKSPACE:
    case CharCode.MATHEMATICALSPACE:
    case CharCode.IDEOGRAPHICSPACE:
    case CharCode.BYTEORDERMARK:
      return true;
    default:
      // the contiguous block from ENQUAD through ZEROWIDTHSPACE
      return c >= CharCode.ENQUAD && c <= CharCode.ZEROWIDTHSPACE;
  }
}
/** Tests whether `c` is one of the digits 0 through 9. */
function isDecimalDigit(c: i32): bool {
  if (c < CharCode._0)
    return false;
  return c <= CharCode._9;
}
/** Tests whether `c` is one of the digits 0 through 7. */
function isOctalDigit(c: i32): bool {
  if (c < CharCode._0)
    return false;
  return c <= CharCode._7;
}
/**
 * Tests whether `c` may begin an identifier: a letter A-Z or a-z, `$`, `_`, or a
 * character above 0x7f that the Unicode identifier-start table accepts.
 */
function isIdentifierStart(c: i32): bool {
  if (c >= CharCode.A && c <= CharCode.Z)
    return true;
  if (c >= CharCode.a && c <= CharCode.z)
    return true;
  if (c == CharCode.$)
    return true;
  if (c == CharCode._)
    return true;
  // only consult the table for characters above 0x7f
  return c > 0x7f && isUnicodeIdentifierStart(c);
}
/** Tests whether `c` is a lower case letter a-z, the only characters keywords consist of. */
function isKeywordCharacter(c: i32): bool {
  if (c < CharCode.a)
    return false;
  return c <= CharCode.z;
}
/**
 * Tests whether `c` may appear inside an identifier: a letter A-Z or a-z, a digit
 * 0-9, `$`, `_`, or a character above 0x7f that the Unicode identifier-part table
 * accepts.
 */
function isIdentifierPart(c: i32): bool {
  if (c >= CharCode.A && c <= CharCode.Z)
    return true;
  if (c >= CharCode.a && c <= CharCode.z)
    return true;
  if (c >= CharCode._0 && c <= CharCode._9)
    return true;
  if (c == CharCode.$)
    return true;
  if (c == CharCode._)
    return true;
  // only consult the table for characters above 0x7f
  return c > 0x7f && isUnicodeIdentifierPart(c);
}
// storing as u16 to save memory
const unicodeIdentifierStart: u16[] = [170, 170, 181, 181, 186, 186, 192, 214, 216, 246, 248, 705, 710, 721, 736, 740, 748, 748, 750, 750, 880, 884, 886, 887, 890, 893, 902, 902, 904, 906, 908, 908, 910, 929, 931, 1013, 1015, 1153, 1162, 1319, 1329, 1366, 1369, 1369, 1377, 1415, 1488, 1514, 1520, 1522, 1568, 1610, 1646, 1647, 1649, 1747, 1749, 1749, 1765, 1766, 1774, 1775, 1786, 1788, 1791, 1791, 1808, 1808, 1810, 1839, 1869, 1957, 1969, 1969, 1994, 2026, 2036, 2037, 2042, 2042, 2048, 2069, 2074, 2074, 2084, 2084, 2088, 2088, 2112, 2136, 2208, 2208, 2210, 2220, 2308, 2361, 2365, 2365, 2384, 2384, 2392, 2401, 2417, 2423, 2425, 2431, 2437, 2444, 2447, 2448, 2451, 2472, 2474, 2480, 2482, 2482, 2486, 2489, 2493, 2493, 2510, 2510, 2524, 2525, 2527, 2529, 2544, 2545, 2565, 2570, 2575, 2576, 2579, 2600, 2602, 2608, 2610, 2611, 2613, 2614, 2616, 2617, 2649, 2652, 2654, 2654, 2674, 2676, 2693, 2701, 2703, 2705, 2707, 2728, 2730, 2736, 2738, 2739, 2741, 2745, 2749, 2749, 2768, 2768, 2784, 2785, 2821, 2828, 2831, 2832, 2835, 2856, 2858, 2864, 2866, 2867, 2869, 2873, 2877, 2877, 2908, 2909, 2911, 2913, 2929, 2929, 2947, 2947, 2949, 2954, 2958, 2960, 2962, 2965, 2969, 2970, 2972, 2972, 2974, 2975, 2979, 2980, 2984, 2986, 2990, 3001, 3024, 3024, 3077, 3084, 3086, 3088, 3090, 3112, 3114, 3123, 3125, 3129, 3133, 3133, 3160, 3161, 3168, 3169, 3205, 3212, 3214, 3216, 3218, 3240, 3242, 3251, 3253, 3257, 3261, 3261, 3294, 3294, 3296, 3297, 3313, 3314, 3333, 3340, 3342, 3344, 3346, 3386, 3389, 3389, 3406, 3406, 3424, 3425, 3450, 3455, 3461, 3478, 3482, 3505, 3507, 3515, 3517, 3517, 3520, 3526, 3585, 3632, 3634, 3635, 3648, 3654, 3713, 3714, 3716, 3716, 3719, 3720, 3722, 3722, 3725, 3725, 3732, 3735, 3737, 3743, 3745, 3747, 3749, 3749, 3751, 3751, 3754, 3755, 3757, 3760, 3762, 3763, 3773, 3773, 3776, 3780, 3782, 3782, 3804, 3807, 3840, 3840, 3904, 3911, 3913, 3948, 3976, 3980, 4096, 4138, 4159, 4159, 4176, 4181, 4186, 4189, 4193, 4193, 4197, 4198, 4206, 4208, 4213, 4225, 4238, 4238, 
4256, 4293, 4295, 4295, 4301, 4301, 4304, 4346, 4348, 4680, 4682, 4685, 4688, 4694, 4696, 4696, 4698, 4701, 4704, 4744, 4746, 4749, 4752, 4784, 4786, 4789, 4792, 4798, 4800, 4800, 4802, 4805, 4808, 4822, 4824, 4880, 4882, 4885, 4888, 4954, 4992, 5007, 5024, 5108, 5121, 5740, 5743, 5759, 5761, 5786, 5792, 5866, 5870, 5872, 5888, 5900, 5902, 5905, 5920, 5937, 5952, 5969, 5984, 5996, 5998, 6000, 6016, 6067, 6103, 6103, 6108, 6108, 6176, 6263, 6272, 6312, 6314, 6314, 6320, 6389, 6400, 6428, 6480, 6509, 6512, 6516, 6528, 6571, 6593, 6599, 6656, 6678, 6688, 6740, 6823, 6823, 6917, 6963, 6981, 6987, 7043, 7072, 7086, 7087, 7098, 7141, 7168, 7203, 7245, 7247, 7258, 7293, 7401, 7404, 7406, 7409, 7413, 7414, 7424, 7615, 7680, 7957, 7960, 7965, 7968, 8005, 8008, 8013, 8016, 8023, 8025, 8025, 8027, 8027, 8029, 8029, 8031, 8061, 8064, 8116, 8118, 8124, 8126, 8126, 8130, 8132, 8134, 8140, 8144, 8147, 8150, 8155, 8160, 8172, 8178, 8180, 8182, 8188, 8305, 8305, 8319, 8319, 8336, 8348, 8450, 8450, 8455, 8455, 8458, 8467, 8469, 8469, 8473, 8477, 8484, 8484, 8486, 8486, 8488, 8488, 8490, 8493, 8495, 8505, 8508, 8511, 8517, 8521, 8526, 8526, 8544, 8584, 11264, 11310, 11312, 11358, 11360, 11492, 11499, 11502, 11506, 11507, 11520, 11557, 11559, 11559, 11565, 11565, 11568, 11623, 11631, 11631, 11648, 11670, 11680, 11686, 11688, 11694, 11696, 11702, 11704, 11710, 11712, 11718, 11720, 11726, 11728, 11734, 11736, 11742, 11823, 11823, 12293, 12295, 12321, 12329, 12337, 12341, 12344, 12348, 12353, 12438, 12445, 12447, 12449, 12538, 12540, 12543, 12549, 12589, 12593, 12686, 12704, 12730, 12784, 12799, 13312, 19893, 19968, 40908, 40960, 42124, 42192, 42237, 42240, 42508, 42512, 42527, 42538, 42539, 42560, 42606, 42623, 42647, 42656, 42735, 42775, 42783, 42786, 42888, 42891, 42894, 42896, 42899, 42912, 42922, 43000, 43009, 43011, 43013, 43015, 43018, 43020, 43042, 43072, 43123, 43138, 43187, 43250, 43255, 43259, 43259, 43274, 43301, 43312, 43334, 43360, 43388, 43396, 43442, 43471, 43471, 43520, 
43560, 43584, 43586, 43588, 43595, 43616, 43638, 43642, 43642, 43648, 43695, 43697, 43697, 43701, 43
const unicodeIdentifierPart: u16[] = [170, 170, 181, 181, 186, 186, 192, 214, 216, 246, 248, 705, 710, 721, 736, 740, 748, 748, 750, 750, 768, 884, 886, 887, 890, 893, 902, 902, 904, 906, 908, 908, 910, 929, 931, 1013, 1015, 1153, 1155, 1159, 1162, 1319, 1329, 1366, 1369, 1369, 1377, 1415, 1425, 1469, 1471, 1471, 1473, 1474, 1476, 1477, 1479, 1479, 1488, 1514, 1520, 1522, 1552, 1562, 1568, 1641, 1646, 1747, 1749, 1756, 1759, 1768, 1770, 1788, 1791, 1791, 1808, 1866, 1869, 1969, 1984, 2037, 2042, 2042, 2048, 2093, 2112, 2139, 2208, 2208, 2210, 2220, 2276, 2302, 2304, 2403, 2406, 2415, 2417, 2423, 2425, 2431, 2433, 2435, 2437, 2444, 2447, 2448, 2451, 2472, 2474, 2480, 2482, 2482, 2486, 2489, 2492, 2500, 2503, 2504, 2507, 2510, 2519, 2519, 2524, 2525, 2527, 2531, 2534, 2545, 2561, 2563, 2565, 2570, 2575, 2576, 2579, 2600, 2602, 2608, 2610, 2611, 2613, 2614, 2616, 2617, 2620, 2620, 2622, 2626, 2631, 2632, 2635, 2637, 2641, 2641, 2649, 2652, 2654, 2654, 2662, 2677, 2689, 2691, 2693, 2701, 2703, 2705, 2707, 2728, 2730, 2736, 2738, 2739, 2741, 2745, 2748, 2757, 2759, 2761, 2763, 2765, 2768, 2768, 2784, 2787, 2790, 2799, 2817, 2819, 2821, 2828, 2831, 2832, 2835, 2856, 2858, 2864, 2866, 2867, 2869, 2873, 2876, 2884, 2887, 2888, 2891, 2893, 2902, 2903, 2908, 2909, 2911, 2915, 2918, 2927, 2929, 2929, 2946, 2947, 2949, 2954, 2958, 2960, 2962, 2965, 2969, 2970, 2972, 2972, 2974, 2975, 2979, 2980, 2984, 2986, 2990, 3001, 3006, 3010, 3014, 3016, 3018, 3021, 3024, 3024, 3031, 3031, 3046, 3055, 3073, 3075, 3077, 3084, 3086, 3088, 3090, 3112, 3114, 3123, 3125, 3129, 3133, 3140, 3142, 3144, 3146, 3149, 3157, 3158, 3160, 3161, 3168, 3171, 3174, 3183, 3202, 3203, 3205, 3212, 3214, 3216, 3218, 3240, 3242, 3251, 3253, 3257, 3260, 3268, 3270, 3272, 3274, 3277, 3285, 3286, 3294, 3294, 3296, 3299, 3302, 3311, 3313, 3314, 3330, 3331, 3333, 3340, 3342, 3344, 3346, 3386, 3389, 3396, 3398, 3400, 3402, 3406, 3415, 3415, 3424, 3427, 3430, 3439, 3450, 3455, 3458, 3459, 3461, 3478, 3482, 3505, 
3507, 3515, 3517, 3517, 3520, 3526, 3530, 3530, 3535, 3540, 3542, 3542, 3544, 3551, 3570, 3571, 3585, 3642, 3648, 3662, 3664, 3673, 3713, 3714, 3716, 3716, 3719, 3720, 3722, 3722, 3725, 3725, 3732, 3735, 3737, 3743, 3745, 3747, 3749, 3749, 3751, 3751, 3754, 3755, 3757, 3769, 3771, 3773, 3776, 3780, 3782, 3782, 3784, 3789, 3792, 3801, 3804, 3807, 3840, 3840, 3864, 3865, 3872, 3881, 3893, 3893, 3895, 3895, 3897, 3897, 3902, 3911, 3913, 3948, 3953, 3972, 3974, 3991, 3993, 4028, 4038, 4038, 4096, 4169, 4176, 4253, 4256, 4293, 4295, 4295, 4301, 4301, 4304, 4346, 4348, 4680, 4682, 4685, 4688, 4694, 4696, 4696, 4698, 4701, 4704, 4744, 4746, 4749, 4752, 4784, 4786, 4789, 4792, 4798, 4800, 4800, 4802, 4805, 4808, 4822, 4824, 4880, 4882, 4885, 4888, 4954, 4957, 4959, 4992, 5007, 5024, 5108, 5121, 5740, 5743, 5759, 5761, 5786, 5792, 5866, 5870, 5872, 5888, 5900, 5902, 5908, 5920, 5940, 5952, 5971, 5984, 5996, 5998, 6000, 6002, 6003, 6016, 6099, 6103, 6103, 6108, 6109, 6112, 6121, 6155, 6157, 6160, 6169, 6176, 6263, 6272, 6314, 6320, 6389, 6400, 6428, 6432, 6443, 6448, 6459, 6470, 6509, 6512, 6516, 6528, 6571, 6576, 6601, 6608, 6617, 6656, 6683, 6688, 6750, 6752, 6780, 6783, 6793, 6800, 6809, 6823, 6823, 6912, 6987, 6992, 7001, 7019, 7027, 7040, 7155, 7168, 7223, 7232, 7241, 7245, 7293, 7376, 7378, 7380, 7414, 7424, 7654, 7676, 7957, 7960, 7965, 7968, 8005, 8008, 8013, 8016, 8023, 8025, 8025, 8027, 8027, 8029, 8029, 8031, 8061, 8064, 8116, 8118, 8124, 8126, 8126, 8130, 8132, 8134, 8140, 8144, 8147, 8150, 8155, 8160, 8172, 8178, 8180, 8182, 8188, 8204, 8205, 8255, 8256, 8276, 8276, 8305, 8305, 8319, 8319, 8336, 8348, 8400, 8412, 8417, 8417, 8421, 8432, 8450, 8450, 8455, 8455, 8458, 8467, 8469, 8469, 8473, 8477, 8484, 8484, 8486, 8486, 8488, 8488, 8490, 8493, 8495, 8505, 8508, 8511, 8517, 8521, 8526, 8526, 8544, 8584, 11264, 11310, 11312, 11358, 11360, 11492, 11499, 11507, 11520, 11557, 11559, 11559, 11565, 11565, 11568, 11623, 11631, 11631, 11647, 11670, 11680, 11686, 11688, 
11694, 11696, 11702, 11704, 11710, 11712, 11718, 11720, 11726, 11728, 11734, 11736, 11742, 11744, 11775
/**
 * Binary-searches a table of inclusive ranges for `code`.
 *
 * @param code Character code to look up.
 * @param map Flat list of [first, last] pairs: even indices hold range starts, odd
 *   indices the matching range ends. The search presumes the pairs are sorted
 *   ascending.
 * @returns true if `code` lies within one of the ranges.
 */
function lookupInUnicodeMap(code: u16, map: u16[]): bool {
  // below the first range: no need to search
  if (code < map[0])
    return false;
  let lo: i32 = 0;
  let hi: i32 = map.length;
  while (lo + 1 < hi) {
    // midpoint, rounded down to an even index so it lands on a range start
    let mid: i32 = lo + (hi - lo) / 2;
    mid -= mid % 2;
    if (code < map[mid]) {
      hi = mid;
      continue;
    }
    // here map[mid] <= code, so only the upper bound remains to be checked
    if (code <= map[mid + 1])
      return true;
    lo = mid + 2;
  }
  return false;
}
/**
 * Tests whether `code` is listed in the Unicode identifier-start table.
 * Codes outside 0..0xffff are rejected without consulting the table.
 */
function isUnicodeIdentifierStart(code: i32): bool {
  const fitsTable: bool = code >= 0 && code <= 0xffff;
  return fitsTable && lookupInUnicodeMap(code as u16, unicodeIdentifierStart);
}
/**
 * Tests whether `code` is listed in the Unicode identifier-part table.
 * Codes outside 0..0xffff are rejected without consulting the table.
 */
function isUnicodeIdentifierPart(code: i32): bool {
  const fitsTable: bool = code >= 0 && code <= 0xffff;
  return fitsTable && lookupInUnicodeMap(code as u16, unicodeIdentifierPart);
}