From 44fc13b9e071e3b9a7999d55e8970017ea492ece Mon Sep 17 00:00:00 2001 From: sbosse Date: Mon, 21 Jul 2025 23:21:06 +0200 Subject: [PATCH] Mon 21 Jul 22:43:21 CEST 2025 --- js/ui/webui/luaparse.js | 2359 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 2359 insertions(+) create mode 100644 js/ui/webui/luaparse.js diff --git a/js/ui/webui/luaparse.js b/js/ui/webui/luaparse.js new file mode 100644 index 0000000..d98ad2d --- /dev/null +++ b/js/ui/webui/luaparse.js @@ -0,0 +1,2359 @@ +/* global exports:true, module:true, require:true, define:true, global:true */ + +(function (root, name, factory) { + 'use strict'; + + // Used to determine if values are of the language type `Object` + var objectTypes = { + 'function': true + , 'object': true + } + // Detect free variable `exports` + , freeExports = objectTypes[typeof exports] && exports && !exports.nodeType && exports + // Detect free variable `module` + , freeModule = objectTypes[typeof module] && module && !module.nodeType && module + // Detect free variable `global`, from Node.js or Browserified code, and + // use it as `window` + , freeGlobal = freeExports && freeModule && typeof global === 'object' && global + // Detect the popular CommonJS extension `module.exports` + , moduleExports = freeModule && freeModule.exports === freeExports && freeExports; + + /* istanbul ignore else */ + if (freeGlobal && (freeGlobal.global === freeGlobal || + /* istanbul ignore next */ freeGlobal.window === freeGlobal || + /* istanbul ignore next */ freeGlobal.self === freeGlobal)) { + root = freeGlobal; + } + + // Some AMD build optimizers, like r.js, check for specific condition + // patterns like the following: + /* istanbul ignore if */ + if (typeof define === 'function' && + /* istanbul ignore next */ typeof define.amd === 'object' && + /* istanbul ignore next */ define.amd) { + // defined as an anonymous module. 
+ define(['exports'], factory); + // In case the source has been processed and wrapped in a define module use + // the supplied `exports` object. + if (freeExports && moduleExports) factory(freeModule.exports); + } + // check for `exports` after `define` in case a build optimizer adds an + // `exports` object + else /* istanbul ignore else */ if (freeExports && freeModule) { + // in Node.js or RingoJS v0.8.0+ + /* istanbul ignore else */ + if (moduleExports) factory(freeModule.exports); + // in Narwhal or RingoJS v0.7.0- + else factory(freeExports); + } + // in a browser or Rhino + else { + factory((root[name] = {})); + } +}(this, 'luaparse', function (exports) { + 'use strict'; + + exports.version = '0.2.1'; + + var input, options, length, features; + + // Options can be set either globally on the parser object through + // defaultOptions, or during the parse call. + var defaultOptions = exports.defaultOptions = { + // Explicitly tell the parser when the input ends. + wait: false + // Store comments as an array in the chunk object. + , comments: true + // Track identifier scopes by adding an isLocal attribute to each + // identifier-node. + , scope: false + // Store location information on each syntax node as + // `loc: { start: { line, column }, end: { line, column } }`. + , locations: false + // Store the start and end character locations on each syntax node as + // `range: [start, end]`. + , ranges: false + // A callback which will be invoked when a syntax node has been completed. + // The node which has been created will be passed as the only parameter. + , onCreateNode: null + // A callback which will be invoked when a new scope is created. + , onCreateScope: null + // A callback which will be invoked when the current scope is destroyed. + , onDestroyScope: null + // A callback which will be invoked when a local variable is declared in the current scope. 
+ // The variable's name will be passed as the only parameter + , onLocalDeclaration: null + // The version of Lua targeted by the parser (string; allowed values are + // '5.1', '5.2', '5.3'). + , luaVersion: '5.1' + // Whether to allow code points outside the Basic Latin block in identifiers + , extendedIdentifiers: false + }; + + // The available tokens expressed as enum flags so they can be checked with + // bitwise operations. + + var EOF = 1, StringLiteral = 2, Keyword = 4, Identifier = 8 + , NumericLiteral = 16, Punctuator = 32, BooleanLiteral = 64 + , NilLiteral = 128, VarargLiteral = 256; + + exports.tokenTypes = { EOF: EOF, StringLiteral: StringLiteral + , Keyword: Keyword, Identifier: Identifier, NumericLiteral: NumericLiteral + , Punctuator: Punctuator, BooleanLiteral: BooleanLiteral + , NilLiteral: NilLiteral, VarargLiteral: VarargLiteral + }; + + // As this parser is a bit different from luas own, the error messages + // will be different in some situations. + + var errors = exports.errors = { + unexpected: 'unexpected %1 \'%2\' near \'%3\'' + , expected: '\'%1\' expected near \'%2\'' + , expectedToken: '%1 expected near \'%2\'' + , unfinishedString: 'unfinished string near \'%1\'' + , malformedNumber: 'malformed number near \'%1\'' + , invalidVar: 'invalid left-hand side of assignment near \'%1\'' + , decimalEscapeTooLarge: 'decimal escape too large near \'%1\'' + , invalidEscape: 'invalid escape sequence near \'%1\'' + , hexadecimalDigitExpected: 'hexadecimal digit expected near \'%1\'' + , braceExpected: 'missing \'%1\' near \'%2\'' + , tooLargeCodepoint: 'UTF-8 value too large near \'%1\'' + , unfinishedLongString: 'unfinished long string (starting at line %1) near \'%2\'' + , unfinishedLongComment: 'unfinished long comment (starting at line %1) near \'%2\'' + , ambiguousSyntax: 'ambiguous syntax (function call x new statement) near \'%1\'' + }; + + // ### Abstract Syntax Tree + // + // The default AST structure is inspired by the Mozilla Parser 
API but can + // easily be customized by overriding these functions. + + var ast = exports.ast = { + labelStatement: function(label) { + return { + type: 'LabelStatement' + , label: label + }; + } + + , breakStatement: function() { + return { + type: 'BreakStatement' + }; + } + + , gotoStatement: function(label) { + return { + type: 'GotoStatement' + , label: label + }; + } + + , returnStatement: function(args) { + return { + type: 'ReturnStatement' + , 'arguments': args + }; + } + + , ifStatement: function(clauses) { + return { + type: 'IfStatement' + , clauses: clauses + }; + } + , ifClause: function(condition, body) { + return { + type: 'IfClause' + , condition: condition + , body: body + }; + } + , elseifClause: function(condition, body) { + return { + type: 'ElseifClause' + , condition: condition + , body: body + }; + } + , elseClause: function(body) { + return { + type: 'ElseClause' + , body: body + }; + } + + , whileStatement: function(condition, body) { + return { + type: 'WhileStatement' + , condition: condition + , body: body + }; + } + + , doStatement: function(body) { + return { + type: 'DoStatement' + , body: body + }; + } + + , repeatStatement: function(condition, body) { + return { + type: 'RepeatStatement' + , condition: condition + , body: body + }; + } + + , localStatement: function(variables, init) { + return { + type: 'LocalStatement' + , variables: variables + , init: init + }; + } + + , assignmentStatement: function(variables, init) { + return { + type: 'AssignmentStatement' + , variables: variables + , init: init + }; + } + + , callStatement: function(expression) { + return { + type: 'CallStatement' + , expression: expression + }; + } + + , functionStatement: function(identifier, parameters, isLocal, body) { + return { + type: 'FunctionDeclaration' + , identifier: identifier + , isLocal: isLocal + , parameters: parameters + , body: body + }; + } + + , forNumericStatement: function(variable, start, end, step, body) { + return { + type: 
'ForNumericStatement' + , variable: variable + , start: start + , end: end + , step: step + , body: body + }; + } + + , forGenericStatement: function(variables, iterators, body) { + return { + type: 'ForGenericStatement' + , variables: variables + , iterators: iterators + , body: body + }; + } + + , chunk: function(body) { + return { + type: 'Chunk' + , body: body + }; + } + + , identifier: function(name) { + return { + type: 'Identifier' + , name: name + }; + } + + , literal: function(type, value, raw) { + type = (type === StringLiteral) ? 'StringLiteral' + : (type === NumericLiteral) ? 'NumericLiteral' + : (type === BooleanLiteral) ? 'BooleanLiteral' + : (type === NilLiteral) ? 'NilLiteral' + : 'VarargLiteral'; + + return { + type: type + , value: value + , raw: raw + }; + } + + , tableKey: function(key, value) { + return { + type: 'TableKey' + , key: key + , value: value + }; + } + , tableKeyString: function(key, value) { + return { + type: 'TableKeyString' + , key: key + , value: value + }; + } + , tableValue: function(value) { + return { + type: 'TableValue' + , value: value + }; + } + + + , tableConstructorExpression: function(fields) { + return { + type: 'TableConstructorExpression' + , fields: fields + }; + } + , binaryExpression: function(operator, left, right) { + var type = ('and' === operator || 'or' === operator) ? 
+ 'LogicalExpression' : + 'BinaryExpression'; + + return { + type: type + , operator: operator + , left: left + , right: right + }; + } + , unaryExpression: function(operator, argument) { + return { + type: 'UnaryExpression' + , operator: operator + , argument: argument + }; + } + , memberExpression: function(base, indexer, identifier) { + return { + type: 'MemberExpression' + , indexer: indexer + , identifier: identifier + , base: base + }; + } + + , indexExpression: function(base, index) { + return { + type: 'IndexExpression' + , base: base + , index: index + }; + } + + , callExpression: function(base, args) { + return { + type: 'CallExpression' + , base: base + , 'arguments': args + }; + } + + , tableCallExpression: function(base, args) { + return { + type: 'TableCallExpression' + , base: base + , 'arguments': args + }; + } + + , stringCallExpression: function(base, argument) { + return { + type: 'StringCallExpression' + , base: base + , argument: argument + }; + } + + , comment: function(value, raw) { + return { + type: 'Comment' + , value: value + , raw: raw + }; + } + }; + + // Wrap up the node object. + + function finishNode(node) { + // Pop a `Marker` off the location-array and attach its location data. + if (trackLocations) { + var location = locations.pop(); + location.complete(); + location.bless(node); + } + if (options.onCreateNode) options.onCreateNode(node); + return node; + } + + + // Helpers + // ------- + + var slice = Array.prototype.slice + , toString = Object.prototype.toString + , indexOf = function indexOf(array, element) { + for (var i = 0, length = array.length; i < length; ++i) { + if (array[i] === element) return i; + } + return -1; + }; + + // Iterate through an array of objects and return the index of an object + // with a matching property. 
function indexOfObject(array, property, element) {
  for (var i = 0, length = array.length; i < length; ++i) {
    if (array[i][property] === element) return i;
  }
  return -1;
}

// A sprintf implementation using %index (beginning at 1) to input
// arguments in the format string. Only single-digit indexes (%1 - %9) are
// recognised. A placeholder whose argument was not supplied is replaced
// with the empty string.
//
// Example:
//
//     // Unexpected function in token
//     sprintf('Unexpected %2 in %1.', 'token', 'function');

function sprintf(format) {
  var args = Array.prototype.slice.call(arguments, 1);
  return format.replace(/%(\d)/g, function (match, index) {
    var arg = args[index - 1];
    // `'' + undefined` is the truthy string 'undefined', so the fallback
    // has to be decided before the value is stringified.
    return ('undefined' === typeof arg) ? '' : '' + arg;
  });
}

// Returns a new object with the own enumerable properties from all objects
// passed as arguments. Last argument takes precedence. The arguments
// themselves are never modified.
//
// Example:
//
//     this.options = extend(options, { output: false });

function extend() {
  var dest = {}
    , hasOwn = Object.prototype.hasOwnProperty
    , src, prop;

  for (var i = 0, length = arguments.length; i < length; ++i) {
    src = arguments[i];
    for (prop in src)
      /* istanbul ignore else */
      // Borrow hasOwnProperty from Object.prototype so that sources created
      // with Object.create(null), or sources that shadow the method, work.
      if (hasOwn.call(src, prop)) {
        dest[prop] = src[prop];
      }
  }
  return dest;
}

// ### Error functions

// XXX: Eliminate this function and change the error type to be different from SyntaxError.
// This will unfortunately be a breaking change, because some downstream users depend
// on the error thrown being an instance of SyntaxError. For example, the Ace editor.
//
// Returns an object that inherits from `e` (so `instanceof SyntaxError`
// still holds) and carries writable own `line`, `index` and `column`
// properties. When `Object.create` is unavailable `e` is returned as is.

function fixupError(e) {
  if (!Object.create)
    return e;
  return Object.create(e, {
    'line': { 'writable': true, value: e.line },
    'index': { 'writable': true, value: e.index },
    'column': { 'writable': true, value: e.column }
  });
}

// #### Raise an exception.
//
// Raise an exception by passing a token, a string format and its parameters.
//
// The passed token's location will automatically be added to the error
// message if it exists; if not (an empty object, a plain string, `null` or
// `undefined`) it will default to the lexer's current position.
//
// The thrown error exposes `line`, `index` and `column` properties.
//
// Example:
//
//     // [1:0] expected [ near (
//     raise(token, "expected %1 near %2", '[', token.value);

function raise(token) {
  var message = sprintf.apply(null, Array.prototype.slice.call(arguments, 1))
    , error, col;

  if (token && 'undefined' !== typeof token.line) {
    col = token.range[0] - token.lineStart;
    error = fixupError(new SyntaxError(sprintf('[%1:%2] %3', token.line, col, message)));
    error.line = token.line;
    error.index = token.range[0];
    error.column = col;
  } else {
    col = index - lineStart + 1;
    error = fixupError(new SyntaxError(sprintf('[%1:%2] %3', line, col, message)));
    error.index = index;
    error.line = line;
    error.column = col;
  }
  throw error;
}

// #### Raise an unexpected token error.
//
// `type` is a description of what was expected; it is substituted verbatim
// into `errors.expectedToken`.
//
// Example:
//
//     // <name> expected near '0'
//     raiseUnexpectedToken('<name>', token);

function raiseUnexpectedToken(type, token) {
  raise(token, errors.expectedToken, type, token.value);
}

// #### Raise a general unexpected error
//
// Pass either the offending token object or the offending character as a
// string. The text reported after "near" is the value of the current
// lookahead token.
//
// Example:
//
//     // unexpected keyword 'end' near '...'
//     unexpected(token);
// Raise an `errors.unexpected` error for `found`, which is either a token
// object (anything with a `type` property) or the offending text as a
// string. This function never returns normally because `raise` throws.

function unexpected(found) {
  var isToken = 'undefined' !== typeof found.type
    , near, type;

  // `lookahead` has no value until the parser has read its first token, and
  // the lexer can get here before that happens. Fall back to the offending
  // text itself instead of failing with a TypeError.
  if (lookahead) near = lookahead.value;
  else near = isToken ? found.value : found;

  if (!isToken) return raise(found, errors.unexpected, 'symbol', found, near);

  switch (found.type) {
    case StringLiteral:   type = 'string';      break;
    case Keyword:         type = 'keyword';     break;
    case Identifier:      type = 'identifier';  break;
    case NumericLiteral:  type = 'number';      break;
    case Punctuator:      type = 'symbol';      break;
    case BooleanLiteral:  type = 'boolean';     break;
    case NilLiteral:
      // The token value of `nil` is `null`, so spell it out.
      return raise(found, errors.unexpected, 'symbol', 'nil', near);
    default:
      // Remaining token types (VarargLiteral, EOF) have no dedicated label;
      // without this the message would read "unexpected undefined".
      type = 'symbol';
  }
  return raise(found, errors.unexpected, type, found.value, near);
}

// Lexer
// -----
//
// The lexer, or the tokenizer, reads the input string character by character
// and derives a token left-right. To be as efficient as possible the lexer
// prioritizes the common cases such as identifiers. It also works with
// character codes instead of characters as string comparisons were the
// biggest bottleneck of the parser.
//
// If `options.comments` is enabled, all comments encountered will be stored
// in an array which later will be appended to the chunk object. If disabled,
// they will simply be disregarded.
//
// When the lexer has derived a valid token, it will be returned as an object
// containing its value as well as its position in the input string (this
// is always enabled to provide proper debug messages).
//
// `lex()` starts lexing and returns the following token in the stream.
// Lexer and parser state shared by the functions below.
var index
  , token
  , previousToken
  , lookahead
  , comments
  , tokenStart
  , line
  , lineStart;

exports.lex = lex;

// Skip whitespace and comments, then scan and return the next token. Every
// token carries `type`, `value`, `line`, `lineStart` and `range`.
function lex() {
  skipWhiteSpace();

  // Skip comments beginning with -- (45 is the char code of `-`).
  while (45 === input.charCodeAt(index) &&
         45 === input.charCodeAt(index + 1)) {
    scanComment();
    skipWhiteSpace();
  }
  // NOTE(review): the end-of-input token carries an empty string as its
  // value; confirm this is intended and not a placeholder lost in transit.
  if (index >= length) return {
      type : EOF
    , value: ''
    , line: line
    , lineStart: lineStart
    , range: [index, index]
  };

  var charCode = input.charCodeAt(index)
    , next = input.charCodeAt(index + 1);

  // Memorize the range index where the token begins.
  tokenStart = index;
  if (isIdentifierStart(charCode)) return scanIdentifierOrKeyword();

  switch (charCode) {
    case 39: case 34: // '"
      return scanStringLiteral();

    case 48: case 49: case 50: case 51: case 52: case 53:
    case 54: case 55: case 56: case 57: // 0-9
      return scanNumericLiteral();

    case 46: // .
      // If the dot is followed by a digit it's a float.
      if (isDecDigit(next)) return scanNumericLiteral();
      if (46 === next) {
        if (46 === input.charCodeAt(index + 2)) return scanVarargLiteral();
        return scanPunctuator('..');
      }
      return scanPunctuator('.');

    case 61: // =
      if (61 === next) return scanPunctuator('==');
      return scanPunctuator('=');

    case 62: // >
      if (features.bitwiseOperators && 62 === next) return scanPunctuator('>>');
      if (61 === next) return scanPunctuator('>=');
      return scanPunctuator('>');

    case 60: // <
      if (features.bitwiseOperators && 60 === next) return scanPunctuator('<<');
      if (61 === next) return scanPunctuator('<=');
      return scanPunctuator('<');

    case 126: // ~
      if (61 === next) return scanPunctuator('~=');
      // A lone `~` is only an operator when bitwise operators are enabled;
      // otherwise leave the switch and report it as unexpected.
      if (!features.bitwiseOperators)
        break;
      return scanPunctuator('~');

    case 58: // :
      if (features.labels && 58 === next) return scanPunctuator('::');
      return scanPunctuator(':');

    case 91: // [
      // Check for a multiline string, they begin with [= or [[
      if (91 === next || 61 === next) return scanLongStringLiteral();
      return scanPunctuator('[');

    case 47: // /
      // Check for integer division op (//)
      if (features.integerDivision && 47 === next) return scanPunctuator('//');
      return scanPunctuator('/');

    case 38: case 124: // & |
      if (!features.bitwiseOperators)
        break;

      /* fall through */
    case 42: case 94: case 37: case 44: case 123: case 125:
    case 93: case 40: case 41: case 59: case 35: case 45:
    case 43: // * ^ % , { } ] ( ) ; # - +

      return scanPunctuator(input.charAt(index));
  }

  return unexpected(input.charAt(index));
}

// Whitespace has no semantic meaning in Lua so simply skip ahead while
// tracking the encountered newlines. Any kind of eol sequence is counted as
// a single line.

// Consume one end-of-line sequence at the current position, updating `line`
// and `lineStart`. Returns whether a line terminator was consumed.
function consumeEOL() {
  var charCode = input.charCodeAt(index)
    , peekCharCode = input.charCodeAt(index + 1);

  if (isLineTerminator(charCode)) {
    // Count \n\r and \r\n as one newline.
    if (10 === charCode && 13 === peekCharCode) ++index;
    if (13 === charCode && 10 === peekCharCode) ++index;
    ++line;
    lineStart = ++index;

    return true;
  }
  return false;
}

// Advance `index` past any run of whitespace and line terminators.
function skipWhiteSpace() {
  while (index < length) {
    var charCode = input.charCodeAt(index);
    if (isWhiteSpace(charCode)) {
      ++index;
    } else if (!consumeEOL()) {
      break;
    }
  }
}

// Encode `codepoint` as UTF-8 and return the bytes as a string with one
// character per byte. Returns `null` for code points of 0x110000 and above.
function encodeUTF8(codepoint) {
  if (codepoint < 0x80) {
    return String.fromCharCode(codepoint);
  } else if (codepoint < 0x800) {
    return String.fromCharCode(
      0xc0 |  (codepoint >>  6)        ,
      0x80 | ( codepoint        & 0x3f)
    );
  } else if (codepoint < 0x10000) {
    return String.fromCharCode(
      0xe0 |  (codepoint >> 12)        ,
      0x80 | ((codepoint >>  6) & 0x3f),
      0x80 | ( codepoint        & 0x3f)
    );
  } else if (codepoint < 0x110000) {
    return String.fromCharCode(
      0xf0 |  (codepoint >> 18)        ,
      0x80 | ((codepoint >> 12) & 0x3f),
      0x80 | ((codepoint >>  6) & 0x3f),
      0x80 | ( codepoint        & 0x3f)
    );
  } else {
    return null;
  }
}

// This function takes a JavaScript string, encodes it in WTF-8 and
// reinterprets the resulting code units as code points; i.e. it encodes
// the string in what was the original meaning of WTF-8.
//
// For a detailed rationale, see the README.md file, section
// "Note on character encodings".

function fixupHighCharacters(s) {
  // The first alternative matches a well-formed surrogate pair, the second
  // any other non-ASCII code unit (including lone surrogates).
  return s.replace(/[\ud800-\udbff][\udc00-\udfff]|[^\x00-\x7f]/g, function (m) {
    if (m.length === 1)
      return encodeUTF8(m.charCodeAt(0));
    return encodeUTF8(0x10000 + (((m.charCodeAt(0) & 0x3ff) << 10) | (m.charCodeAt(1) & 0x3ff)));
  });
}

// Identifiers, keywords, booleans and nil all look the same syntax wise. We
// simply go through them one by one, defaulting to an identifier if no
// previous case matched.

function scanIdentifierOrKeyword() {
  var value, type;

  // Slicing the input string is preferred over string concatenation in a
  // loop for performance reasons.
  while (isIdentifierPart(input.charCodeAt(++index)));
  value = fixupHighCharacters(input.slice(tokenStart, index));

  // Decide on the token type and possibly cast the value.
  if (isKeyword(value)) {
    type = Keyword;
  } else if ('true' === value || 'false' === value) {
    type = BooleanLiteral;
    value = ('true' === value);
  } else if ('nil' === value) {
    type = NilLiteral;
    value = null;
  } else {
    type = Identifier;
  }

  return {
      type: type
    , value: value
    , line: line
    , lineStart: lineStart
    , range: [tokenStart, index]
  };
}

// Once a punctuator reaches this function it should already have been
// validated so we simply return it as a token.

function scanPunctuator(value) {
  index += value.length;
  return {
      type: Punctuator
    , value: value
    , line: line
    , lineStart: lineStart
    , range: [tokenStart, index]
  };
}

// A vararg literal consists of three dots.

function scanVarargLiteral() {
  index += 3;
  return {
      type: VarargLiteral
    , value: '...'
    , line: line
    , lineStart: lineStart
    , range: [tokenStart, index]
  };
}

// Find the string literal by matching the delimiter marks used.
//
// Raises `errors.unfinishedString` when a line terminator or the end of the
// input is reached before the closing delimiter.

function scanStringLiteral() {
  var delimiter = input.charCodeAt(index++)
    , beginLine = line
    , beginLineStart = lineStart
    , stringStart = index
    , string = ''
    , terminated = false
    , charCode;

  while (index < length) {
    charCode = input.charCodeAt(index++);
    if (delimiter === charCode) {
      terminated = true;
      break;
    }
    if (92 === charCode) { // backslash
      string += fixupHighCharacters(input.slice(stringStart, index - 1)) + readEscapeSequence();
      stringStart = index;
    }
    // EOF or `\n` terminates a string literal. If we haven't found the
    // ending delimiter by now, raise an exception.
    else if (index >= length || isLineTerminator(charCode)) {
      string += input.slice(stringStart, index - 1);
      raise({}, errors.unfinishedString, string + String.fromCharCode(charCode));
    }
  }

  // The loop also ends without a closing delimiter when the input stops
  // right after the opening quote or right after an escape sequence. Those
  // strings are unfinished too and must not be returned as tokens.
  if (!terminated) raise({}, errors.unfinishedString, string);

  string += fixupHighCharacters(input.slice(stringStart, index - 1));

  return {
      type: StringLiteral
    , value: string
    , line: beginLine
    , lineStart: beginLineStart
    , lastLine: line
    , lastLineStart: lineStart
    , range: [tokenStart, index]
  };
}

// Expect a multiline string literal and return it as a regular string
// literal, if it doesn't validate into a valid multiline string, throw an
// exception.

function scanLongStringLiteral() {
  var beginLine = line
    , beginLineStart = lineStart
    , string = readLongString(false);
  // Fail if it's not a multiline literal.
  if (false === string) {
    // `token` is the parser's current token and may not be set yet while the
    // very first token is being lexed; report at the lexer position then.
    if (token) raise(token, errors.expected, '[', token.value);
    raise({}, errors.expected, '[', input.slice(tokenStart, index));
  }

  return {
      type: StringLiteral
    , value: fixupHighCharacters(string)
    , line: beginLine
    , lineStart: beginLineStart
    , lastLine: line
    , lastLineStart: lineStart
    , range: [tokenStart, index]
  };
}

// Numeric literals will be returned as floating-point numbers instead of
// strings. The raw value should be retrieved from slicing the input string
// later on in the process.
+ // + // If a hexadecimal number is encountered, it will be converted. + + function scanNumericLiteral() { + var character = input.charAt(index) + , next = input.charAt(index + 1); + + var value = ('0' === character && 'xX'.indexOf(next || null) >= 0) ? + readHexLiteral() : readDecLiteral(); + + return { + type: NumericLiteral + , value: value + , line: line + , lineStart: lineStart + , range: [tokenStart, index] + }; + } + + // Lua hexadecimals have an optional fraction part and an optional binary + // exoponent part. These are not included in JavaScript so we will compute + // all three parts separately and then sum them up at the end of the function + // with the following algorithm. + // + // Digit := toDec(digit) + // Fraction := toDec(fraction) / 16 ^ fractionCount + // BinaryExp := 2 ^ binaryExp + // Number := ( Digit + Fraction ) * BinaryExp + + function readHexLiteral() { + var fraction = 0 // defaults to 0 as it gets summed + , binaryExponent = 1 // defaults to 1 as it gets multiplied + , binarySign = 1 // positive + , digit, fractionStart, exponentStart, digitStart; + + digitStart = index += 2; // Skip 0x part + + // A minimum of one hex digit is required. + if (!isHexDigit(input.charCodeAt(index))) + raise({}, errors.malformedNumber, input.slice(tokenStart, index)); + + while (isHexDigit(input.charCodeAt(index))) ++index; + // Convert the hexadecimal digit to base 10. + digit = parseInt(input.slice(digitStart, index), 16); + + // Fraction part i optional. + if ('.' === input.charAt(index)) { + fractionStart = ++index; + + while (isHexDigit(input.charCodeAt(index))) ++index; + fraction = input.slice(fractionStart, index); + + // Empty fraction parts should default to 0, others should be converted + // 0.x form so we can use summation at the end. + fraction = (fractionStart === index) ? 
0 + : parseInt(fraction, 16) / Math.pow(16, index - fractionStart); + } + + // Binary exponents are optional + if ('pP'.indexOf(input.charAt(index) || null) >= 0) { + ++index; + + // Sign part is optional and defaults to 1 (positive). + if ('+-'.indexOf(input.charAt(index) || null) >= 0) + binarySign = ('+' === input.charAt(index++)) ? 1 : -1; + + exponentStart = index; + + // The binary exponent sign requires a decimal digit. + if (!isDecDigit(input.charCodeAt(index))) + raise({}, errors.malformedNumber, input.slice(tokenStart, index)); + + while (isDecDigit(input.charCodeAt(index))) ++index; + binaryExponent = input.slice(exponentStart, index); + + // Calculate the binary exponent of the number. + binaryExponent = Math.pow(2, binaryExponent * binarySign); + } + + return (digit + fraction) * binaryExponent; + } + + // Decimal numbers are exactly the same in Lua and in JavaScript, because of + // this we check where the token ends and then parse it with native + // functions. + + function readDecLiteral() { + while (isDecDigit(input.charCodeAt(index))) ++index; + // Fraction part is optional + if ('.' === input.charAt(index)) { + ++index; + // Fraction part defaults to 0 + while (isDecDigit(input.charCodeAt(index))) ++index; + } + // Exponent part is optional. + if ('eE'.indexOf(input.charAt(index) || null) >= 0) { + ++index; + // Sign part is optional. + if ('+-'.indexOf(input.charAt(index) || null) >= 0) ++index; + // An exponent is required to contain at least one decimal digit. 
+ if (!isDecDigit(input.charCodeAt(index))) + raise({}, errors.malformedNumber, input.slice(tokenStart, index)); + + while (isDecDigit(input.charCodeAt(index))) ++index; + } + + return parseFloat(input.slice(tokenStart, index)); + } + + function readUnicodeEscapeSequence() { + var sequenceStart = index++; + + if (input.charAt(index++) !== '{') + raise({}, errors.braceExpected, '{', '\\' + input.slice(sequenceStart, index)); + if (!isHexDigit(input.charCodeAt(index))) + raise({}, errors.hexadecimalDigitExpected, '\\' + input.slice(sequenceStart, index)); + + while (input.charCodeAt(index) === 0x30) ++index; + var escStart = index; + + while (isHexDigit(input.charCodeAt(index))) { + ++index; + if (index - escStart > 6) + raise({}, errors.tooLargeCodepoint, '\\' + input.slice(sequenceStart, index)); + } + + var b = input.charAt(index++); + if (b !== '}') { + if ((b === '"') || (b === "'")) + raise({}, errors.braceExpected, '}', '\\' + input.slice(sequenceStart, index--)); + else + raise({}, errors.hexadecimalDigitExpected, '\\' + input.slice(sequenceStart, index)); + } + + var codepoint = parseInt(input.slice(escStart, index - 1), 16); + + codepoint = encodeUTF8(codepoint); + if (codepoint === null) { + raise({}, errors.tooLargeCodepoint, '\\' + input.slice(sequenceStart, index)); + } + return codepoint; + } + + // Translate escape sequences to the actual characters. + function readEscapeSequence() { + var sequenceStart = index; + switch (input.charAt(index)) { + // Lua allow the following escape sequences. + case 'a': ++index; return '\x07'; + case 'n': ++index; return '\n'; + case 'r': ++index; return '\r'; + case 't': ++index; return '\t'; + case 'v': ++index; return '\x0b'; + case 'b': ++index; return '\b'; + case 'f': ++index; return '\f'; + + // Backslash at the end of the line. We treat all line endings as equivalent, + // and as representing the [LF] character (code 10). Lua 5.1 through 5.3 + // have been verified to behave the same way. 
+ case '\r': + case '\n': + consumeEOL(); + return '\n'; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + // \ddd, where ddd is a sequence of up to three decimal digits. + while (isDecDigit(input.charCodeAt(index)) && index - sequenceStart < 3) ++index; + + var ddd = parseInt(input.slice(sequenceStart, index), 10); + if (ddd > 255) { + raise({}, errors.decimalEscapeTooLarge, '\\' + ddd); + } + return String.fromCharCode(ddd); + + case 'z': + if (features.skipWhitespaceEscape) { + ++index; + skipWhiteSpace(); + return ''; + } + + /* fall through */ + case 'x': + if (features.hexEscapes) { + // \xXX, where XX is a sequence of exactly two hexadecimal digits + if (isHexDigit(input.charCodeAt(index + 1)) && + isHexDigit(input.charCodeAt(index + 2))) { + index += 3; + return String.fromCharCode(parseInt(input.slice(sequenceStart + 1, index), 16)); + } + raise({}, errors.hexadecimalDigitExpected, '\\' + input.slice(sequenceStart, index + 2)); + } + + /* fall through */ + case 'u': + if (features.unicodeEscapes) { + return readUnicodeEscapeSequence(); + } + + /* fall through */ + default: + if (features.strictEscapes) + raise({}, errors.invalidEscape, '\\' + input.slice(sequenceStart, index + 1)); + + /* fall through */ + case '\\': case '"': case "'": + return input.charAt(index++); + } + } + + // Comments begin with -- after which it will be decided if they are + // multiline comments or not. + // + // The multiline functionality works the exact same way as with string + // literals so we reuse the functionality. + + function scanComment() { + tokenStart = index; + index += 2; // -- + + var character = input.charAt(index) + , content = '' + , isLong = false + , commentStart = index + , lineStartComment = lineStart + , lineComment = line; + + if ('[' === character) { + content = readLongString(true); + // This wasn't a multiline comment after all. 
+ if (false === content) content = character; + else isLong = true; + } + // Scan until next line as long as it's not a multiline comment. + if (!isLong) { + while (index < length) { + if (isLineTerminator(input.charCodeAt(index))) break; + ++index; + } + if (options.comments) content = input.slice(commentStart, index); + } + + if (options.comments) { + var node = ast.comment(content, input.slice(tokenStart, index)); + + // `Marker`s depend on tokens available in the parser and as comments are + // intercepted in the lexer all location data is set manually. + if (options.locations) { + node.loc = { + start: { line: lineComment, column: tokenStart - lineStartComment } + , end: { line: line, column: index - lineStart } + }; + } + if (options.ranges) { + node.range = [tokenStart, index]; + } + if (options.onCreateNode) options.onCreateNode(node); + comments.push(node); + } + } + + // Read a multiline string by calculating the depth of `=` characters and + // then appending until an equal depth is found. + + function readLongString(isComment) { + var level = 0 + , content = '' + , terminator = false + , character, stringStart, firstLine = line; + + ++index; // [ + + // Calculate the depth of the comment. + while ('=' === input.charAt(index + level)) ++level; + // Exit, this is not a long string afterall. + if ('[' !== input.charAt(index + level)) return false; + + index += level + 1; + + // If the first character is a newline, ignore it and begin on next line. + if (isLineTerminator(input.charCodeAt(index))) consumeEOL(); + + stringStart = index; + while (index < length) { + // To keep track of line numbers run the `consumeEOL()` which increments + // its counter. + while (isLineTerminator(input.charCodeAt(index))) consumeEOL(); + + character = input.charAt(index++); + + // Once the delimiter is found, iterate through the depth count and see + // if it matches. 
+ if (']' === character) { + terminator = true; + for (var i = 0; i < level; ++i) { + if ('=' !== input.charAt(index + i)) terminator = false; + } + if (']' !== input.charAt(index + level)) terminator = false; + } + + // We reached the end of the multiline string. Get out now. + if (terminator) { + content += input.slice(stringStart, index - 1); + index += level + 1; + return content; + } + } + + raise({}, isComment ? + errors.unfinishedLongComment : + errors.unfinishedLongString, + firstLine, ''); + } + + // ## Lex functions and helpers. + + // Read the next token. + // + // This is actually done by setting the current token to the lookahead and + // reading in the new lookahead token. + + function next() { + previousToken = token; + token = lookahead; + lookahead = lex(); + } + + // Consume a token if its value matches. Once consumed or not, return the + // success of the operation. + + function consume(value) { + if (value === token.value) { + next(); + return true; + } + return false; + } + + // Expect the next token value to match. If not, throw an exception. + + function expect(value) { + if (value === token.value) next(); + else raise(token, errors.expected, value, token.value); + } + + // ### Validation functions + + function isWhiteSpace(charCode) { + return 9 === charCode || 32 === charCode || 0xB === charCode || 0xC === charCode; + } + + function isLineTerminator(charCode) { + return 10 === charCode || 13 === charCode; + } + + function isDecDigit(charCode) { + return charCode >= 48 && charCode <= 57; + } + + function isHexDigit(charCode) { + return (charCode >= 48 && charCode <= 57) || (charCode >= 97 && charCode <= 102) || (charCode >= 65 && charCode <= 70); + } + + // From [Lua 5.2](http://www.lua.org/manual/5.2/manual.html#8.1) onwards + // identifiers cannot use 'locale-dependent' letters (i.e. dependent on the C locale). + // On the other hand, LuaJIT allows arbitrary octets ≥ 128 in identifiers. 

// True when `charCode` may open an identifier: an ASCII letter or `_`, or any
// code >= 128 when `options.extendedIdentifiers` is enabled.
function isIdentifierStart(charCode) {
  var upper = charCode >= 65 && charCode <= 90
    , lower = charCode >= 97 && charCode <= 122;

  if (upper || lower || 95 === charCode) return true;
  return Boolean(options.extendedIdentifiers && charCode >= 128);
}

// Same as `isIdentifierStart()` but decimal digits are accepted as well.
function isIdentifierPart(charCode) {
  var upper = charCode >= 65 && charCode <= 90
    , lower = charCode >= 97 && charCode <= 122
    , digit = charCode >= 48 && charCode <= 57;

  if (upper || lower || 95 === charCode || digit) return true;
  return Boolean(options.extendedIdentifiers && charCode >= 128);
}

// [3.1 Lexical Conventions](http://www.lua.org/manual/5.2/manual.html#3.1)
//
// `true`, `false` and `nil` will not be considered keywords, but literals.
//
// Candidates are bucketed by length so only a few comparisons run per call.
// `goto` is a keyword only when labels are enabled and goto is not
// contextual.

function isKeyword(id) {
  var size = id.length;

  if (2 === size) return 'do' === id || 'if' === id || 'in' === id || 'or' === id;
  if (3 === size) return 'and' === id || 'end' === id || 'for' === id || 'not' === id;
  if (4 === size) {
    if ('else' === id || 'then' === id) return true;
    return features.labels && !features.contextualGoto ? 'goto' === id : false;
  }
  if (5 === size) return 'break' === id || 'local' === id || 'until' === id || 'while' === id;
  if (6 === size) return 'elseif' === id || 'repeat' === id || 'return' === id;
  if (8 === size) return 'function' === id;
  return false;
}

// True for the punctuators `#`, `-`, `~` and for the keyword `not`.
function isUnary(token) {
  switch (token.type) {
    case Punctuator:
      return '#-~'.indexOf(token.value) >= 0;
    case Keyword:
      return 'not' === token.value;
    default:
      return false;
  }
}

// @TODO this needs to be rethought.
// True when the node is one of the three call-expression node types.
function isCallExpression(expression) {
  var type = expression.type;
  return 'CallExpression' === type ||
    'TableCallExpression' === type ||
    'StringCallExpression' === type;
}

// Check if the token syntactically closes a block.

// True for end-of-file and for the keywords `else`, `elseif`, `end` and
// `until`.
function isBlockFollow(token) {
  if (EOF === token.type) return true;
  if (Keyword !== token.type) return false;
  switch (token.value) {
    case 'else': case 'elseif':
    case 'end': case 'until':
      return true;
    default:
      return false;
  }
}

// Scope
// -----

// Store each block scope as an array of identifier names. Each scope is
// stored in a FILO-array.
var scopes
  // The current scope index
  , scopeDepth
  // A list of all global identifier nodes.
  , globals;

// Create a new scope inheriting all declarations from the previous scope.
// The parent's names are copied so later declarations stay private to the
// new scope.
function createScope() {
  var scope = scopes[scopeDepth++].slice();
  scopes.push(scope);
  if (options.onCreateScope) options.onCreateScope();
}

// Exit and remove the current scope.
function destroyScope() {
  scopes.pop();
  scopeDepth--;
  if (options.onDestroyScope) options.onDestroyScope();
}

// Add identifier name to the current scope if it doesn't already exist.
// `options.onLocalDeclaration` is invoked for every call, duplicates included.
function scopeIdentifierName(name) {
  if (options.onLocalDeclaration) options.onLocalDeclaration(name);
  if (-1 !== indexOf(scopes[scopeDepth], name)) return;
  scopes[scopeDepth].push(name);
}

// Add identifier to the current scope
function scopeIdentifier(node) {
  scopeIdentifierName(node.name);
  attachScope(node, true);
}

// Attach scope information to node. If the node is global, store it in the
// globals array so we can return the information to the user.
// Only the first node seen for a given global name is stored.
function attachScope(node, isLocal) {
  if (!isLocal && -1 === indexOfObject(globals, 'name', node.name))
    globals.push(node);

  node.isLocal = isLocal;
}

// Is the identifier name available in this scope.
function scopeHasName(name) {
  return (-1 !== indexOf(scopes[scopeDepth], name));
}

// Location tracking
// -----------------
//
// Locations are stored in FILO-array as a `Marker` object consisting of both
// `loc` and `range` data.
// Once a `Marker` is popped off the list an end
// location is added and the data is attached to a syntax node.

var locations = []
  , trackLocations;

// Create a `Marker` positioned at the current token.
function createLocationMarker() {
  return new Marker(token);
}

// Record the start position of `token`. `loc` is only created when
// `options.locations` is set and `range` only when `options.ranges` is set;
// the end positions are placeholders until `complete()` runs.
function Marker(token) {
  if (options.locations) {
    this.loc = {
        start: {
          line: token.line
        , column: token.range[0] - token.lineStart
      }
      , end: {
          line: 0
        , column: 0
      }
    };
  }
  if (options.ranges) this.range = [token.range[0], 0];
}

// Complete the location data stored in the `Marker` by adding the location
// of the *previous token* as an end location.
Marker.prototype.complete = function() {
  if (options.locations) {
    this.loc.end.line = previousToken.lastLine || previousToken.line;
    this.loc.end.column = previousToken.range[1] - (previousToken.lastLineStart || previousToken.lineStart);
  }
  if (options.ranges) {
    this.range[1] = previousToken.range[1];
  }
};

// Copy the marker's location data onto `node`. Fresh objects are created so
// that nodes blessed by the same marker do not share `loc`/`range` objects.
Marker.prototype.bless = function (node) {
  if (this.loc) {
    var loc = this.loc;
    node.loc = {
      start: {
        line: loc.start.line,
        column: loc.start.column
      },
      end: {
        line: loc.end.line,
        column: loc.end.column
      }
    };
  }
  if (this.range) {
    node.range = [
      this.range[0],
      this.range[1]
    ];
  }
};

// Create a new `Marker` and add it to the FILO-array.
function markLocation() {
  if (trackLocations) locations.push(createLocationMarker());
}

// Push an arbitrary `Marker` object onto the FILO-array.
function pushLocation(marker) {
  if (trackLocations) locations.push(marker);
}

// Parse functions
// ---------------

// Chunk is the main program object. Syntactically it's the same as a block.
//
//     chunk ::= block

function parseChunk() {
  next();
  markLocation();
  if (options.scope) createScope();
  var body = parseBlock();
  if (options.scope) destroyScope();
  if (EOF !== token.type) unexpected(token);
  // If the body is empty no previousToken exists when finishNode runs.
  if (trackLocations && !body.length) previousToken = token;
  return finishNode(ast.chunk(body));
}

// A block contains a list of statements with an optional return statement
// as its last statement.
//
//     block ::= {stat} [retstat]
//
// The `terminator` parameter is not used by the body; it is kept so the
// signature stays unchanged.

function parseBlock(terminator) {
  var block = []
    , statement;

  while (!isBlockFollow(token)) {
    // Return has to be the last statement in a block.
    if ('return' === token.value) {
      block.push(parseStatement());
      break;
    }
    statement = parseStatement();
    consume(';');
    // Statements are only added if they are returned, this allows us to
    // ignore some statements, such as EmptyStatement.
    if (statement) block.push(statement);
  }

  // Doesn't really need an ast node
  return block;
}

// There are two types of statements, simple and compound.
//
//     statement ::= break | goto | do | while | repeat | return
//          | if | for | function | local | label | assignment
//          | functioncall | ';'

function parseStatement() {
  markLocation();
  if (Keyword === token.type) {
    switch (token.value) {
      case 'local':    next(); return parseLocalStatement();
      case 'if':       next(); return parseIfStatement();
      case 'return':   next(); return parseReturnStatement();
      case 'function': next();
        var name = parseFunctionName();
        return parseFunctionDeclaration(name);
      case 'while':    next(); return parseWhileStatement();
      case 'for':      next(); return parseForStatement();
      case 'repeat':   next(); return parseRepeatStatement();
      case 'break':    next(); return parseBreakStatement();
      case 'do':       next(); return parseDoStatement();
      case 'goto':     next(); return parseGotoStatement();
    }
  }

  // With contextual goto, `goto` is lexed as an identifier and only starts a
  // goto statement when another identifier (other than `goto`) follows.
  if (features.contextualGoto &&
      token.type === Identifier && token.value === 'goto' &&
      lookahead.type === Identifier && lookahead.value !== 'goto') {
    next(); return parseGotoStatement();
  }

  if (Punctuator === token.type) {
    if (consume('::')) return parseLabelStatement();
  }
  // Assignments memorizes the location and pushes it manually for wrapper
  // nodes. Additionally empty `;` statements should not mark a location.
  if (trackLocations) locations.pop();

  // When a `;` is encountered, simply eat it without storing it.
  if (features.emptyStatement) {
    if (consume(';')) return;
  }

  return parseAssignmentOrCallStatement();
}

// ## Statements

//     label ::= '::' Name '::'

function parseLabelStatement() {
  var name = token.value
    , label = parseIdentifier();

  // Labels are stored in the scope wrapped in `::` so they cannot collide
  // with variable names.
  if (options.scope) {
    scopeIdentifierName('::' + name + '::');
    attachScope(label, true);
  }

  expect('::');
  return finishNode(ast.labelStatement(label));
}

//     break ::= 'break'

function parseBreakStatement() {
  return finishNode(ast.breakStatement());
}

//     goto ::= 'goto' Name

function parseGotoStatement() {
  var label = parseIdentifier();

  return finishNode(ast.gotoStatement(label));
}

//     do ::= 'do' block 'end'

function parseDoStatement() {
  if (options.scope) createScope();
  var body = parseBlock();
  if (options.scope) destroyScope();
  expect('end');
  return finishNode(ast.doStatement(body));
}

//     while ::= 'while' exp 'do' block 'end'

function parseWhileStatement() {
  var condition = parseExpectedExpression();
  expect('do');
  if (options.scope) createScope();
  var body = parseBlock();
  if (options.scope) destroyScope();
  expect('end');
  return finishNode(ast.whileStatement(condition, body));
}

//     repeat ::= 'repeat' block 'until' exp

function parseRepeatStatement() {
  if (options.scope) createScope();
  var body = parseBlock();
  expect('until');
  // The scope is closed only after the condition has been parsed.
  var condition = parseExpectedExpression();
  if (options.scope) destroyScope();
  return finishNode(ast.repeatStatement(condition, body));
}

//     retstat ::= 'return' [exp {',' exp}] [';']

function parseReturnStatement() {
  var expressions = [];

  // Skip the expression list only for the keyword `end`; a string literal
  // whose text is "end" is an ordinary return value.
  if (!(Keyword === token.type && 'end' === token.value)) {
    var expression = parseExpression();
    if (null != expression) expressions.push(expression);
    while (consume(',')) {
      expression = parseExpectedExpression();
      expressions.push(expression);
    }
    consume(';'); // grammar tells us ; is optional here.
  }
  return finishNode(ast.returnStatement(expressions));
}

//     if ::= 'if' exp 'then' block {elif} ['else' block] 'end'
//     elif ::= 'elseif' exp 'then' block

function parseIfStatement() {
  var clauses = []
    , condition
    , body
    , marker;

  // IfClauses begin at the same location as the parent IfStatement.
  // It ends at the start of `end`, `else`, or `elseif`.
  if (trackLocations) {
    marker = locations[locations.length - 1];
    locations.push(marker);
  }
  condition = parseExpectedExpression();
  expect('then');
  if (options.scope) createScope();
  body = parseBlock();
  if (options.scope) destroyScope();
  clauses.push(finishNode(ast.ifClause(condition, body)));

  // The marker is created before `elseif` is consumed so the clause's
  // location starts at the `elseif` token.
  if (trackLocations) marker = createLocationMarker();
  while (consume('elseif')) {
    pushLocation(marker);
    condition = parseExpectedExpression();
    expect('then');
    if (options.scope) createScope();
    body = parseBlock();
    if (options.scope) destroyScope();
    clauses.push(finishNode(ast.elseifClause(condition, body)));
    if (trackLocations) marker = createLocationMarker();
  }

  if (consume('else')) {
    // Include the `else` in the location of ElseClause.
    if (trackLocations) {
      marker = new Marker(previousToken);
      locations.push(marker);
    }
    if (options.scope) createScope();
    body = parseBlock();
    if (options.scope) destroyScope();
    clauses.push(finishNode(ast.elseClause(body)));
  }

  expect('end');
  return finishNode(ast.ifStatement(clauses));
}

// There are two types of for statements, generic and numeric.
//
//     for ::= Name '=' exp ',' exp [',' exp] 'do' block 'end'
//     for ::= namelist 'in' explist 'do' block 'end'
//     namelist ::= Name {',' Name}
//     explist ::= exp {',' exp}
//
// The loop variables live in a scope opened right after the first name and
// closed once the body's `end` has been consumed.

function parseForStatement() {
  // Shared tail of both loop forms: `'do' block 'end'`, then leave the scope
  // that holds the loop variables.
  function parseLoopBody() {
    expect('do');
    var block = parseBlock();
    expect('end');
    if (options.scope) destroyScope();
    return block;
  }

  var first = parseIdentifier()
    , body;

  // The first loop variable is local to the loop.
  if (options.scope) {
    createScope();
    scopeIdentifier(first);
  }

  // A `=` after the first name selects the numeric form.
  if (consume('=')) {
    var from = parseExpectedExpression();
    expect(',');
    var limit = parseExpectedExpression();
    // The step expression is optional.
    var step = null;
    if (consume(',')) step = parseExpectedExpression();

    body = parseLoopBody();
    return finishNode(ast.forNumericStatement(first, from, limit, step, body));
  }

  // Otherwise this is the generic form: collect the rest of the namelist,
  // each name being local to the loop as well.
  var names = [first];
  while (consume(',')) {
    var extra = parseIdentifier();
    if (options.scope) scopeIdentifier(extra);
    names.push(extra);
  }
  expect('in');

  // One or more expressions in the explist.
  var iterators = [parseExpectedExpression()];
  while (consume(',')) iterators.push(parseExpectedExpression());

  body = parseLoopBody();
  return finishNode(ast.forGenericStatement(names, iterators, body));
}

// Local statements can either be variable assignments or function
// definitions. If a function definition is found, it will be delegated to
// `parseFunctionDeclaration()` with the isLocal flag.
//
// This AST structure might change into a local assignment with a function
// child.
//
//     local ::= 'local' 'function' Name funcdecl
//        | 'local' Name {',' Name} ['=' exp {',' exp}]
//
// Raises an unexpected-token error when `local` is followed by neither a name
// nor `function`.

function parseLocalStatement() {
  var name;

  if (Identifier === token.type) {
    var variables = []
      , init = [];

    do {
      name = parseIdentifier();

      variables.push(name);
    } while (consume(','));

    if (consume('=')) {
      do {
        var expression = parseExpectedExpression();
        init.push(expression);
      } while (consume(','));
    }

    // Declarations doesn't exist before the statement has been evaluated.
    // Therefore assignments can't use their declarator. And the identifiers
    // shouldn't be added to the scope until the statement is complete.
    if (options.scope) {
      for (var i = 0, l = variables.length; i < l; ++i) {
        scopeIdentifier(variables[i]);
      }
    }

    return finishNode(ast.localStatement(variables, init));
  }
  if (consume('function')) {
    name = parseIdentifier();

    // The function name is declared before the function's own scope is
    // created, so the name is already in scope inside the body.
    if (options.scope) {
      scopeIdentifier(name);
      createScope();
    }

    // MemberExpressions are not allowed in local function statements.
    return parseFunctionDeclaration(name, true);
  } else {
    // Name what was expected instead of passing an empty description.
    raiseUnexpectedToken('<name>', token);
  }
}

// Raise `errors.invalidVar` unless `node` can be assigned to: it must be an
// identifier, member expression or index expression that is not wrapped in
// parentheses.
function validateVar(node) {
  // @TODO we need something not dependent on the exact AST used. see also isCallExpression()
  if (node.inParens || (['Identifier', 'MemberExpression', 'IndexExpression'].indexOf(node.type) === -1)) {
    raise(token, errors.invalidVar, token.value);
  }
}

//     assignment ::= varlist '=' explist
//     var ::= Name | prefixexp '[' exp ']' | prefixexp '.'
//         Name
//     varlist ::= var {',' var}
//     explist ::= exp {',' exp}
//
//     call ::= callexp
//     callexp ::= prefixexp args | prefixexp ':' Name args
//
// A statement that starts with a prefix expression is either an assignment
// (followed by `,` or `=`) or a call; anything else is reported as an
// unexpected token.

function parseAssignmentOrCallStatement() {
  // Keep a reference to the previous token for better error messages in case
  // of invalid statement
  var previous = token
    , expression, marker;

  if (trackLocations) marker = createLocationMarker();
  expression = parsePrefixExpression();

  if (null == expression) return unexpected(token);
  if (',='.indexOf(token.value) >= 0) {
    var variables = [expression]
      , init = []
      , exp;

    validateVar(expression);
    while (consume(',')) {
      exp = parsePrefixExpression();
      // Name what was expected instead of passing an empty description.
      if (null == exp) raiseUnexpectedToken('<expression>', token);
      validateVar(exp);
      variables.push(exp);
    }
    expect('=');
    do {
      exp = parseExpectedExpression();
      init.push(exp);
    } while (consume(','));

    pushLocation(marker);
    return finishNode(ast.assignmentStatement(variables, init));
  }
  if (isCallExpression(expression)) {
    pushLocation(marker);
    return finishNode(ast.callStatement(expression));
  }
  // The prefix expression was neither part of an assignment or a
  // callstatement, however as it was valid it's been consumed, so raise
  // the exception on the previous token to provide a helpful message.
  return unexpected(previous);
}



// ### Non-statements

//     Identifier ::= Name
//
// Raises an unexpected-token error when the current token is not an
// identifier.

function parseIdentifier() {
  markLocation();
  var identifier = token.value;
  if (Identifier !== token.type) raiseUnexpectedToken('<name>', token);
  next();
  return finishNode(ast.identifier(identifier));
}

// Parse the functions parameters and body block. The name should already
// have been parsed and passed to this declaration function. By separating
// this we allow for anonymous functions in expressions.
//
// For local functions there's a boolean parameter which needs to be set
// when parsing the declaration.
//
//     funcdecl ::= '(' [parlist] ')' block 'end'
//     parlist ::= Name {',' Name} | [',' '...'] | '...'
//
// `name` is the already parsed name node (or `null` for an anonymous
// function). `isLocal` defaults to `false`. When `options.scope` is set the
// current scope is destroyed after the body, so the caller must have created
// the function's scope beforehand.

function parseFunctionDeclaration(name, isLocal) {
  var parameters = [];
  expect('(');

  // The declaration has arguments
  if (!consume(')')) {
    // Arguments are a comma separated list of identifiers, optionally ending
    // with a vararg.
    while (true) {
      if (Identifier === token.type) {
        var parameter = parseIdentifier();
        // Function parameters are local.
        if (options.scope) scopeIdentifier(parameter);

        parameters.push(parameter);

        if (consume(',')) continue;
        // A name that is not followed by `,` has to close the list; without
        // this `(a b)` would be read as two parameters.
        expect(')');
        break;
      }
      // No arguments are allowed after a vararg.
      else if (VarargLiteral === token.type) {
        parameters.push(parsePrimaryExpression());
        expect(')');
        break;
      } else {
        raiseUnexpectedToken('<name> or \'...\'', token);
      }
    }
  }

  var body = parseBlock();
  expect('end');
  if (options.scope) destroyScope();

  isLocal = isLocal || false;
  return finishNode(ast.functionStatement(name, parameters, isLocal, body));
}

// Parse the function name as identifiers and member expressions.
//
//     Name {'.'
//     Name} [':' Name]
//
// When `options.scope` is set, the base identifier is marked local or global
// and a new scope is created for the function; a `:` method name also adds
// `self` to that scope.

function parseFunctionName() {
  var base, name, marker;

  if (trackLocations) marker = createLocationMarker();
  base = parseIdentifier();

  if (options.scope) {
    attachScope(base, scopeHasName(base.name));
    createScope();
  }

  while (consume('.')) {
    pushLocation(marker);
    name = parseIdentifier();
    base = finishNode(ast.memberExpression(base, '.', name));
  }

  if (consume(':')) {
    pushLocation(marker);
    name = parseIdentifier();
    base = finishNode(ast.memberExpression(base, ':', name));
    if (options.scope) scopeIdentifierName('self');
  }

  return base;
}

//     tableconstructor ::= '{' [fieldlist] '}'
//     fieldlist ::= field {fieldsep field} fieldsep
//     field ::= '[' exp ']' '=' exp | Name = 'exp' | exp
//
//     fieldsep ::= ',' | ';'
//
// The opening `{` has already been consumed by the caller.

function parseTableConstructor() {
  var fields = []
    , key, value;

  while (true) {
    markLocation();
    if (Punctuator === token.type && consume('[')) {
      key = parseExpectedExpression();
      expect(']');
      expect('=');
      value = parseExpectedExpression();
      fields.push(finishNode(ast.tableKey(key, value)));
    } else if (Identifier === token.type) {
      // Only a real `=` punctuator makes this a `Name = exp` field; a string
      // literal "=" after the name is a call argument (`{ f "=" }`).
      if (Punctuator === lookahead.type && '=' === lookahead.value) {
        key = parseIdentifier();
        next();
        value = parseExpectedExpression();
        fields.push(finishNode(ast.tableKeyString(key, value)));
      } else {
        value = parseExpectedExpression();
        fields.push(finishNode(ast.tableValue(value)));
      }
    } else {
      if (null == (value = parseExpression())) {
        // No field follows, drop the marker pushed for it.
        locations.pop();
        break;
      }
      fields.push(finishNode(ast.tableValue(value)));
    }
    // Field separators are the punctuators `,` and `;`. Checking the token
    // type keeps string literals such as "," or "" from being treated as
    // separators.
    if (Punctuator === token.type && (',' === token.value || ';' === token.value)) {
      next();
      continue;
    }
    break;
  }
  expect('}');
  return finishNode(ast.tableConstructorExpression(fields));
}

// Expression parser
// -----------------
//
// Expressions are evaluated and always return a value. If nothing is
// matched null will be returned.
//
//     exp ::= (unop exp | primary | prefixexp ) { binop exp }
//
//     primary ::= nil | false | true | Number | String | '...'
//          | functiondef | tableconstructor
//
//     prefixexp ::= (Name | '(' exp ')' ) { '[' exp ']'
//          | '.' Name | ':' Name args | args }
//

// Parse an expression starting at the lowest precedence. Returns whatever
// `parseSubExpression(0)` returns, i.e. `null` when no expression starts here.
function parseExpression() {
  var expression = parseSubExpression(0);
  return expression;
}

// Parse an expression expecting it to be valid.
// Raises an unexpected-token error naming `<expression>` otherwise.

function parseExpectedExpression() {
  var expression = parseExpression();
  if (null == expression) raiseUnexpectedToken('<expression>', token);
  else return expression;
}


// Return the precedence priority of the operator.
//
// As unary `-` can't be distinguished from binary `-`, unary precedence
// isn't described in this table but in `parseSubExpression()` itself.
//
// As this function gets hit on every expression it's been optimized due to
// the expensive CompareICStub which took ~8% of the parse time.
//
// Returns 0 for anything that is not a binary operator.

function binaryPrecedence(operator) {
  var charCode = operator.charCodeAt(0)
    , length = operator.length;

  if (1 === length) {
    switch (charCode) {
      case 94: return 12; // ^
      case 42: case 47: case 37: return 10; // * / %
      case 43: case 45: return 9; // + -
      case 38: return 6; // &
      case 126: return 5; // ~
      case 124: return 4; // |
      case 60: case 62: return 3; // < >
    }
  } else if (2 === length) {
    switch (charCode) {
      case 47: return 10; // //
      case 46: return 8; // ..
      case 60: case 62:
          if('<<' === operator || '>>' === operator) return 7; // << >>
          return 3; // <= >=
      case 61: case 126: return 3; // == ~=
      case 111: return 1; // or
    }
  } else if (97 === charCode && 'and' === operator) return 2;
  return 0;
}

// Implement an operator-precedence parser to handle binary operator
// precedence.
//
// We use this algorithm because it's compact, it's fast and Lua core uses
// the same so we can be sure our expressions are parsed in the same manner
// without excessive amounts of tests.
//
//     exp ::= (unop exp | primary | prefixexp ) { binop exp }
//
// Only binary operators whose precedence is greater than `minPrecedence` are
// consumed. Returns `null` when no expression starts at the current token.

function parseSubExpression(minPrecedence) {
  var operator = token.value
  // The left-hand side in binary operations.
    , expression, marker;

  if (trackLocations) marker = createLocationMarker();

  // UnaryExpression
  if (isUnary(token)) {
    markLocation();
    next();
    // The operand is parsed with a minimum precedence of 10, so of the
    // binary operators only `^` (12) can become part of it.
    var argument = parseSubExpression(10);
    if (argument == null) raiseUnexpectedToken('<expression>', token);
    expression = finishNode(ast.unaryExpression(operator, argument));
  }
  if (null == expression) {
    // PrimaryExpression
    expression = parsePrimaryExpression();

    // PrefixExpression
    if (null == expression) {
      expression = parsePrefixExpression();
    }
  }
  // This is not a valid left hand expression.
  if (null == expression) return null;

  var precedence;
  while (true) {
    operator = token.value;

    precedence = (Punctuator === token.type || Keyword === token.type) ?
      binaryPrecedence(operator) : 0;

    if (precedence === 0 || precedence <= minPrecedence) break;
    // Right-hand precedence operators
    if ('^' === operator || '..' === operator) precedence--;
    next();
    var right = parseSubExpression(precedence);
    if (null == right) raiseUnexpectedToken('<expression>', token);
    // Push in the marker created before the loop to wrap its entirety.
    if (trackLocations) locations.push(marker);
    expression = finishNode(ast.binaryExpression(operator, expression, right));

  }
  return expression;
}

//     prefixexp ::= prefix {suffix}
//     prefix ::= Name | '(' exp ')'
//     suffix ::= '[' exp ']' | '.'
//         Name | ':' Name args | args
//
//     args ::= '(' [explist] ')' | tableconstructor | String
//
// Returns `null` when the current token is neither an identifier nor `(`.
// Every suffix node is finished with the marker created before the prefix
// was parsed (when location tracking is on).

function parsePrefixExpression() {
  var base, name, marker;

  if (trackLocations) marker = createLocationMarker();

  // The prefix
  if (Identifier === token.type) {
    name = token.value;
    base = parseIdentifier();
    // Set the parent scope.
    if (options.scope) attachScope(base, scopeHasName(name));
  } else if (consume('(')) {
    base = parseExpectedExpression();
    expect(')');
    base.inParens = true; // XXX: quick and dirty. needed for validateVar
  } else {
    return null;
  }

  // The suffix
  var expression, identifier;
  while (true) {
    if (Punctuator === token.type) {
      switch (token.value) {
        case '[':
          pushLocation(marker);
          next();
          expression = parseExpectedExpression();
          expect(']');
          base = finishNode(ast.indexExpression(base, expression));
          break;
        case '.':
          pushLocation(marker);
          next();
          identifier = parseIdentifier();
          base = finishNode(ast.memberExpression(base, '.', identifier));
          break;
        case ':':
          pushLocation(marker);
          next();
          identifier = parseIdentifier();
          base = finishNode(ast.memberExpression(base, ':', identifier));
          // Once a : is found, this has to be a CallExpression, otherwise
          // throw an error.
          // The marker is pushed a second time for the call node that wraps
          // the member expression.
          pushLocation(marker);
          base = parseCallExpression(base);
          break;
        case '(': case '{': // args
          pushLocation(marker);
          base = parseCallExpression(base);
          break;
        default:
          // Any other punctuator ends the prefix expression.
          return base;
      }
    } else if (StringLiteral === token.type) {
      // A string literal directly after a prefix expression is a call
      // argument.
      pushLocation(marker);
      base = parseCallExpression(base);
    } else {
      break;
    }
  }

  return base;
}

//     args ::= '(' [explist] ')' | tableconstructor | String
//
// Wraps `base` in a call node for the argument form found at the current
// token, or raises an unexpected-token error when no arguments follow.

function parseCallExpression(base) {
  if (Punctuator === token.type) {
    switch (token.value) {
      case '(':
        // Without the emptyStatement feature, a `(` on a different line than
        // the previous token is reported as ambiguous syntax.
        if (!features.emptyStatement) {
          if (token.line !== previousToken.line)
            raise({}, errors.ambiguousSyntax, token.value);
        }
        next();

        // List of expressions
        var expressions = [];
        var expression = parseExpression();
        if (null != expression) expressions.push(expression);
        while (consume(',')) {
          expression = parseExpectedExpression();
          expressions.push(expression);
        }

        expect(')');
        return finishNode(ast.callExpression(base, expressions));

      case '{':
        // The table argument gets its own marker, starting at `{`.
        markLocation();
        next();
        var table = parseTableConstructor();
        return finishNode(ast.tableCallExpression(base, table));
    }
  } else if (StringLiteral === token.type) {
    return finishNode(ast.stringCallExpression(base, parsePrimaryExpression()));
  }

  raiseUnexpectedToken('function arguments', token);
}

//     primary ::= String | Numeric | nil | true | false
//          | functiondef | tableconstructor | '...'

// Parse a literal, an anonymous function or a table constructor. Falls off
// the end (returning `undefined`) when the current token starts none of
// these; callers compare the result with `null ==`.
function parsePrimaryExpression() {
  // Token types are bit flags, so one mask covers every literal type.
  var literals = StringLiteral | NumericLiteral | BooleanLiteral | NilLiteral | VarargLiteral
    , value = token.value
    , type = token.type
    , marker;

  if (trackLocations) marker = createLocationMarker();

  if (type & literals) {
    pushLocation(marker);
    var raw = input.slice(token.range[0], token.range[1]);
    next();
    return finishNode(ast.literal(type, value, raw));
  } else if (Keyword === type && 'function' === value) {
    pushLocation(marker);
    next();
    // The scope created here is destroyed by parseFunctionDeclaration().
    if (options.scope) createScope();
    return parseFunctionDeclaration(null);
  } else if (consume('{')) {
    pushLocation(marker);
    return parseTableConstructor();
  }
}

// Parser
// ------

// Export the main parser.
//
//   - `wait` Hold parsing until end() is called. Defaults to false
//   - `comments` Store comments. Defaults to true.
//   - `scope` Track identifier scope. Defaults to false.
//   - `locations` Store location information. Defaults to false.
//   - `ranges` Store the start and end character locations. Defaults to
//     false.
//   - `onCreateNode` Callback which will be invoked when a syntax node is
//     created.
//   - `onCreateScope` Callback which will be invoked when a new scope is
//     created.
//   - `onDestroyScope` Callback which will be invoked when the current scope
//     is destroyed.
//   - `onLocalDeclaration` Callback which will be invoked with the name when
//     a local variable is declared in the current scope.
//   - `luaVersion` Selects the entry of `versionFeatures` to parse with; an
//     unknown version makes `parse()` throw.
//   - `extendedIdentifiers` Accept character codes >= 128 in identifiers.
//
// Example:
//
//     var parser = require('luaparse');
//     parser.parse('i = 0');

exports.parse = parse;

// Feature flags per supported `luaVersion`. A flag that is absent reads as
// falsy.
var versionFeatures = {
  '5.1': {
  },
  '5.2': {
    labels: true,
    emptyStatement: true,
    hexEscapes: true,
    skipWhitespaceEscape: true,
    strictEscapes: true
  },
  '5.3': {
    labels: true,
    emptyStatement: true,
    hexEscapes: true,
    skipWhitespaceEscape: true,
    strictEscapes: true,
    unicodeEscapes: true,
    bitwiseOperators: true,
    integerDivision: true
  },
  'LuaJIT': {
    // XXX: LuaJIT language features may depend on compilation options; may need to
    // rethink how to handle this. Specifically, there is a LUAJIT_ENABLE_LUA52COMPAT
    // that removes contextual goto. Maybe add 'LuaJIT-5.2compat' as well?
    labels: true,
    contextualGoto: true,
    hexEscapes: true,
    skipWhitespaceEscape: true,
    strictEscapes: true,
    unicodeEscapes: true
  }
};

// Reset the parser state for a new input and, unless `options.wait` is set,
// parse it right away and return the chunk. With `wait` set the exports
// object is returned so `write()`/`end()` can be chained.
//
// `parse(options)` is accepted as well: a single object argument is taken as
// the options and the input starts out empty.
function parse(_input, _options) {
  if ('undefined' === typeof _options && 'object' === typeof _input) {
    _options = _input;
    _input = undefined;
  }
  if (!_options) _options = {};

  input = _input || '';
  options = extend(defaultOptions, _options);

  // Rewind the lexer
  index = 0;
  line = 1;
  lineStart = 0;
  length = input.length;
  // When tracking identifier scope, initialize with an empty scope.
  scopes = [[]];
  scopeDepth = 0;
  globals = [];
  locations = [];

  // Select the feature set, rejecting unknown versions.
  if (!(features = versionFeatures[options.luaVersion])) {
    throw new Error(sprintf("Lua version '%1' not supported", options.luaVersion));
  }

  if (options.comments) comments = [];
  if (!options.wait) return end();
  return exports;
}

// Write to the source code buffer without beginning the parse.
exports.write = write;

// Append `_input` (converted to a string) to the buffer; returns the exports
// object for chaining.
function write(_input) {
  input += String(_input);
  length = input.length;
  return exports;
}

// Send an EOF and begin parsing.
exports.end = end;

// Optionally append a last piece of input, then parse the whole buffer and
// return the chunk node. `comments` and `globals` are attached to the chunk
// when the corresponding options are set.
function end(_input) {
  if ('undefined' !== typeof _input) write(_input);

  // Ignore shebangs.
  // The first line is replaced by spaces of the same length, which leaves
  // every later offset unchanged.
  if (input && input.substr(0, 2) === '#!') input = input.replace(/^.*/, function (line) {
    return line.replace(/./g, ' ');
  });

  length = input.length;
  trackLocations = options.locations || options.ranges;
  // Initialize with a lookahead token.
  lookahead = lex();

  var chunk = parseChunk();
  if (options.comments) chunk.comments = comments;
  if (options.scope) chunk.globals = globals;

  // Every marker pushed during the parse must have been consumed by now.
  /* istanbul ignore if */
  if (locations.length > 0)
    throw new Error('Location tracking failed. This is most likely a bug in luaparse');

  return chunk;
}

}));
/* vim: set sw=2 ts=2 et tw=79 : */