123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642 |
- (function(root, factory) {
- if (typeof define === 'function' && define.amd) {
- define([], factory) /* global define */
- } else if (typeof module === 'object' && module.exports) {
- module.exports = factory()
- } else {
- root.moo = factory()
- }
- }(this, function() {
- 'use strict';
// Local aliases for the Object.prototype methods used by the helpers below.
var hasOwnProperty = ({}).hasOwnProperty
var toString = ({}).toString
// Whether RegExp instances in this engine report a boolean `sticky` property.
var hasSticky = typeof (new RegExp()).sticky === 'boolean'
- /***************************************************************************/
/**
 * Tests whether a value is a RegExp by looking at its Object.prototype.toString tag.
 * Falsy inputs are handed back unchanged instead of being coerced to `false`.
 */
function isRegExp(o) {
  if (!o) return o
  return toString.call(o) === '[object RegExp]'
}
/**
 * Tests whether a value is an object that is neither a RegExp nor an array.
 * Falsy inputs are handed back unchanged instead of being coerced to `false`.
 */
function isObject(o) {
  if (!o) return o
  if (typeof o !== 'object') return false
  if (isRegExp(o)) return false
  return !Array.isArray(o)
}
/**
 * Backslash-escapes every regular-expression metacharacter in `s`, so the
 * result can be embedded in a pattern source and match `s` literally.
 */
function reEscape(s) {
  var metaChars = /[-\/\\^$*+?.()|[\]{}]/g
  var escaped = s.replace(metaChars, '\\$&')
  return escaped
}
/**
 * Counts the capture groups in the pattern source `s`.
 */
function reGroups(s) {
  // The leading empty alternative always matches '', so exec() returns an
  // array holding the whole match plus one slot per capture group.
  var probe = new RegExp('|' + s).exec('')
  return probe.length - 1
}
/**
 * Wraps the pattern source `s` in a capturing group.
 */
function reCapture(s) {
  var captured = '(' + s + ')'
  return captured
}
/**
 * Builds one pattern source that matches any of the given pattern sources.
 * Each alternative, and the union as a whole, is wrapped in a non-capturing
 * group. An empty list yields '(?!)', a pattern that never matches.
 */
function reUnion(regexps) {
  if (!regexps.length) return '(?!)'
  var alternatives = []
  for (var i = 0; i < regexps.length; i++) {
    alternatives.push("(?:" + regexps[i] + ")")
  }
  return "(?:" + alternatives.join('|') + ")"
}
/**
 * Converts one match entry into a pattern source.
 *
 * Strings are escaped and wrapped in a non-capturing group; RegExp objects
 * contribute their `source` unchanged.
 *
 * @throws {Error} if a RegExp carries the i, g, y or m flag, or if `obj` is
 *   neither a string nor a RegExp.
 */
function regexpOrLiteral(obj) {
  if (typeof obj === 'string') {
    return '(?:' + reEscape(obj) + ')'
  }
  if (!isRegExp(obj)) {
    throw new Error('Not a pattern: ' + obj)
  }
  // TODO: consider /u support
  if (obj.ignoreCase) throw new Error('RegExp /i flag not allowed')
  if (obj.global) throw new Error('RegExp /g flag is implied')
  if (obj.sticky) throw new Error('RegExp /y flag is implied')
  if (obj.multiline) throw new Error('RegExp /m flag is implied')
  return obj.source
}
/**
 * Left-pads `s` with spaces until it is `length` characters long.
 * Strings that are already longer than `length` are returned unchanged.
 */
function pad(s, length) {
  var missing = length - s.length
  // Joining n + 1 empty slots with " " produces exactly n spaces.
  return missing < 0 ? s : new Array(missing + 1).join(" ") + s
}
/**
 * Returns the trailing lines of `string` as an array, split on "\n".
 *
 * The string is scanned backwards for line breaks. Once `numLines` breaks have
 * been seen, everything after the earliest of them is returned; if the string
 * holds fewer breaks than that, the whole string is split and returned.
 */
function lastNLines(string, numLines) {
  var cursor = string.length
  var seen = 0
  for (;;) {
    var found = string.lastIndexOf("\n", cursor - 1)
    if (found === -1) break
    seen += 1
    cursor = found
    // Stop once enough breaks were found, or at a break on the very first
    // character (searching from index -1 would find that same break again).
    if (seen === numLines || cursor === 0) break
  }
  var from = seen < numLines ? 0 : cursor + 1
  return string.substring(from).split("\n")
}
/**
 * Expands an object-style spec (token type -> pattern or patterns) into a flat
 * list of rules.
 *
 * An `include` key produces one `{include: name}` entry per listed value. For
 * any other key, consecutive entries that are not option objects are grouped
 * into a single rule, while each option object becomes a rule of its own.
 */
function objectToRules(object) {
  var collected = []
  Object.getOwnPropertyNames(object).forEach(function(name) {
    var entries = [].concat(object[name])
    if (name === 'include') {
      for (var n = 0; n < entries.length; n++) {
        collected.push({include: entries[n]})
      }
      return
    }
    // Plain patterns seen since the last option object
    var pending = []
    entries.forEach(function(entry) {
      if (!isObject(entry)) {
        pending.push(entry)
        return
      }
      if (pending.length) collected.push(ruleOptions(name, pending))
      collected.push(ruleOptions(name, entry))
      pending = []
    })
    if (pending.length) collected.push(ruleOptions(name, pending))
  })
  return collected
}
/**
 * Expands an array-style spec into a flat list of rules.
 *
 * Entries with an `include` property produce one `{include: name}` entry per
 * listed value; every other entry must name its token in `type`.
 *
 * @throws {Error} if an entry has neither `include` nor `type`.
 */
function arrayToRules(array) {
  var collected = []
  for (var idx = 0; idx < array.length; idx++) {
    var entry = array[idx]
    if (!entry.include) {
      if (!entry.type) {
        throw new Error('Rule has no type: ' + JSON.stringify(entry))
      }
      collected.push(ruleOptions(entry.type, entry))
    } else {
      var targets = [].concat(entry.include)
      for (var t = 0; t < targets.length; t++) {
        collected.push({include: targets[t]})
      }
    }
  }
  return collected
}
/**
 * Normalizes one rule into a complete options object.
 *
 * @param {string} type - token name; stored as `defaultType`.
 * @param {*} obj - either an options object, or a bare pattern / array of
 *   patterns (which is treated as `{match: obj}`).
 * @returns {Object} options with every known key present. `match` is always
 *   an array in which string literals come first, longest first, followed by
 *   the RegExp entries.
 * @throws {Error} if the rule also has `include`, or if `type` is a string
 *   that differs from the token name.
 */
function ruleOptions(type, obj) {
  if (!isObject(obj)) {
    obj = { match: obj }
  }
  if (obj.include) {
    throw new Error('Matching rules cannot also include states')
  }

  // nb. error and fallback imply lineBreaks
  var options = {
    defaultType: type,
    lineBreaks: !!obj.error || !!obj.fallback,
    pop: false,
    next: null,
    push: null,
    error: false,
    fallback: false,
    value: null,
    type: null,
    shouldThrow: false,
  }

  // Avoid Object.assign(), so we support IE9+
  for (var key in obj) {
    if (hasOwnProperty.call(obj, key)) {
      options[key] = obj[key]
    }
  }

  // type transform cannot be a string
  if (typeof options.type === 'string' && type !== options.type) {
    throw new Error("Type transform cannot be a string (type '" + options.type + "' for token '" + type + "')")
  }

  // Convert to an array. An incoming array is copied first, because sort()
  // works in place and would otherwise reorder the array the caller passed in.
  var match = options.match
  options.match = Array.isArray(match) ? match.slice() : match ? [match] : []
  options.match.sort(function(a, b) {
    return isRegExp(a) && isRegExp(b) ? 0
      : isRegExp(b) ? -1 : isRegExp(a) ? +1 : b.length - a.length
  })
  return options
}
/**
 * Turns a lexer spec into a flat rule list, dispatching on whether the spec
 * is written as an array or as an object.
 */
function toRules(spec) {
  if (Array.isArray(spec)) {
    return arrayToRules(spec)
  }
  return objectToRules(spec)
}
// Error rule handed out by compileRules() when a rule set declares neither an
// error nor a fallback rule; shouldThrow makes the lexer throw when it is hit.
var defaultErrorRule = ruleOptions('error', {
  lineBreaks: true,
  shouldThrow: true
})
/**
 * Compiles a flat rule list into the data one lexer state needs.
 *
 * @param {Array} rules - normalized rules (see ruleOptions).
 * @param {boolean} [hasStates] - whether the rules belong to a stateful
 *   lexer; state-switching options are rejected otherwise.
 * @returns {{regexp: RegExp, groups: Array, fast: Object, error: Object}}
 *   `regexp` has one capture group per entry of `groups`, in the same order;
 *   `fast` maps character codes to rules for single-character literals;
 *   `error` is the declared error/fallback rule, or defaultErrorRule.
 * @throws {Error} for leftover `include` entries, more than one
 *   error/fallback rule, misplaced state-switching options, mixed /u usage,
 *   patterns that match the empty string, contain capture groups, or match a
 *   newline without declaring `lineBreaks`.
 */
function compileRules(rules, hasStates) {
  var groups = []
  var parts = []
  var fast = Object.create(null)
  var errorRule = null
  var unicodeFlag = null

  // If there is a fallback rule, then disable fast matching
  var fastAllowed = !rules.some(function(rule) { return rule.fallback })

  for (var r = 0; r < rules.length; r++) {
    var options = rules[r]

    if (options.include) {
      // all valid inclusions are removed by states() preprocessor
      throw new Error('Inheritance is not allowed in stateless lexers')
    }

    var isCatchAll = options.error || options.fallback
    if (isCatchAll && errorRule) {
      // errorRule can only be set once
      if (!options.fallback !== !errorRule.fallback) {
        throw new Error("fallback and error are mutually exclusive (for token '" + options.defaultType + "')")
      }
      throw new Error("Multiple " + (options.fallback ? "fallback" : "error") + " rules not allowed (for token '" + options.defaultType + "')")
    }
    if (isCatchAll) {
      errorRule = options
    }

    // Work on a copy: leading one-character literals are moved out of the
    // pattern list and into the fast lookup table while that is still allowed.
    var match = options.match.slice()
    while (fastAllowed && match.length && typeof match[0] === 'string' && match[0].length === 1) {
      fast[match.shift().charCodeAt(0)] = options
    }

    // Warn about inappropriate state-switching options
    var switchesState = options.pop || options.push || options.next
    if (switchesState && !hasStates) {
      throw new Error("State-switching options are not allowed in stateless lexers (for token '" + options.defaultType + "')")
    }
    if (switchesState && options.fallback) {
      throw new Error("State-switching options are not allowed on fallback tokens (for token '" + options.defaultType + "')")
    }

    // Only rules with a .match are included in the RegExp
    if (match.length === 0) {
      continue
    }

    // Once a rule has contributed to the RegExp, later rules may no longer
    // use the fast table.
    fastAllowed = false
    groups.push(options)

    // Check unicode flag is used everywhere or nowhere
    for (var m = 0; m < match.length; m++) {
      var candidate = match[m]
      if (isRegExp(candidate)) {
        if (unicodeFlag === null) {
          unicodeFlag = candidate.unicode
        } else if (unicodeFlag !== candidate.unicode && options.fallback === false) {
          throw new Error('If one rule is /u then all must be')
        }
      }
    }

    // convert to RegExp
    var pat = reUnion(match.map(regexpOrLiteral))

    // validate
    var regexp = new RegExp(pat)
    if (regexp.test("")) {
      throw new Error("RegExp matches empty string: " + regexp)
    }
    if (reGroups(pat) > 0) {
      throw new Error("RegExp has capture groups: " + regexp + "\nUse (?: … ) instead")
    }

    // try and detect rules matching newlines
    if (!options.lineBreaks && regexp.test('\n')) {
      throw new Error('Rule should declare lineBreaks: ' + regexp)
    }

    // store regex
    parts.push(reCapture(pat))
  }

  // If there's no fallback rule, use the sticky flag so we only look for
  // matches at the current index.
  //
  // If we don't support the sticky flag, then fake it using an irrefutable
  // match (i.e. an empty pattern).
  var fallbackRule = errorRule && errorRule.fallback
  var flags
  var suffix
  if (fallbackRule) {
    flags = 'gm'
    suffix = ''
  } else if (hasSticky) {
    flags = 'ym'
    suffix = ''
  } else {
    flags = 'gm'
    suffix = '|'
  }
  if (unicodeFlag === true) flags += "u"

  var combined = new RegExp(reUnion(parts) + suffix, flags)
  return {regexp: combined, groups: groups, fast: fast, error: errorRule || defaultErrorRule}
}
/**
 * Builds a stateless lexer: the spec is compiled into a single state named
 * 'start', which is also the state the lexer begins in.
 */
function compile(rules) {
  var startState = compileRules(toRules(rules))
  return new Lexer({start: startState}, 'start')
}
/**
 * Validates the state-switching options of one compiled rule.
 *
 * @param {Object} g - the rule; a falsy value is accepted and ignored.
 * @param {string} name - name of the state the rule belongs to (used in messages).
 * @param {Object} map - compiled states keyed by state name.
 * @throws {Error} if `push`/`next` names a state missing from `map`, or if
 *   `pop` is set to something that does not convert to the number 1.
 */
function checkStateGroup(g, name, map) {
  if (!g) return
  var target = g.push || g.next
  if (target && !map[target]) {
    throw new Error("Missing state '" + target + "' (in token '" + g.defaultType + "' of state '" + name + "')")
  }
  if (g.pop && +g.pop !== 1) {
    throw new Error("pop must be 1 (in token '" + g.defaultType + "' of state '" + name + "')")
  }
}
/**
 * Builds a stateful lexer from a map of state name -> spec.
 *
 * Rules under the special `$all` key are appended to every state. `include`
 * entries are replaced by the rules of the state they name (each state is
 * expanded at most once per including state, and self-includes are dropped).
 *
 * The `states` object is only read, never modified, so the same spec can be
 * compiled more than once.
 *
 * @param {Object} states - state specs keyed by state name.
 * @param {string} [start] - initial state; defaults to the first state key.
 * @returns {Lexer}
 * @throws {Error} if an included state does not exist, or if a rule's
 *   state-switching options are invalid (see checkStateGroup).
 */
function compileStates(states, start) {
  var all = states.$all ? toRules(states.$all) : []

  // Skip $all when listing states rather than deleting it from the caller's
  // object.
  var keys = Object.getOwnPropertyNames(states).filter(function(key) {
    return key !== '$all'
  })
  if (!start) start = keys[0]

  var ruleMap = Object.create(null)
  for (var i = 0; i < keys.length; i++) {
    var key = keys[i]
    ruleMap[key] = toRules(states[key]).concat(all)
  }

  // Expand include entries in place
  for (var i = 0; i < keys.length; i++) {
    var key = keys[i]
    var rules = ruleMap[key]
    var included = Object.create(null)
    for (var j = 0; j < rules.length; j++) {
      var rule = rules[j]
      if (!rule.include) continue
      // Arguments for splice(): remove the include entry at j and, when the
      // target has not been expanded yet, insert its rules at that position.
      var splice = [j, 1]
      if (rule.include !== key && !included[rule.include]) {
        included[rule.include] = true
        var newRules = ruleMap[rule.include]
        if (!newRules) {
          throw new Error("Cannot include nonexistent state '" + rule.include + "' (in state '" + key + "')")
        }
        for (var k = 0; k < newRules.length; k++) {
          var newRule = newRules[k]
          if (rules.indexOf(newRule) !== -1) continue
          splice.push(newRule)
        }
      }
      rules.splice.apply(rules, splice)
      // Revisit index j: it now holds the first inserted rule (which may
      // itself be an include) or the rule that followed the removed entry.
      j--
    }
  }

  var map = Object.create(null)
  for (var i = 0; i < keys.length; i++) {
    var key = keys[i]
    map[key] = compileRules(ruleMap[key], true)
  }

  // Every push/next target must name a compiled state
  for (var i = 0; i < keys.length; i++) {
    var name = keys[i]
    var state = map[name]
    var groups = state.groups
    for (var j = 0; j < groups.length; j++) {
      checkStateGroup(groups[j], name, map)
    }
    var fastKeys = Object.getOwnPropertyNames(state.fast)
    for (var j = 0; j < fastKeys.length; j++) {
      checkStateGroup(state.fast[fastKeys[j]], name, map)
    }
  }

  return new Lexer(map, start)
}
/**
 * Builds a lookup function from a map of token type -> keyword(s).
 *
 * @param {Object} map - each value is a keyword string or an array of them.
 * @returns {function(string): (string|undefined)} maps a keyword to the token
 *   type it was listed under, or undefined for unknown text.
 * @throws {Error} if any keyword is not a string.
 */
function keywordTransform(map) {
  // Use a JavaScript Map to map keywords to their corresponding token type
  // unless Map is unsupported, then fall back to using an Object:
  var useMap = typeof Map !== 'undefined'
  var lookup = useMap ? new Map : Object.create(null)

  Object.getOwnPropertyNames(map).forEach(function(tokenType) {
    var entry = map[tokenType]
    var keywords = Array.isArray(entry) ? entry : [entry]
    keywords.forEach(function(keyword) {
      if (typeof keyword !== 'string') {
        throw new Error("keyword must be string (in keyword '" + tokenType + "')")
      }
      if (useMap) {
        lookup.set(keyword, tokenType)
      } else {
        lookup[keyword] = tokenType
      }
    })
  })

  return function(k) {
    return useMap ? lookup.get(k) : lookup[k]
  }
}
- /***************************************************************************/
/**
 * Lexer over a set of compiled states.
 *
 * @param {Object} states - compiled states keyed by state name.
 * @param {string} state - name of the state to start in.
 */
var Lexer = function(states, state) {
  this.states = states
  this.startState = state
  this.stack = []
  this.buffer = ''
  // reset() fills in the remaining position and state fields
  this.reset()
}
/**
 * Points the lexer at a new buffer and rewinds it to the start of that buffer.
 *
 * @param {string} [data] - text to tokenize; defaults to ''.
 * @param {Object} [info] - position/state snapshot (as produced by save()) to
 *   resume from. Without it the lexer restarts at line 1, col 1 in its start
 *   state with an empty stack.
 * @returns {Lexer} this lexer, for chaining.
 */
Lexer.prototype.reset = function(data, info) {
  this.buffer = data || ''
  this.index = 0
  this.line = info ? info.line : 1
  this.col = info ? info.col : 1
  // next() emits whatever is held in queuedGroup/queuedText before reading
  // the buffer, so a token queued against the previous buffer must be dropped
  // here unless the snapshot explicitly carries one.
  this.queuedGroup = info && info.queuedGroup ? info.queuedGroup : null
  this.queuedToken = info ? info.queuedToken : null
  this.queuedText = info ? info.queuedText: "";
  this.queuedThrow = info ? info.queuedThrow : null
  this.setState(info ? info.state : this.startState)
  this.stack = info && info.stack ? info.stack.slice() : []
  return this
}
/**
 * Captures the lexer's position and state so it can later be handed to
 * reset() as its `info` argument.
 *
 * The snapshot includes `queuedGroup`, the rule of a token that next() has
 * matched but not yet returned, alongside its `queuedText`.
 *
 * @returns {Object} snapshot; `stack` is a copy of the current state stack.
 */
Lexer.prototype.save = function() {
  return {
    line: this.line,
    col: this.col,
    state: this.state,
    stack: this.stack.slice(),
    queuedGroup: this.queuedGroup || null,
    queuedToken: this.queuedToken,
    queuedText: this.queuedText,
    queuedThrow: this.queuedThrow,
  }
}
/**
 * Switches to the named state and loads its compiled data onto the lexer.
 * Does nothing when `state` is falsy or is already the current state.
 */
Lexer.prototype.setState = function(state) {
  var nothingToDo = !state || this.state === state
  if (nothingToDo) return

  this.state = state
  var compiled = this.states[state]
  this.groups = compiled.groups
  this.error = compiled.error
  this.re = compiled.regexp
  this.fast = compiled.fast
}
/**
 * Returns to the state on top of the stack, removing it from the stack.
 */
Lexer.prototype.popState = function() {
  var previous = this.stack.pop()
  this.setState(previous)
}
/**
 * Enters `state`, remembering the current state on the stack so that
 * popState() can return to it.
 */
Lexer.prototype.pushState = function(state) {
  var current = this.state
  this.stack.push(current)
  this.setState(state)
}
/**
 * Runs the state's combined RegExp against the buffer, starting at the
 * `lastIndex` the caller has set on it.
 *
 * @param {RegExp} re - compiled state RegExp.
 * @param {string} buffer - text being tokenized.
 * @returns {Array|null} the exec() result, or null when nothing matched.
 */
var eat = hasSticky ? function(re, buffer) { // assume re is /y
  return re.exec(buffer)
} : function(re, buffer) { // assume re is /g
  var match = re.exec(buffer)
  // Without a fallback rule the pattern ends in an empty alternative, so it
  // always matches and an empty match means "nothing here". With a fallback
  // rule that alternative is left out, so exec() can also return null.
  if (match === null || match[0].length === 0) {
    return null
  }
  return match
}
/**
 * Finds the rule whose capture group took part in `match`.
 *
 * Capture group i + 1 of the match corresponds to this.groups[i]; the first
 * group with a defined capture decides the result.
 *
 * @throws {Error} if none of the capture groups is defined.
 */
Lexer.prototype._getGroup = function(match) {
  var groups = this.groups
  var total = groups.length
  for (var slot = 1; slot <= total; slot++) {
    if (match[slot] !== undefined) {
      return groups[slot - 1]
    }
  }
  throw new Error('Cannot find token type for matched text')
}
/**
 * Shared `toString` for token objects: a token stringifies to its `value`.
 */
function tokenToString() {
  var token = this
  return token.value
}
/**
 * Produces the next token from the buffer.
 *
 * @returns {Object|undefined} the next token, or undefined once the whole
 *   buffer has been consumed.
 */
Lexer.prototype.next = function() {
  var start = this.index

  // If a fallback token matched, we don't need to re-run the RegExp
  var queued = this.queuedGroup
  if (queued) {
    var queuedToken = this._token(queued, this.queuedText, start)
    this.queuedGroup = null
    this.queuedText = ""
    return queuedToken
  }

  var input = this.buffer
  if (start === input.length) {
    return // EOF
  }

  // Fast matching for single characters
  var fastGroup = this.fast[input.charCodeAt(start)]
  if (fastGroup) {
    return this._token(fastGroup, input.charAt(start), start)
  }

  // Execute RegExp
  var re = this.re
  re.lastIndex = start
  var match = eat(re, input)

  var error = this.error
  if (match == null) {
    // Error tokens match the remaining buffer
    return this._token(error, input.slice(start, input.length), start)
  }

  var group = this._getGroup(match)
  var text = match[0]
  if (!error.fallback || match.index === start) {
    return this._token(group, text, start)
  }

  // The match starts further along: remember it for the following call and
  // emit the skipped-over text now.
  this.queuedGroup = group
  this.queuedText = text
  // Fallback tokens contain the unmatched portion of the buffer
  return this._token(error, input.slice(start, match.index), start)
}
/**
 * Builds the token for `text` matched by rule `group`, advances the lexer's
 * index/line/col past it, and applies the rule's state change.
 *
 * @param {Object} group - the rule that matched.
 * @param {string} text - the matched text.
 * @param {number} offset - buffer index where the text starts.
 * @returns {Object} the token. Its line and col are the position before the
 *   text was consumed.
 * @throws {Error} if the rule has `shouldThrow` set; the position has already
 *   been advanced when this happens.
 */
Lexer.prototype._token = function(group, text, offset) {
  // count line breaks
  var lineBreaks = 0
  // Index just past the last newline in the text; only read when at least
  // one line break was counted.
  var afterLastNL = 1
  if (group.lineBreaks) {
    var at = text.indexOf('\n')
    while (at !== -1) {
      lineBreaks++
      afterLastNL = at + 1
      at = text.indexOf('\n', afterLastNL)
    }
  }

  var token = {
    type: (typeof group.type === 'function' && group.type(text)) || group.defaultType,
    value: typeof group.value === 'function' ? group.value(text) : text,
    text: text,
    toString: tokenToString,
    offset: offset,
    lineBreaks: lineBreaks,
    line: this.line,
    col: this.col,
  }
  // nb. adding more props to token object will make V8 sad!

  var size = text.length
  this.index += size
  this.line += lineBreaks
  if (lineBreaks === 0) {
    this.col += size
  } else {
    // Column restarts after the last newline of the text
    this.col = size - afterLastNL + 1
  }

  // throw, if no rule with {error: true}
  if (group.shouldThrow) {
    var err = new Error(this.formatError(token, "invalid syntax"))
    throw err;
  }

  if (group.pop) {
    this.popState()
  } else if (group.push) {
    this.pushState(group.push)
  } else if (group.next) {
    this.setState(group.next)
  }

  return token
}
// Iteration protocol support, installed only where Symbol.iterator exists:
// iterating a lexer yields tokens until next() returns a falsy value.
if (typeof Symbol !== 'undefined' && Symbol.iterator) {
  var LexerIterator = function(lexer) {
    this.lexer = lexer
  }

  LexerIterator.prototype.next = function() {
    var value = this.lexer.next()
    return {value: value, done: !value}
  }

  // An iterator is its own iterable
  LexerIterator.prototype[Symbol.iterator] = function() {
    return this
  }

  Lexer.prototype[Symbol.iterator] = function() {
    var iterator = new LexerIterator(this)
    return iterator
  }
}
/**
 * Formats an error message that points at `token` within the buffer.
 *
 * The message is followed by a blank line and up to five numbered buffer
 * lines, with a `^` marker placed underneath the token's column.
 *
 * @param {Object} [token] - token to point at (its `line` and `col` are
 *   used). When null or undefined, the current lexer position is used.
 * @param {string} message - text for the first line of the output.
 * @returns {string} the formatted, multi-line message.
 */
Lexer.prototype.formatError = function(token, message) {
  if (token == null) {
    // An undefined token indicates EOF
    var text = this.buffer.slice(this.index)
    token = {
      text: text,
      offset: this.index,
      lineBreaks: text.indexOf('\n') === -1 ? 0 : 1,
      line: this.line,
      col: this.col,
    }
  }

  var numLinesAround = 2
  var firstDisplayedLine = Math.max(token.line - numLinesAround, 1)
  var lastDisplayedLine = token.line + numLinesAround
  var lastLineDigits = String(lastDisplayedLine).length
  // Take the buffer's trailing lines, reaching back far enough from the
  // lexer's current line to include the lines before the token's line.
  var displayedLines = lastNLines(
      this.buffer,
      (this.line - token.line) + numLinesAround + 1
    )
    .slice(0, 5)

  var errorLines = []
  errorLines.push(message + " at line " + token.line + " col " + token.col + ":")
  errorLines.push("")
  for (var i = 0; i < displayedLines.length; i++) {
    var line = displayedLines[i]
    var lineNo = firstDisplayedLine + i
    // The gutter is the right-aligned line number plus a two-space separator.
    // The caret offset is derived from the same gutter so that `^` lands
    // exactly under column token.col of the displayed line.
    var gutter = pad(String(lineNo), lastLineDigits) + "  "
    errorLines.push(gutter + line);
    if (lineNo === token.line) {
      errorLines.push(pad("", gutter.length + token.col - 1) + "^")
    }
  }
  return errorLines.join("\n")
}
/**
 * Creates a new lexer over the same compiled states, using this lexer's
 * current state as the new lexer's start state.
 */
Lexer.prototype.clone = function() {
  var copy = new Lexer(this.states, this.state)
  return copy
}
/**
 * Reports whether this lexer can emit tokens of the given type.
 * The argument is not inspected: the answer is `true` for every input.
 */
Lexer.prototype.has = function(tokenType) {
  var answer = true
  return answer
}
- return {
- compile: compile,
- states: compileStates,
- error: Object.freeze({error: true}),
- fallback: Object.freeze({fallback: true}),
- keywords: keywordTransform,
- }
- }));
|