moo.js 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642
  1. (function(root, factory) {
  2. if (typeof define === 'function' && define.amd) {
  3. define([], factory) /* global define */
  4. } else if (typeof module === 'object' && module.exports) {
  5. module.exports = factory()
  6. } else {
  7. root.moo = factory()
  8. }
  9. }(this, function() {
  10. 'use strict';
  // Built-in Object.prototype methods, cached so they can be .call()ed on any value.
  var hasOwnProperty = {}.hasOwnProperty
  var toString = {}.toString
  // Feature test: true when RegExp instances expose a boolean `sticky` property.
  var hasSticky = typeof /(?:)/.sticky === 'boolean'
  14. /***************************************************************************/
  // Tells whether `o` is a RegExp, judged by its Object.prototype.toString tag.
  // A falsy `o` is handed back unchanged (not coerced to `false`).
  function isRegExp(o) {
    if (!o) return o
    return toString.call(o) === '[object RegExp]'
  }
  // Tells whether `o` is a plain-ish object: typeof 'object', but neither a
  // RegExp nor an Array. A falsy `o` is handed back unchanged.
  function isObject(o) {
    if (!o) return o
    if (typeof o !== 'object') return false
    if (isRegExp(o)) return false
    return !Array.isArray(o)
  }
  // Backslash-escapes every RegExp metacharacter in `s` so the result can be
  // embedded in a pattern and match `s` literally.
  function reEscape(s) {
    var metacharacters = /[-\/\\^$*+?.()|[\]{}]/g
    return s.replace(metacharacters, '\\$&')
  }
  // Counts the capture groups in pattern source `s`.
  // Prefixing an empty alternative makes the pattern match '' unconditionally;
  // the match array then holds one slot per capture group after slot 0.
  function reGroups(s) {
    var probe = new RegExp('|' + s).exec('')
    return probe.length - 1
  }
  // Wraps pattern source `s` in a capturing group.
  function reCapture(s) {
    var closed = s + ')'
    return '(' + closed
  }
  // Joins pattern sources into a single non-capturing alternation.
  // An empty list yields '(?!)', a pattern that can never match.
  function reUnion(regexps) {
    if (!regexps.length) return '(?!)'
    var alternatives = []
    for (var i = 0; i < regexps.length; i++) {
      alternatives.push('(?:' + regexps[i] + ')')
    }
    return '(?:' + alternatives.join('|') + ')'
  }
  // Converts one match entry into RegExp source text.
  //  - a string becomes an escaped literal wrapped in a non-capturing group
  //  - a RegExp contributes its `source`, provided it carries none of the
  //    i / g / y / m flags (checked in that order)
  // Anything else is rejected. The unicode flag is not examined here.
  function regexpOrLiteral(obj) {
    if (typeof obj === 'string') {
      return '(?:' + reEscape(obj) + ')'
    }
    if (!isRegExp(obj)) {
      throw new Error('Not a pattern: ' + obj)
    }
    var rejectedFlags = [
      ['ignoreCase', 'RegExp /i flag not allowed'],
      ['global', 'RegExp /g flag is implied'],
      ['sticky', 'RegExp /y flag is implied'],
      ['multiline', 'RegExp /m flag is implied'],
    ]
    for (var i = 0; i < rejectedFlags.length; i++) {
      if (obj[rejectedFlags[i][0]]) throw new Error(rejectedFlags[i][1])
    }
    return obj.source
  }
  // Left-pads string `s` with spaces up to `length` characters.
  // Strings already at least that long come back untouched.
  function pad(s, length) {
    var padding = ''
    for (var missing = length - s.length; missing > 0; missing--) {
      padding += ' '
    }
    return padding + s
  }
  // Returns the trailing lines of `string` as an array, scanning backwards for
  // up to `numLines` "\n" separators. When fewer separators exist, the whole
  // string is split and returned.
  function lastNLines(string, numLines) {
    var cursor = string.length
    var found = 0
    for (;;) {
      var idx = string.lastIndexOf("\n", cursor - 1)
      if (idx === -1) break
      found += 1
      cursor = idx
      // Stop once enough separators were seen, or at a separator in column 0
      // (searching from a negative index would find that same separator again).
      if (found === numLines || cursor === 0) break
    }
    var start = found < numLines ? 0 : cursor + 1
    return string.slice(start).split("\n")
  }
  // Expands an object-style spec ({ tokenName: matchers, ... }) into a flat
  // list of rules, visiting own properties in getOwnPropertyNames order.
  //  - the key 'include' yields one {include: state} marker per listed state
  //  - for any other key, consecutive non-object matchers are grouped into one
  //    rule, while each object matcher becomes a rule of its own
  function objectToRules(object) {
    var result = []
    Object.getOwnPropertyNames(object).forEach(function(key) {
      var rules = [].concat(object[key])
      if (key === 'include') {
        for (var j = 0; j < rules.length; j++) {
          result.push({include: rules[j]})
        }
        return
      }
      var pending = []
      var flush = function() {
        if (pending.length) result.push(ruleOptions(key, pending))
      }
      rules.forEach(function(rule) {
        if (!isObject(rule)) {
          pending.push(rule)
          return
        }
        // An options object ends the current run of plain matchers.
        flush()
        result.push(ruleOptions(key, rule))
        pending = []
      })
      flush()
    })
    return result
  }
  // Expands an array-style spec into a flat list of rules.
  // Entries with `include` become one {include: state} marker per listed
  // state; every other entry must carry a `type` and is normalised through
  // ruleOptions().
  function arrayToRules(array) {
    var result = []
    for (var i = 0; i < array.length; i++) {
      var obj = array[i]
      if (!obj.include) {
        if (!obj.type) {
          throw new Error('Rule has no type: ' + JSON.stringify(obj))
        }
        result.push(ruleOptions(obj.type, obj))
        continue
      }
      var include = [].concat(obj.include)
      for (var j = 0; j < include.length; j++) {
        result.push({include: include[j]})
      }
    }
    return result
  }
  // Normalises one rule into a complete options record.
  //
  // type: the token name the rule was declared under (stored as defaultType).
  // obj:  either an options object, or a bare matcher / array of matchers,
  //       which is treated as { match: obj }.
  //
  // Returns a fresh object with every known option filled in and `match`
  // converted to a sorted array. Throws if the rule also carries `include`,
  // or if `type` is a string that differs from the declared token name.
  function ruleOptions(type, obj) {
    if (!isObject(obj)) {
      obj = { match: obj }
    }
    if (obj.include) {
      throw new Error('Matching rules cannot also include states')
    }

    // nb. error and fallback imply lineBreaks
    var options = {
      defaultType: type,
      lineBreaks: !!obj.error || !!obj.fallback,
      pop: false,
      next: null,
      push: null,
      error: false,
      fallback: false,
      value: null,
      type: null,
      shouldThrow: false,
    }

    // Avoid Object.assign(), so we support IE9+
    for (var key in obj) {
      if (hasOwnProperty.call(obj, key)) {
        options[key] = obj[key]
      }
    }

    // type transform cannot be a string
    if (typeof options.type === 'string' && type !== options.type) {
      throw new Error("Type transform cannot be a string (type '" + options.type + "' for token '" + type + "')")
    }

    // Convert to an array. A caller-supplied array is copied first, because
    // sort() works in place and must not reorder the caller's own spec.
    var match = options.match
    options.match = Array.isArray(match) ? match.slice() : match ? [match] : []
    // String literals go before RegExps, longest literal first; RegExps keep
    // their relative order.
    options.match.sort(function(a, b) {
      return isRegExp(a) && isRegExp(b) ? 0
        : isRegExp(b) ? -1 : isRegExp(a) ? +1 : b.length - a.length
    })
    return options
  }
  // Normalises a lexer spec into a rule list, dispatching on whether the spec
  // is written as an array or as an object.
  function toRules(spec) {
    if (Array.isArray(spec)) {
      return arrayToRules(spec)
    }
    return objectToRules(spec)
  }
  // Error rule substituted by compileRules() when a rule list defines neither
  // an error nor a fallback rule; its shouldThrow flag makes tokenising throw.
  var defaultErrorRule = ruleOptions('error', {
    lineBreaks: true,
    shouldThrow: true,
  })
  // Compiles a normalised rule list into the tables one lexer state needs.
  //
  // rules:     array of ruleOptions() records ({include} markers are rejected)
  // hasStates: truthy when compiling for a stateful lexer; state-switching
  //            options (pop / push / next) are refused otherwise
  //
  // Returns { regexp, groups, fast, error }:
  //   regexp - one combined RegExp with a capture group per entry of `groups`
  //   groups - the rules that contributed to `regexp`, in order
  //   fast   - char code -> rule table for leading single-character literals
  //   error  - the error/fallback rule, or defaultErrorRule when none is given
  function compileRules(rules, hasStates) {
    var errorRule = null
    var fast = Object.create(null)
    var unicodeFlag = null
    var groups = []
    var parts = []

    // If there is a fallback rule, then disable fast matching
    var fastAllowed = !rules.some(function(rule) { return rule.fallback })

    for (var i = 0; i < rules.length; i++) {
      var options = rules[i]

      // all valid inclusions are removed by states() preprocessor
      if (options.include) {
        throw new Error('Inheritance is not allowed in stateless lexers')
      }

      // At most one rule may act as the error / fallback rule.
      if (options.error || options.fallback) {
        if (errorRule) {
          var sameKind = !options.fallback === !errorRule.fallback
          throw new Error(sameKind
            ? "Multiple " + (options.fallback ? "fallback" : "error") + " rules not allowed (for token '" + options.defaultType + "')"
            : "fallback and error are mutually exclusive (for token '" + options.defaultType + "')")
        }
        errorRule = options
      }

      // Peel leading one-character literals off into the fast table.
      var match = options.match.slice()
      while (fastAllowed && match.length && typeof match[0] === 'string' && match[0].length === 1) {
        var word = match.shift()
        fast[word.charCodeAt(0)] = options
      }

      // Reject inappropriate state-switching options
      var switchesState = options.pop || options.push || options.next
      if (switchesState && !hasStates) {
        throw new Error("State-switching options are not allowed in stateless lexers (for token '" + options.defaultType + "')")
      }
      if (switchesState && options.fallback) {
        throw new Error("State-switching options are not allowed on fallback tokens (for token '" + options.defaultType + "')")
      }

      // Only rules with a .match are included in the RegExp
      if (match.length === 0) {
        continue
      }
      // Once any rule reaches the RegExp, later rules may not use the fast table.
      fastAllowed = false
      groups.push(options)

      // Check unicode flag is used everywhere or nowhere
      for (var j = 0; j < match.length; j++) {
        var obj = match[j]
        if (isRegExp(obj)) {
          if (unicodeFlag === null) {
            unicodeFlag = obj.unicode
          } else if (unicodeFlag !== obj.unicode && options.fallback === false) {
            throw new Error('If one rule is /u then all must be')
          }
        }
      }

      // convert to RegExp source, then validate it
      var pat = reUnion(match.map(regexpOrLiteral))
      var regexp = new RegExp(pat)
      if (regexp.test("")) {
        throw new Error("RegExp matches empty string: " + regexp)
      }
      if (reGroups(pat) > 0) {
        throw new Error("RegExp has capture groups: " + regexp + "\nUse (?: … ) instead")
      }
      // try and detect rules matching newlines
      if (!options.lineBreaks && regexp.test('\n')) {
        throw new Error('Rule should declare lineBreaks: ' + regexp)
      }

      // store regex
      parts.push(reCapture(pat))
    }

    // Without a fallback rule the sticky flag restricts matching to the
    // current index. Engines lacking sticky fake it with a trailing empty
    // alternative, which always matches.
    var fallbackRule = errorRule && errorRule.fallback
    var useSticky = hasSticky && !fallbackRule
    var flags = useSticky ? 'ym' : 'gm'
    var suffix = hasSticky || fallbackRule ? '' : '|'
    if (unicodeFlag === true) flags += "u"

    return {
      regexp: new RegExp(reUnion(parts) + suffix, flags),
      groups: groups,
      fast: fast,
      error: errorRule || defaultErrorRule,
    }
  }
  // Builds a stateless lexer: the rules are compiled once and installed as the
  // single state named 'start'.
  function compile(rules) {
    var states = {start: compileRules(toRules(rules))}
    return new Lexer(states, 'start')
  }
  // Validates the state-switching options of rule `g`, which belongs to state
  // `name`: a push/next target must exist in `map`, and pop must coerce to 1.
  // A missing rule is ignored.
  function checkStateGroup(g, name, map) {
    if (!g) return
    var target = g.push || g.next
    if (target && !map[target]) {
      throw new Error("Missing state '" + target + "' (in token '" + g.defaultType + "' of state '" + name + "')")
    }
    if (g.pop && +g.pop !== 1) {
      throw new Error("pop must be 1 (in token '" + g.defaultType + "' of state '" + name + "')")
    }
  }
  // Builds a stateful lexer from a map of state name -> rule spec.
  //
  // states: object whose own properties are the states. The special key `$all`
  //         holds rules appended to every state; it is not a state itself.
  //         The object is only read, never modified, so the same spec can be
  //         compiled more than once.
  // start:  name of the initial state; defaults to the first state key.
  //
  // Throws when a state includes a nonexistent state, or when a rule's
  // push/next/pop option fails checkStateGroup().
  function compileStates(states, start) {
    var all = states.$all ? toRules(states.$all) : []

    // Skip `$all` when listing states instead of deleting it from the
    // caller's object.
    var keys = Object.getOwnPropertyNames(states).filter(function(key) {
      return key !== '$all'
    })
    if (!start) start = keys[0]

    var ruleMap = Object.create(null)
    for (var i = 0; i < keys.length; i++) {
      var key = keys[i]
      ruleMap[key] = toRules(states[key]).concat(all)
    }

    // Replace each {include: state} marker, in place, with the rules of the
    // named state that this state does not already contain.
    for (var i = 0; i < keys.length; i++) {
      var key = keys[i]
      var rules = ruleMap[key]
      var included = Object.create(null)
      for (var j = 0; j < rules.length; j++) {
        var rule = rules[j]
        if (!rule.include) continue
        var splice = [j, 1]
        // Self-includes and repeated includes are simply dropped.
        if (rule.include !== key && !included[rule.include]) {
          included[rule.include] = true
          var newRules = ruleMap[rule.include]
          if (!newRules) {
            throw new Error("Cannot include nonexistent state '" + rule.include + "' (in state '" + key + "')")
          }
          for (var k = 0; k < newRules.length; k++) {
            var newRule = newRules[k]
            if (rules.indexOf(newRule) !== -1) continue
            splice.push(newRule)
          }
        }
        rules.splice.apply(rules, splice)
        // Re-visit this index: spliced-in rules may be include markers too.
        j--
      }
    }

    var map = Object.create(null)
    for (var i = 0; i < keys.length; i++) {
      var key = keys[i]
      map[key] = compileRules(ruleMap[key], true)
    }

    // Every state-switching rule must point at a state that exists.
    for (var i = 0; i < keys.length; i++) {
      var name = keys[i]
      var state = map[name]
      var groups = state.groups
      for (var j = 0; j < groups.length; j++) {
        checkStateGroup(groups[j], name, map)
      }
      var fastKeys = Object.getOwnPropertyNames(state.fast)
      for (var j = 0; j < fastKeys.length; j++) {
        checkStateGroup(state.fast[fastKeys[j]], name, map)
      }
    }

    return new Lexer(map, start)
  }
  // Builds a lookup function from a { tokenType: keyword | [keywords] } map.
  // The returned function maps a keyword to its token type and yields
  // undefined for anything else. Throws if any keyword is not a string.
  function keywordTransform(map) {
    // Prefer a JavaScript Map for the keyword -> type table; fall back to a
    // prototype-less object where Map is unsupported.
    var isMap = typeof Map !== 'undefined'
    var reverseMap = isMap ? new Map : Object.create(null)

    Object.getOwnPropertyNames(map).forEach(function(tokenType) {
      var item = map[tokenType]
      var keywordList = Array.isArray(item) ? item : [item]
      keywordList.forEach(function(keyword) {
        if (typeof keyword !== 'string') {
          throw new Error("keyword must be string (in keyword '" + tokenType + "')")
        }
        if (isMap) {
          reverseMap.set(keyword, tokenType)
          return
        }
        reverseMap[keyword] = tokenType
      })
    })

    if (isMap) {
      return function(k) { return reverseMap.get(k) }
    }
    return function(k) { return reverseMap[k] }
  }
  351. /***************************************************************************/
  // Lexer constructor.
  // states: map of state name -> compiled state (see compileRules)
  // state:  name of the state to begin in
  // Starts with an empty buffer and stack, then runs reset() to initialise the
  // remaining fields.
  var Lexer = function(states, state) {
    this.states = states
    this.startState = state
    this.stack = []
    this.buffer = ''
    this.reset()
  }
  // Points the lexer at new input and rewinds it.
  //
  // data: the new buffer (defaults to '').
  // info: optional state as produced by save(); supplies line, col, state and
  //       stack. Without it the lexer restarts at line 1, col 1 in its start
  //       state with an empty stack.
  //
  // Returns the lexer itself so calls can be chained.
  Lexer.prototype.reset = function(data, info) {
    this.buffer = data || ''
    this.index = 0
    this.line = info ? info.line : 1
    this.col = info ? info.col : 1
    // next() emits a pending token whenever queuedGroup is set. A token queued
    // behind a fallback match belongs to the previous buffer, so it must not
    // survive a reset.
    this.queuedGroup = null
    this.queuedToken = info ? info.queuedToken : null
    this.queuedText = info ? info.queuedText : ""
    this.queuedThrow = info ? info.queuedThrow : null
    this.setState(info ? info.state : this.startState)
    // Copy the stack so later pushes and pops never touch the caller's array.
    this.stack = info && info.stack ? info.stack.slice() : []
    return this
  }
  // Captures the lexer's position and state in a plain object that can later
  // be passed to reset() as `info`. The stack is copied, so the snapshot does
  // not change when the lexer keeps running.
  Lexer.prototype.save = function() {
    var snapshot = {}
    snapshot.line = this.line
    snapshot.col = this.col
    snapshot.state = this.state
    snapshot.stack = this.stack.slice()
    snapshot.queuedToken = this.queuedToken
    snapshot.queuedText = this.queuedText
    snapshot.queuedThrow = this.queuedThrow
    return snapshot
  }
  // Switches to the named state and loads its compiled tables onto the lexer.
  // Does nothing for a falsy name or for the state that is already active.
  Lexer.prototype.setState = function(state) {
    var nothingToDo = !state || this.state === state
    if (nothingToDo) return
    this.state = state
    var compiled = this.states[state]
    this.groups = compiled.groups
    this.error = compiled.error
    this.re = compiled.regexp
    this.fast = compiled.fast
  }
  // Returns to the state most recently saved on the stack. With an empty stack
  // the popped value is undefined, which setState() ignores.
  Lexer.prototype.popState = function() {
    var previous = this.stack.pop()
    this.setState(previous)
  }
  // Saves the current state on the stack, then switches to `state`.
  Lexer.prototype.pushState = function(state) {
    var current = this.state
    this.stack.push(current)
    this.setState(state)
  }
  // Runs the combined RegExp against `buffer` from re.lastIndex and returns the
  // match array, or null when nothing matched.
  var eat = hasSticky
    ? function(re, buffer) {
        // assume re is /y
        return re.exec(buffer)
      }
    : function(re, buffer) {
        // assume re is /g; the pattern ends in an empty alternative, so exec()
        // always matches and an empty match stands for "no match"
        var found = re.exec(buffer)
        return found[0].length === 0 ? null : found
      }
  // Finds the rule whose capture group took part in `match`: capture group
  // i + 1 belongs to this.groups[i]. Throws when no group is set.
  Lexer.prototype._getGroup = function(match) {
    var groups = this.groups
    for (var slot = 1; slot <= groups.length; slot++) {
      if (match[slot] !== undefined) {
        return groups[slot - 1]
      }
    }
    throw new Error('Cannot find token type for matched text')
  }
  // Shared toString implementation for tokens: a token stringifies to its value.
  function tokenToString() {
    var token = this
    return token.value
  }
  // Produces the next token, or undefined once the buffer is exhausted.
  // Order of attempts: a token queued behind a fallback match, the
  // single-character fast table, then the combined RegExp. When nothing
  // matches, the error rule receives the rest of the buffer.
  Lexer.prototype.next = function() {
    var index = this.index

    // If a fallback token matched, we don't need to re-run the RegExp
    if (this.queuedGroup) {
      var queued = this._token(this.queuedGroup, this.queuedText, index)
      this.queuedGroup = null
      this.queuedText = ""
      return queued
    }

    var buffer = this.buffer
    if (index === buffer.length) {
      return // EOF
    }

    // Fast matching for single characters
    var fastGroup = this.fast[buffer.charCodeAt(index)]
    if (fastGroup) {
      return this._token(fastGroup, buffer.charAt(index), index)
    }

    // Execute RegExp
    var re = this.re
    re.lastIndex = index
    var match = eat(re, buffer)

    // Error tokens match the remaining buffer
    var error = this.error
    if (match == null) {
      return this._token(error, buffer.slice(index, buffer.length), index)
    }

    var group = this._getGroup(match)
    var text = match[0]
    if (!error.fallback || match.index === index) {
      return this._token(group, text, index)
    }

    // The match starts further along: emit the skipped text as a fallback
    // token now and keep the real match for the following call.
    this.queuedGroup = group
    this.queuedText = text
    return this._token(error, buffer.slice(index, match.index), index)
  }
  // Builds the token for `text` matched by rule `group` at buffer position
  // `offset`, then advances index / line / col and applies the rule's state
  // transition. Rules flagged shouldThrow raise an "invalid syntax" error
  // instead of returning the token.
  Lexer.prototype._token = function(group, text, offset) {
    // Count line breaks, remembering where the last one ends.
    var lineBreaks = 0
    var nl = 1
    if (group.lineBreaks) {
      var at = text.indexOf('\n')
      while (at !== -1) {
        lineBreaks++
        nl = at + 1
        at = text.indexOf('\n', nl)
      }
    }

    // A function-valued `type` may rename the token; a falsy result falls back
    // to the rule's own name.
    var transformedType = typeof group.type === 'function' && group.type(text)
    var token = {
      type: transformedType || group.defaultType,
      value: typeof group.value === 'function' ? group.value(text) : text,
      text: text,
      toString: tokenToString,
      offset: offset,
      lineBreaks: lineBreaks,
      line: this.line,
      col: this.col,
    }
    // nb. adding more props to token object will make V8 sad!

    var size = text.length
    this.index += size
    this.line += lineBreaks
    // After a line break the column restarts after the last newline.
    this.col = lineBreaks !== 0 ? size - nl + 1 : this.col + size

    // throw, if no rule with {error: true}
    if (group.shouldThrow) {
      throw new Error(this.formatError(token, "invalid syntax"))
    }

    if (group.pop) {
      this.popState()
    } else if (group.push) {
      this.pushState(group.push)
    } else if (group.next) {
      this.setState(group.next)
    }
    return token
  }
  // Iterator support, installed only where Symbol.iterator exists: a lexer can
  // then be consumed with for...of or spread. Iteration ends at the first
  // falsy result of lexer.next().
  if (typeof Symbol !== 'undefined' && Symbol.iterator) {
    var iteratorKey = Symbol.iterator

    var LexerIterator = function(lexer) {
      this.lexer = lexer
    }

    LexerIterator.prototype.next = function() {
      var token = this.lexer.next()
      var finished = !token
      return {value: token, done: finished}
    }

    // The iterator is itself iterable.
    LexerIterator.prototype[iteratorKey] = function() {
      return this
    }

    Lexer.prototype[iteratorKey] = function() {
      return new LexerIterator(this)
    }
  }
  // Formats a human-readable error message with an excerpt of the input.
  //
  // token:   the offending token; null/undefined means "end of input", for
  //          which a stand-in token is built from the lexer's position.
  // message: text placed before " at line L col C:".
  //
  // Returns a multi-line string: the header, a blank line, then up to five
  // numbered source lines with a "^" marker under token.col on the token's
  // line. The excerpt is cut from the tail of the buffer, sized by how far the
  // lexer's current line is past the token's line.
  Lexer.prototype.formatError = function(token, message) {
    if (token == null) {
      // An undefined token indicates EOF
      var text = this.buffer.slice(this.index)
      token = {
        text: text,
        offset: this.index,
        lineBreaks: text.indexOf('\n') === -1 ? 0 : 1,
        line: this.line,
        col: this.col,
      }
    }

    var numLinesAround = 2
    var firstDisplayedLine = Math.max(token.line - numLinesAround, 1)
    var lastDisplayedLine = token.line + numLinesAround
    // Line numbers are right-aligned to the width of the largest one shown.
    var lastLineDigits = String(lastDisplayedLine).length
    var displayedLines = lastNLines(
      this.buffer,
      (this.line - token.line) + numLinesAround + 1
    ).slice(0, 5)

    var errorLines = []
    errorLines.push(message + " at line " + token.line + " col " + token.col + ":")
    errorLines.push("")
    for (var i = 0; i < displayedLines.length; i++) {
      var line = displayedLines[i]
      var lineNo = firstDisplayedLine + i
      // Two spaces separate the line number from the source text. The marker
      // line below is padded for exactly that gutter width, so the caret
      // lands under column token.col.
      errorLines.push(pad(String(lineNo), lastLineDigits) + "  " + line)
      if (lineNo === token.line) {
        errorLines.push(pad("", lastLineDigits + token.col + 1) + "^")
      }
    }
    return errorLines.join("\n")
  }
  // Creates a fresh lexer over the same compiled states, using this lexer's
  // current state as the new lexer's start state.
  Lexer.prototype.clone = function() {
    var states = this.states
    var current = this.state
    return new Lexer(states, current)
  }
  // Answers whether the lexer can produce `tokenType`. The argument is not
  // inspected: every token type is reported as possible.
  Lexer.prototype.has = function(tokenType) {
    var possible = true
    return possible
  }
  553. return {
  554. compile: compile,
  555. states: compileStates,
  556. error: Object.freeze({error: true}),
  557. fallback: Object.freeze({fallback: true}),
  558. keywords: keywordTransform,
  559. }
  560. }));