123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316 |
- 'use strict'
- var assign = require('../constant/assign.js')
- var markdownLineEnding = require('../character/markdown-line-ending.js')
- var chunkedPush = require('./chunked-push.js')
- var chunkedSplice = require('./chunked-splice.js')
- var miniflat = require('./miniflat.js')
- var resolveAll = require('./resolve-all.js')
- var serializeChunks = require('./serialize-chunks.js')
- var shallow = require('./shallow.js')
- var sliceChunks = require('./slice-chunks.js')
// Create a tokenizer.
// Tokenizers deal with one type of data (e.g., containers, flow, text).
// The parser is the object dealing with it all.
//
// `parser` is stored on the returned context as `context.parser`; its
// `constructs.disable.null` list is consulted to skip constructs by `name`.
// `initialize` works like other constructs, except that its `tokenize`
// function is called with `effects` only: it doesn’t receive an `ok` or `nok`.
// Its `resolveAll`, `resolve`, and `resolveTo` hooks (when present) are applied
// once the stream has ended.
// `from` can be given to set the point before the first character, although
// when further lines are indented, they must be set with `defineSkip`.
// `from` is copied with `shallow`, the given object itself is not modified
// here.
//
// Returns the tokenize context: `previous`, `events`, `parser`,
// `sliceStream`, `sliceSerialize`, `now`, `defineSkip`, and `write`.
function createTokenizer(parser, initialize, from) {
  // Current position. `_index` is the index into `chunks`, `_bufferIndex` is
  // the index inside a string chunk (`-1` when not inside a string chunk).
  var point = from ? shallow(from) : {line: 1, column: 1, offset: 0}
  // Map of line number to the column that line starts at (see `defineSkip`).
  var columnStart = {}
  // Constructs whose `resolveAll` has to run when the stream is done.
  var resolveAllConstructs = []
  // Everything written so far.
  var chunks = []
  // Currently open tokens.
  var stack = []

  // Tools handed to every `tokenize` function.
  var effects = {
    consume: consume,
    enter: enter,
    exit: exit,
    attempt: constructFactory(onsuccessfulconstruct),
    check: constructFactory(onsuccessfulcheck),
    interrupt: constructFactory(onsuccessfulcheck, {interrupt: true}),
    lazy: constructFactory(onsuccessfulcheck, {lazy: true})
  }

  // State and tools for resolving and serializing.
  var context = {
    previous: null,
    events: [],
    parser: parser,
    sliceStream: sliceStream,
    sliceSerialize: sliceSerialize,
    now: now,
    defineSkip: skip,
    write: write
  }

  // The state function.
  var state = initialize.tokenize.call(context, effects)

  if (initialize.resolveAll) {
    resolveAllConstructs.push(initialize)
  }

  // Store where we are in the input stream.
  point._index = 0
  point._bufferIndex = -1

  return context

  // Add a slice of chunks and tokenize as far as possible.
  // Returns an empty list until the last chunk written is `null`; at that
  // point the initial construct and all `resolveAll` hooks are applied and the
  // final events are returned.
  function write(slice) {
    chunks = chunkedPush(chunks, slice)

    main()

    // Exit if we’re not done, resolve might change stuff.
    if (chunks[chunks.length - 1] !== null) {
      return []
    }

    addResult(initialize, 0)

    // Otherwise, resolve, and exit.
    context.events = resolveAll(resolveAllConstructs, context.events, context)

    return context.events
  }

  //
  // Tools.
  //

  // Serialize the chunks spanned by `token`.
  function sliceSerialize(token) {
    return serializeChunks(sliceStream(token))
  }

  // Get the chunks spanned by `token`.
  function sliceStream(token) {
    return sliceChunks(chunks, token)
  }

  // Get a copy of the current point.
  function now() {
    return shallow(point)
  }

  // Define that the line of `value` starts at the column of `value`.
  function skip(value) {
    columnStart[value.line] = value.column
    accountForPotentialSkip()
  }

  //
  // State management.
  //

  // Main loop (note that `_index` and `_bufferIndex` in `point` are modified by
  // `consume`).
  // Here is where we walk through the chunks, which either include strings of
  // several characters, or numerical character codes.
  // The reason to do this in a loop instead of a call is so the stack can
  // drain.
  // A state that returns without consuming is simply called again with the
  // same code on the next iteration.
  function main() {
    var chunkIndex
    var chunk

    while (point._index < chunks.length) {
      chunk = chunks[point._index]

      // If we’re in a buffer chunk, loop through it.
      if (typeof chunk === 'string') {
        chunkIndex = point._index

        if (point._bufferIndex < 0) {
          point._bufferIndex = 0
        }

        // `point._index` changes when the chunk is exhausted or when a failed
        // attempt moves the point back to an earlier chunk.
        while (
          point._index === chunkIndex &&
          point._bufferIndex < chunk.length
        ) {
          go(chunk.charCodeAt(point._bufferIndex))
        }
      } else {
        go(chunk)
      }
    }
  }

  // Deal with one code.
  function go(code) {
    state = state(code)
  }

  // Move a character forward.
  function consume(code) {
    if (markdownLineEnding(code)) {
      point.line++
      point.column = 1
      // Code `-3` counts for two positions in the source, every other line
      // ending for one (presumably `-3` stands for CR+LF, confirm against the
      // character code definitions).
      point.offset += code === -3 ? 2 : 1
      accountForPotentialSkip()
    } else if (code !== -1) {
      // Code `-1` moves through the stream without advancing the position.
      point.column++
      point.offset++
    }

    if (point._bufferIndex < 0) {
      // Not in a string chunk.
      point._index++
    } else {
      point._bufferIndex++

      // At end of string chunk.
      if (point._bufferIndex === chunks[point._index].length) {
        point._bufferIndex = -1
        point._index++
      }
    }

    // Expose the previous character.
    context.previous = code
  }

  // Start a token.
  // `fields`, when given, is used as the token object itself (it is mutated
  // and returned).
  function enter(type, fields) {
    var token = fields || {}

    token.type = type
    token.start = now()
    context.events.push(['enter', token, context])
    stack.push(token)

    return token
  }

  // Stop a token.
  // Closes the most recently entered token; `type` is not inspected here.
  function exit(type) {
    var token = stack.pop()

    token.end = now()
    context.events.push(['exit', token, context])

    return token
  }

  // Use results.
  function onsuccessfulconstruct(construct, info) {
    addResult(construct, info.from)
  }

  // Discard results.
  function onsuccessfulcheck(construct, info) {
    info.restore()
  }

  // Factory to attempt/check/interrupt.
  // `onreturn` is called with the successful construct and the stored info;
  // `fields`, when given, are mixed into a copy of the context that the
  // construct’s `tokenize` receives as `this`.
  function constructFactory(onreturn, fields) {
    return hook

    // Handle either an object mapping codes to constructs, a list of
    // constructs, or a single construct.
    function hook(constructs, returnState, bogusState) {
      var listOfConstructs
      var constructIndex
      var currentConstruct
      var info

      return constructs.tokenize || 'length' in constructs
        ? handleListOfConstructs(miniflat(constructs))
        : handleMapOfConstructs

      // Pick the constructs registered for `code`, followed by the ones
      // registered under `null`, and try them in that order.
      function handleMapOfConstructs(code) {
        var list

        if (code in constructs || null in constructs) {
          // Always normalize to a list: a value may be a single construct or
          // a list of constructs.
          // For the `null` code the specific entry *is* the `null` entry, so
          // it is only added once.
          list = code === null ? [] : miniflat(constructs[code])

          if (constructs.null) {
            list = list.concat(miniflat(constructs.null))
          }

          return handleListOfConstructs(list)(code)
        }

        return bogusState(code)
      }

      // Try each construct in `list`, in order.
      function handleListOfConstructs(list) {
        listOfConstructs = list
        constructIndex = 0

        // Nothing to try: that is the same as every construct failing.
        if (list.length === 0) {
          return bogusState
        }

        return handleConstruct(list[constructIndex])
      }

      // Create the start state for one construct.
      function handleConstruct(construct) {
        return start

        function start(code) {
          // To do: not need to store if there is no bogus state, probably?
          // Currently doesn’t work because `inspect` in document does a check
          // w/o a bogus, which doesn’t make sense. But it does seem to help perf
          // by not storing.
          info = store()
          currentConstruct = construct

          if (!construct.partial) {
            context.currentConstruct = construct
          }

          // Constructs can be turned off by name.
          if (
            construct.name &&
            context.parser.constructs.disable.null.indexOf(construct.name) > -1
          ) {
            return nok()
          }

          return construct.tokenize.call(
            fields ? assign({}, context, fields) : context,
            effects,
            ok,
            nok
          )(code)
        }
      }

      // The construct matched: hand off the result and continue in
      // `returnState` (which is called with the same, unconsumed, code by the
      // main loop).
      function ok(code) {
        onreturn(currentConstruct, info)
        return returnState
      }

      // The construct did not match: undo what it did and try the next one,
      // or continue in `bogusState` when there are none left.
      function nok(code) {
        info.restore()

        if (++constructIndex < listOfConstructs.length) {
          return handleConstruct(listOfConstructs[constructIndex])
        }

        return bogusState
      }
    }
  }

  // Apply the resolvers of `construct` to the events from index `from` on.
  function addResult(construct, from) {
    if (construct.resolveAll && resolveAllConstructs.indexOf(construct) < 0) {
      resolveAllConstructs.push(construct)
    }

    if (construct.resolve) {
      chunkedSplice(
        context.events,
        from,
        context.events.length - from,
        construct.resolve(context.events.slice(from), context)
      )
    }

    if (construct.resolveTo) {
      context.events = construct.resolveTo(context.events, context)
    }
  }

  // Take a snapshot of the current state.
  // Returns `restore` (to go back to the snapshot) and `from` (the number of
  // events at the time of the snapshot).
  function store() {
    var startPoint = now()
    var startPrevious = context.previous
    var startCurrentConstruct = context.currentConstruct
    var startEventsIndex = context.events.length
    var startStack = Array.from(stack)

    return {restore: restore, from: startEventsIndex}

    function restore() {
      point = startPoint
      context.previous = startPrevious
      context.currentConstruct = startCurrentConstruct
      // Drop the events that were added since the snapshot.
      context.events.length = startEventsIndex
      stack = startStack
      accountForPotentialSkip()
    }
  }

  // When at the first column of a line that has a defined start column, move
  // there.
  function accountForPotentialSkip() {
    if (point.line in columnStart && point.column < 2) {
      point.column = columnStart[point.line]
      point.offset += columnStart[point.line] - 1
    }
  }
}
- module.exports = createTokenizer
|