index.js 21 KB


  1. 'use strict'
  2. module.exports = fromMarkdown
  3. // These three are compiled away in the `dist/`
  4. var codes = require('micromark/dist/character/codes')
  5. var constants = require('micromark/dist/constant/constants')
  6. var types = require('micromark/dist/constant/types')
  7. var toString = require('mdast-util-to-string')
  8. var assign = require('micromark/dist/constant/assign')
  9. var own = require('micromark/dist/constant/has-own-property')
  10. var normalizeIdentifier = require('micromark/dist/util/normalize-identifier')
  11. var safeFromInt = require('micromark/dist/util/safe-from-int')
  12. var parser = require('micromark/dist/parse')
  13. var preprocessor = require('micromark/dist/preprocess')
  14. var postprocess = require('micromark/dist/postprocess')
  15. var decode = require('parse-entities/decode-entity')
  16. var stringifyPosition = require('unist-util-stringify-position')
  // Parse markdown into an mdast tree.
  //
  // `value` is the document, `encoding` is an optional character encoding
  // (only honoured when it is a string), and `options` is passed to both the
  // micromark parser and the mdast compiler.
  // The encoding may be omitted: `fromMarkdown(value, options)`.
  function fromMarkdown(value, encoding, options) {
    var encodingGiven = typeof encoding === 'string'
    var settings = encodingGiven ? options : encoding
    var charset = encodingGiven ? encoding : undefined
    var compile = compiler(settings)
    var tokenizer = parser(settings).document()
    // `true` is passed as the final argument of the preprocessor call, as the
    // whole document is handed over in one go.
    var chunks = preprocessor()(value, charset, true)
    var events = postprocess(tokenizer.write(chunks))

    return compile(events)
  }
  28. // Note: this compiler only understands complete buffering, not streaming.
  29. function compiler(options) {
  30. var settings = options || {}
  31. var config = configure(
  32. {
  33. transforms: [],
  34. canContainEols: [
  35. 'emphasis',
  36. 'fragment',
  37. 'heading',
  38. 'paragraph',
  39. 'strong'
  40. ],
  41. enter: {
  42. autolink: opener(link),
  43. autolinkProtocol: onenterdata,
  44. autolinkEmail: onenterdata,
  45. atxHeading: opener(heading),
  46. blockQuote: opener(blockQuote),
  47. characterEscape: onenterdata,
  48. characterReference: onenterdata,
  49. codeFenced: opener(codeFlow),
  50. codeFencedFenceInfo: buffer,
  51. codeFencedFenceMeta: buffer,
  52. codeIndented: opener(codeFlow, buffer),
  53. codeText: opener(codeText, buffer),
  54. codeTextData: onenterdata,
  55. data: onenterdata,
  56. codeFlowValue: onenterdata,
  57. definition: opener(definition),
  58. definitionDestinationString: buffer,
  59. definitionLabelString: buffer,
  60. definitionTitleString: buffer,
  61. emphasis: opener(emphasis),
  62. hardBreakEscape: opener(hardBreak),
  63. hardBreakTrailing: opener(hardBreak),
  64. htmlFlow: opener(html, buffer),
  65. htmlFlowData: onenterdata,
  66. htmlText: opener(html, buffer),
  67. htmlTextData: onenterdata,
  68. image: opener(image),
  69. label: buffer,
  70. link: opener(link),
  71. listItem: opener(listItem),
  72. listItemValue: onenterlistitemvalue,
  73. listOrdered: opener(list, onenterlistordered),
  74. listUnordered: opener(list),
  75. paragraph: opener(paragraph),
  76. reference: onenterreference,
  77. referenceString: buffer,
  78. resourceDestinationString: buffer,
  79. resourceTitleString: buffer,
  80. setextHeading: opener(heading),
  81. strong: opener(strong),
  82. thematicBreak: opener(thematicBreak)
  83. },
  84. exit: {
  85. atxHeading: closer(),
  86. atxHeadingSequence: onexitatxheadingsequence,
  87. autolink: closer(),
  88. autolinkEmail: onexitautolinkemail,
  89. autolinkProtocol: onexitautolinkprotocol,
  90. blockQuote: closer(),
  91. characterEscapeValue: onexitdata,
  92. characterReferenceMarkerHexadecimal: onexitcharacterreferencemarker,
  93. characterReferenceMarkerNumeric: onexitcharacterreferencemarker,
  94. characterReferenceValue: onexitcharacterreferencevalue,
  95. codeFenced: closer(onexitcodefenced),
  96. codeFencedFence: onexitcodefencedfence,
  97. codeFencedFenceInfo: onexitcodefencedfenceinfo,
  98. codeFencedFenceMeta: onexitcodefencedfencemeta,
  99. codeFlowValue: onexitdata,
  100. codeIndented: closer(onexitcodeindented),
  101. codeText: closer(onexitcodetext),
  102. codeTextData: onexitdata,
  103. data: onexitdata,
  104. definition: closer(),
  105. definitionDestinationString: onexitdefinitiondestinationstring,
  106. definitionLabelString: onexitdefinitionlabelstring,
  107. definitionTitleString: onexitdefinitiontitlestring,
  108. emphasis: closer(),
  109. hardBreakEscape: closer(onexithardbreak),
  110. hardBreakTrailing: closer(onexithardbreak),
  111. htmlFlow: closer(onexithtmlflow),
  112. htmlFlowData: onexitdata,
  113. htmlText: closer(onexithtmltext),
  114. htmlTextData: onexitdata,
  115. image: closer(onexitimage),
  116. label: onexitlabel,
  117. labelText: onexitlabeltext,
  118. lineEnding: onexitlineending,
  119. link: closer(onexitlink),
  120. listItem: closer(),
  121. listOrdered: closer(),
  122. listUnordered: closer(),
  123. paragraph: closer(),
  124. referenceString: onexitreferencestring,
  125. resourceDestinationString: onexitresourcedestinationstring,
  126. resourceTitleString: onexitresourcetitlestring,
  127. resource: onexitresource,
  128. setextHeading: closer(onexitsetextheading),
  129. setextHeadingLineSequence: onexitsetextheadinglinesequence,
  130. setextHeadingText: onexitsetextheadingtext,
  131. strong: closer(),
  132. thematicBreak: closer()
  133. }
  134. },
  135. settings.mdastExtensions || []
  136. )
  137. var data = {}
  138. return compile
  // Turn a list of events (`[kind, token, tokenizeContext]` triples, where
  // `kind` is `'enter'` or `'exit'`) into an mdast `root` node.
  //
  // Throws when a token is still open after all events were handled.
  function compile(events) {
    var tree = {type: 'root', children: []}
    var stack = [tree]
    var tokenStack = []
    var openLists = []
    var origin = {line: 1, column: 1, offset: 0}
    // Exposed as `this` (together with `sliceSerialize`) to every handler.
    var context = {
      stack: stack,
      tokenStack: tokenStack,
      config: config,
      enter: enter,
      exit: exit,
      buffer: buffer,
      resume: resume,
      setData: setData,
      getData: getData
    }
    var cursor
    var event
    var handlers
    var unclosed
    var first
    var last

    // First pass: lists are preprocessed to add `listItem` tokens, and to
    // infer whether the list and its items are spread out.
    // `prepareList` inserts events, so it returns where the list now ends.
    for (cursor = 0; cursor < events.length; cursor++) {
      event = events[cursor]

      if (
        event[1].type !== types.listOrdered &&
        event[1].type !== types.listUnordered
      ) {
        continue
      }

      if (event[0] === 'enter') {
        openLists.push(cursor)
      } else {
        cursor = prepareList(events, openLists.pop(), cursor)
      }
    }

    // Second pass: call the configured handler (if any) for each event.
    for (cursor = 0; cursor < events.length; cursor++) {
      event = events[cursor]
      handlers = config[event[0]]

      if (own.call(handlers, event[1].type)) {
        handlers[event[1].type].call(
          assign({sliceSerialize: event[2].sliceSerialize}, context),
          event[1]
        )
      }
    }

    if (tokenStack.length) {
      unclosed = tokenStack[tokenStack.length - 1]
      throw new Error(
        'Cannot close document, a token (`' +
          unclosed.type +
          '`, ' +
          stringifyPosition({start: unclosed.start, end: unclosed.end}) +
          ') is still open'
      )
    }

    // Figure out `root` position: from the start of the first token to the
    // end of the token of the second-to-last event.
    if (events.length) {
      first = events[0][1].start
      last = events[events.length - 2][1].end
    } else {
      first = origin
      last = origin
    }

    tree.position = {start: point(first), end: point(last)}

    // A transform may return a replacement tree.
    for (cursor = 0; cursor < config.transforms.length; cursor++) {
      tree = config.transforms[cursor](tree) || tree
    }

    return tree
  }
  // Add `listItem` enter/exit events for the list whose enter event is at
  // `start` and whose exit event is at `length`, and record on the list token
  // (`_spread`) and on each item token (`_spread`) whether blank lines
  // separate their content.
  //
  // `events` is modified in place; the return value is the index of the
  // list’s exit event after all insertions.
  function prepareList(events, start, length) {
    // Token types that may sit between a list item marker and its content
    // without ending the “at marker” state.
    var markerParts = [
      types.linePrefix,
      types.listItemValue,
      types.listItemMarker,
      types.listItemPrefix,
      types.listItemPrefixWhitespace
    ]
    // Token types skipped when walking back over trailing line endings.
    var skippable = [
      types.linePrefix,
      types.blockQuotePrefix,
      types.blockQuotePrefixWhitespace,
      types.blockQuoteMarker,
      types.listItemIndent
    ]
    var cursor = start - 1
    // `0` while directly inside this list, higher inside nested containers.
    var depth = -1
    var spread = false
    var item
    var blankAt
    var atMarker
    var current
    var token
    var isList
    var scan
    var eolAt
    var candidate

    while (++cursor <= length) {
      current = events[cursor]
      token = current[1]
      isList =
        token.type === types.listUnordered || token.type === types.listOrdered

      if (isList || token.type === types.blockQuote) {
        depth += current[0] === 'enter' ? 1 : -1
        atMarker = undefined
      } else if (token.type === types.lineEndingBlank) {
        if (current[0] === 'enter') {
          // Remember the first blank line in the item, unless it directly
          // follows the marker or sits in a nested container.
          if (item && !atMarker && !depth && !blankAt) {
            blankAt = cursor
          }

          atMarker = undefined
        }
      } else if (markerParts.indexOf(token.type) === -1) {
        atMarker = undefined
      }

      // Only a new item prefix in this list, or the end of this list, closes
      // the current item.
      if (
        !(
          (!depth &&
            current[0] === 'enter' &&
            token.type === types.listItemPrefix) ||
          (depth === -1 && current[0] === 'exit' && isList)
        )
      ) {
        continue
      }

      if (item) {
        eolAt = undefined

        // Walk back over trailing line endings (and prefixes) so the item is
        // closed before them.
        for (scan = cursor - 1; scan >= 0; scan--) {
          candidate = events[scan]

          if (
            candidate[1].type === types.lineEnding ||
            candidate[1].type === types.lineEndingBlank
          ) {
            if (candidate[0] === 'exit') continue

            // More than one trailing line ending: the later one is a blank
            // line, so the list is spread.
            if (eolAt) {
              events[eolAt][1].type = types.lineEndingBlank
              spread = true
            }

            candidate[1].type = types.lineEnding
            eolAt = scan
          } else if (skippable.indexOf(candidate[1].type) === -1) {
            break
          }
        }

        if (blankAt && (!eolAt || blankAt < eolAt)) {
          item._spread = true
        }

        // Fix position.
        item.end = point(eolAt ? events[eolAt][1].start : token.end)
        events.splice(eolAt || cursor, 0, ['exit', item, current[2]])
        cursor++
        length++
      }

      // Create a new list item.
      if (token.type === types.listItemPrefix) {
        item = {type: 'listItem', _spread: false, start: point(token.start)}
        events.splice(cursor, 0, ['enter', item, current[2]])
        cursor++
        length++
        blankAt = undefined
        atMarker = true
      }
    }

    events[start][1]._spread = spread
    return length
  }
  // Store `value` under `key` in the state shared by the handlers.
  // Calling it without a value resets the entry to `undefined`.
  function setData(key, value) {
    var state = data
    state[key] = value
  }
  // Read the value stored under `key` in the state shared by the handlers.
  function getData(key) {
    var state = data
    return state[key]
  }
  // Make a fresh point holding only the `line`, `column`, and `offset` of `d`.
  function point(d) {
    var copy = {}
    copy.line = d.line
    copy.column = d.column
    copy.offset = d.offset
    return copy
  }
  // Make an enter handler: it enters the node returned by `create(token)` and
  // then, when given, calls `and` with the same `this` and token.
  function opener(create, and) {
    return function open(token) {
      var node = create(token)

      enter.call(this, node, token)

      if (and) {
        and.call(this, token)
      }
    }
  }
  // Push a new, empty `fragment` onto the stack so that what follows is
  // collected there instead of in the tree.
  function buffer() {
    var fragment = {type: 'fragment', children: []}
    this.stack.push(fragment)
  }
  // Add `node` as the last child of the node on top of the stack, make it the
  // new top, remember `token` as open, and set the start of its position.
  // Returns `node`.
  function enter(node, token) {
    var parent = this.stack[this.stack.length - 1]

    parent.children.push(node)
    this.stack.push(node)
    this.tokenStack.push(token)
    node.position = {start: point(token.start)}

    return node
  }
  // Make an exit handler: it first calls `and` (when given) with the same
  // `this` and token, and then exits the current node.
  function closer(and) {
    return function close(token) {
      if (and) {
        and.call(this, token)
      }

      exit.call(this, token)
    }
  }
  // Close the node on top of the stack for `token`: pop the node and the open
  // token, set the end of the node’s position, and return the node.
  //
  // Throws when no token is open, or when the open token is of another type.
  function exit(token) {
    var node = this.stack.pop()
    var opened = this.tokenStack.pop()
    var here

    if (opened && opened.type === token.type) {
      node.position.end = point(token.end)
      return node
    }

    here = stringifyPosition({start: token.start, end: token.end})

    if (!opened) {
      throw new Error(
        'Cannot close `' + token.type + '` (' + here + '): it’s not open'
      )
    }

    throw new Error(
      'Cannot close `' +
        token.type +
        '` (' +
        here +
        '): a different token (`' +
        opened.type +
        '`, ' +
        stringifyPosition({start: opened.start, end: opened.end}) +
        ') is open'
    )
  }
  // Pop the node on top of the stack and return its content as a string.
  function resume() {
    var top = this.stack.pop()
    return toString(top)
  }
  391. //
  392. // Handlers.
  393. //
  // Flag that the value of the next list item value token is wanted (it is
  // used as the start of the list).
  function onenterlistordered() {
    var flag = 'expectingFirstListItemValue'
    setData(flag, true)
  }
  // When the first item value of an ordered list is expected: parse the token
  // as a decimal number, store it as `start` on the node two levels up the
  // stack (presumably the list, with the list item on top), and clear the
  // flag so later values are ignored.
  function onenterlistitemvalue(token) {
    var target

    if (!getData('expectingFirstListItemValue')) {
      return
    }

    target = this.stack[this.stack.length - 2]
    target.start = parseInt(
      this.sliceSerialize(token),
      constants.numericBaseDecimal
    )
    setData('expectingFirstListItemValue')
  }
  // Store the buffered info string as `lang` on the node that is on top of
  // the stack once the buffer is popped.
  function onexitcodefencedfenceinfo() {
    // Resume first: it pops the buffer, exposing the node below it.
    var info = this.resume()
    var node = this.stack[this.stack.length - 1]
    node.lang = info
  }
  // Store the buffered meta string as `meta` on the node that is on top of
  // the stack once the buffer is popped.
  function onexitcodefencedfencemeta() {
    // Resume first: it pops the buffer, exposing the node below it.
    var meta = this.resume()
    var node = this.stack[this.stack.length - 1]
    node.meta = meta
  }
  // After the first fence of a fenced code: start buffering the content.
  // A later fence (the closing one) is ignored, as the flag is already set.
  function onexitcodefencedfence() {
    if (!getData('flowCodeInside')) {
      this.buffer()
      setData('flowCodeInside', true)
    }
  }
  // Store the buffered content as `value` on the node below the buffer, minus
  // one line ending at its start and one at its end, and clear the
  // `flowCodeInside` flag.
  function onexitcodefenced() {
    // Resume first: it pops the buffer, exposing the node below it.
    var content = this.resume()
    var node = this.stack[this.stack.length - 1]

    node.value = content.replace(/^(\r?\n|\r)|(\r?\n|\r)$/g, '')
    setData('flowCodeInside')
  }
  // Store the buffered content as `value` on the node below the buffer.
  function onexitcodeindented() {
    // Resume first: it pops the buffer, exposing the node below it.
    var content = this.resume()
    var node = this.stack[this.stack.length - 1]
    node.value = content
  }
  // Set `label` and `identifier` on the node below the buffer: the label is
  // the buffered (compiled) content, whereas the identifier is derived from
  // the source text of the token, normalized and lowercased.
  function onexitdefinitionlabelstring(token) {
    // Resume first: it pops the buffer, exposing the node below it.
    var label = this.resume()
    var node = this.stack[this.stack.length - 1]
    var source = this.sliceSerialize(token)

    node.label = label
    node.identifier = normalizeIdentifier(source).toLowerCase()
  }
  // Store the buffered content as `title` on the node below the buffer.
  function onexitdefinitiontitlestring() {
    // Resume first: it pops the buffer, exposing the node below it.
    var title = this.resume()
    var node = this.stack[this.stack.length - 1]
    node.title = title
  }
  // Store the buffered content as `url` on the node below the buffer.
  function onexitdefinitiondestinationstring() {
    // Resume first: it pops the buffer, exposing the node below it.
    var destination = this.resume()
    var node = this.stack[this.stack.length - 1]
    node.url = destination
  }
  // Set `depth` on the current node to the length of the sequence, unless a
  // depth is already set (so a later sequence does not overwrite it).
  function onexitatxheadingsequence(token) {
    var node = this.stack[this.stack.length - 1]

    if (node.depth) {
      return
    }

    node.depth = this.sliceSerialize(token).length
  }
  // Flag that line endings are to be dropped from here on; the line ending
  // handler skips them while this is set.
  function onexitsetextheadingtext() {
    var flag = 'setextHeadingSlurpLineEnding'
    setData(flag, true)
  }
  // Set `depth` on the current node: `1` when the underline starts with an
  // equals sign, `2` otherwise.
  function onexitsetextheadinglinesequence(token) {
    var node = this.stack[this.stack.length - 1]
    var marker = this.sliceSerialize(token).charCodeAt(0)

    if (marker === codes.equalsTo) {
      node.depth = 1
    } else {
      node.depth = 2
    }
  }
  // Clear the flag that makes the line ending handler drop line endings.
  function onexitsetextheading() {
    var flag = 'setextHeadingSlurpLineEnding'
    setData(flag)
  }
  // Make a text node the top of the stack: the last child of the current node
  // is reused when it is a text node, otherwise a new text node (positioned
  // at the start of `token`) is added.
  function onenterdata(token) {
    var siblings = this.stack[this.stack.length - 1].children
    var tail = siblings[siblings.length - 1]

    if (tail && tail.type === 'text') {
      this.stack.push(tail)
      return
    }

    // Add a new text node.
    tail = text()
    tail.position = {start: point(token.start)}
    siblings.push(tail)
    this.stack.push(tail)
  }
  // Pop the node on top of the stack, append the source text of `token` to
  // its value, and move the end of its position to the end of `token`.
  function onexitdata(token) {
    var node = this.stack.pop()
    var chunk = this.sliceSerialize(token)

    node.value += chunk
    node.position.end = point(token.end)
  }
  // Handle a line ending.
  //
  // Directly after a hard break it only extends the position of the last
  // child of the current node.
  // Otherwise it is added as text, but only when line endings are not being
  // dropped (setext heading) and the current node’s type is listed in
  // `config.canContainEols`.
  function onexitlineending(token) {
    var parent = this.stack[this.stack.length - 1]
    var last

    // If we’re at a hard break, include the line ending in there.
    if (getData('atHardBreak')) {
      last = parent.children[parent.children.length - 1]
      last.position.end = point(token.end)
      setData('atHardBreak')
      return
    }

    if (getData('setextHeadingSlurpLineEnding')) {
      return
    }

    if (config.canContainEols.indexOf(parent.type) !== -1) {
      onenterdata.call(this, token)
      onexitdata.call(this, token)
    }
  }
  // Flag that a hard break was just seen, so that the line ending handler
  // includes the next line ending in the break.
  function onexithardbreak() {
    var flag = 'atHardBreak'
    setData(flag, true)
  }
  // Store the buffered content as `value` on the node below the buffer.
  function onexithtmlflow() {
    // Resume first: it pops the buffer, exposing the node below it.
    var content = this.resume()
    var node = this.stack[this.stack.length - 1]
    node.value = content
  }
  // Store the buffered content as `value` on the node below the buffer.
  function onexithtmltext() {
    // Resume first: it pops the buffer, exposing the node below it.
    var content = this.resume()
    var node = this.stack[this.stack.length - 1]
    node.value = content
  }
  // Store the buffered content as `value` on the node below the buffer.
  function onexitcodetext() {
    // Resume first: it pops the buffer, exposing the node below it.
    var content = this.resume()
    var node = this.stack[this.stack.length - 1]
    node.value = content
  }
  // Finish the link on top of the stack (see `finishLinkOrImage`).
  function onexitlink() {
    finishLinkOrImage(this.stack[this.stack.length - 1])
  }

  // Finish the image on top of the stack (see `finishLinkOrImage`).
  function onexitimage() {
    finishLinkOrImage(this.stack[this.stack.length - 1])
  }

  // Shared by links and images, which were handled by two identical bodies.
  //
  // While `inReference` is set, `node` is turned into a reference: `Reference`
  // is appended to its type, `referenceType` is set (`'shortcut'` unless a
  // type was recorded), and the resource fields (`url`, `title`) are removed.
  // Otherwise the reference fields (`identifier`, `label`, `referenceType`)
  // are removed.
  // The recorded `referenceType` is cleared in both cases.
  function finishLinkOrImage(node) {
    if (getData('inReference')) {
      node.type += 'Reference'
      node.referenceType = getData('referenceType') || 'shortcut'
      delete node.url
      delete node.title
    } else {
      delete node.identifier
      delete node.label
      delete node.referenceType
    }

    setData('referenceType')
  }
  // Set `identifier` on the node two levels up the stack (the node below the
  // label buffer) to the source text of `token`, normalized and lowercased.
  function onexitlabeltext(token) {
    var owner = this.stack[this.stack.length - 2]
    var source = this.sliceSerialize(token)

    owner.identifier = normalizeIdentifier(source).toLowerCase()
  }
  // Finish a label: the buffered content becomes `label` on the node below
  // the buffer, and a reference is assumed until a resource says otherwise.
  // A link takes over the children of the buffer; any other node (an image)
  // gets the content as `alt`.
  function onexitlabel() {
    // Keep hold of the buffer before `resume` pops it.
    var fragment = this.stack[this.stack.length - 1]
    var value = this.resume()
    var node = this.stack[this.stack.length - 1]

    node.label = value

    // Assume a reference.
    setData('inReference', true)

    if (node.type === 'link') {
      node.children = fragment.children
    } else {
      node.alt = value
    }
  }
  // Store the buffered content as `url` on the node below the buffer.
  function onexitresourcedestinationstring() {
    // Resume first: it pops the buffer, exposing the node below it.
    var destination = this.resume()
    var node = this.stack[this.stack.length - 1]
    node.url = destination
  }
  // Store the buffered content as `title` on the node below the buffer.
  function onexitresourcetitlestring() {
    // Resume first: it pops the buffer, exposing the node below it.
    var title = this.resume()
    var node = this.stack[this.stack.length - 1]
    node.title = title
  }
  // A resource was found, so this is not a reference: clear the flag.
  function onexitresource() {
    var flag = 'inReference'
    setData(flag)
  }
  // Record the reference as `collapsed`; the reference string handler
  // upgrades it to `full` when there is a reference string.
  function onenterreference() {
    var kind = 'collapsed'
    setData('referenceType', kind)
  }
  // Set `label` (the buffered content) and `identifier` (the source text of
  // `token`, normalized and lowercased) on the node below the buffer, and
  // record the reference as `full`.
  function onexitreferencestring(token) {
    // Resume first: it pops the buffer, exposing the node below it.
    var label = this.resume()
    var node = this.stack[this.stack.length - 1]
    var source = this.sliceSerialize(token)

    node.label = label
    node.identifier = normalizeIdentifier(source).toLowerCase()
    setData('referenceType', 'full')
  }
  // Remember the type of the marker, so that the value handler knows in which
  // base to read a numeric character reference.
  function onexitcharacterreferencemarker(token) {
    var markerType = token.type
    setData('characterReferenceType', markerType)
  }
  // Decode a character reference and append it to the node on top of the
  // stack, which is popped.
  //
  // When a marker type was recorded the value is numeric: decimal for the
  // numeric marker, hexadecimal otherwise; the recorded type is then cleared.
  // Without a recorded type the value is decoded as a named reference.
  function onexitcharacterreferencevalue(token) {
    var raw = this.sliceSerialize(token)
    var markerType = getData('characterReferenceType')
    var base
    var decoded
    var node

    if (markerType) {
      if (markerType === types.characterReferenceMarkerNumeric) {
        base = constants.numericBaseDecimal
      } else {
        base = constants.numericBaseHexadecimal
      }

      decoded = safeFromInt(raw, base)
      setData('characterReferenceType')
    } else {
      decoded = decode(raw)
    }

    node = this.stack.pop()
    node.value += decoded
    node.position.end = point(token.end)
  }
  // Finish the text of the autolink, then use the source text of `token` as
  // `url` of the node that is on top of the stack afterwards.
  function onexitautolinkprotocol(token) {
    var node

    // This pops the text node, exposing the node below it.
    onexitdata.call(this, token)
    node = this.stack[this.stack.length - 1]
    node.url = this.sliceSerialize(token)
  }
  // Finish the text of the autolink, then use the source text of `token`,
  // prefixed with `mailto:`, as `url` of the node that is on top of the stack
  // afterwards.
  function onexitautolinkemail(token) {
    var node

    // This pops the text node, exposing the node below it.
    onexitdata.call(this, token)
    node = this.stack[this.stack.length - 1]
    node.url = 'mailto:' + this.sliceSerialize(token)
  }
  615. //
  616. // Creators.
  617. //
  // Create an empty `blockquote` node.
  function blockQuote() {
    var node = {type: 'blockquote'}
    node.children = []
    return node
  }
  // Create a `code` node without language or meta and with an empty value.
  function codeFlow() {
    var node = {type: 'code'}
    node.lang = null
    node.meta = null
    node.value = ''
    return node
  }
  // Create an `inlineCode` node with an empty value.
  function codeText() {
    var node = {type: 'inlineCode'}
    node.value = ''
    return node
  }
  // Create a `definition` node with empty identifier and url, and without
  // label or title.
  function definition() {
    var node = {type: 'definition'}
    node.identifier = ''
    node.label = null
    node.title = null
    node.url = ''
    return node
  }
  // Create an empty `emphasis` node.
  function emphasis() {
    var node = {type: 'emphasis'}
    node.children = []
    return node
  }
  // Create an empty `heading` node; `depth` is present but not yet known.
  function heading() {
    var node = {type: 'heading'}
    node.depth = undefined
    node.children = []
    return node
  }
  // Create a `break` node.
  function hardBreak() {
    var node = {}
    node.type = 'break'
    return node
  }
  // Create an `html` node with an empty value.
  function html() {
    var node = {type: 'html'}
    node.value = ''
    return node
  }
  // Create an `image` node with an empty url, and without title or alt.
  function image() {
    var node = {type: 'image'}
    node.title = null
    node.url = ''
    node.alt = null
    return node
  }
  // Create an empty `link` node with an empty url and without title.
  function link() {
    var node = {type: 'link'}
    node.title = null
    node.url = ''
    node.children = []
    return node
  }
  // Create an empty `list` node for `token`: ordered when the token type is
  // `listOrdered`, and spread as recorded on the token (`_spread`).
  function list(token) {
    var node = {type: 'list'}
    node.ordered = token.type === 'listOrdered'
    node.start = null
    node.spread = token._spread
    node.children = []
    return node
  }
  // Create an empty `listItem` node for `token`, spread as recorded on the
  // token (`_spread`) and without a checked state.
  function listItem(token) {
    var node = {type: 'listItem'}
    node.spread = token._spread
    node.checked = null
    node.children = []
    return node
  }
  // Create an empty `paragraph` node.
  function paragraph() {
    var node = {type: 'paragraph'}
    node.children = []
    return node
  }
  // Create an empty `strong` node.
  function strong() {
    var node = {type: 'strong'}
    node.children = []
    return node
  }
  // Create a `text` node with an empty value.
  function text() {
    var node = {type: 'text'}
    node.value = ''
    return node
  }
  // Create a `thematicBreak` node.
  function thematicBreak() {
    var node = {}
    node.type = 'thematicBreak'
    return node
  }
  683. }
  // Merge every extension in `extensions`, in order, into `config`.
  // `config` is modified in place and returned.
  function configure(config, extensions) {
    var position

    for (position = 0; position < extensions.length; position++) {
      extension(config, extensions[position])
    }

    return config
  }
  // Merge one extension into `config`, in place.
  //
  // The list fields (`canContainEols`, `transforms`) are concatenated onto
  // the existing list; every other field is treated as a map whose entries
  // are copied over the existing ones, so later extensions win.
  function extension(config, extension) {
    var key
    var existing

    for (key in extension) {
      if (key === 'canContainEols' || key === 'transforms') {
        // Start from an empty list when `config` has no such field yet.
        // A placeholder object must not be used here, as it would end up as
        // the first entry of the resulting list.
        existing = own.call(config, key) ? config[key] : []
        config[key] = [].concat(existing, extension[key])
      } else {
        if (!own.call(config, key)) {
          config[key] = {}
        }

        Object.assign(config[key], extension[key])
      }
    }
  }
  702. }