// text.js (5.1 KB)
  1. 'use strict'
  2. Object.defineProperty(exports, '__esModule', {value: true})
  3. var codes = require('../character/codes.js')
  4. var assign = require('../constant/assign.js')
  5. var constants = require('../constant/constants.js')
  6. var types = require('../constant/types.js')
  7. var shallow = require('../util/shallow.js')
  // Constructs for the two inline content kinds. Each one pairs the shared
  // tokenizer with a resolver; only `text` additionally splits line suffixes.
  var text = initializeFactory('text')
  var string = initializeFactory('string')

  // Stand-alone resolver that only merges adjacent `data` events.
  var resolver = {resolveAll: createResolver()}
  /**
   * Build the construct (tokenizer plus resolver) for one content kind.
   *
   * @param {string} field
   *   Key looked up in `this.parser.constructs`; this file passes `'text'`
   *   or `'string'`. Only `'text'` gets the line-suffix resolver.
   * @returns {{tokenize: Function, resolveAll: Function}}
   *   Object holding the tokenizer initializer and the matching resolver.
   */
  function initializeFactory(field) {
    var resolveAll = createResolver(
      field === 'text' ? resolveAllLineSuffixes : undefined
    )

    return {tokenize: initializeText, resolveAll: resolveAll}

    // Tokenizer initializer; called with the tokenize context as `this`.
    function initializeText(effects) {
      var tokenizer = this
      var constructMap = tokenizer.parser.constructs[field]
      // On success continue at `start`, on failure fall back to `notText`.
      var attemptConstruct = effects.attempt(constructMap, start, notText)

      return start

      // Entry state: try a construct at a break, otherwise treat as data.
      function start(code) {
        if (atBreak(code)) {
          return attemptConstruct(code)
        }

        return notText(code)
      }

      // No construct matched here: open a `data` token, or finish at EOF.
      function notText(code) {
        if (code !== codes.eof) {
          effects.enter(types.data)
          effects.consume(code)
          return data
        }

        effects.consume(code)
      }

      // Inside a `data` token: keep consuming until the next break.
      function data(code) {
        if (!atBreak(code)) {
          effects.consume(code)
          return data
        }

        effects.exit(types.data)
        return attemptConstruct(code)
      }

      // Whether `code` is EOF or could start one of the registered constructs.
      function atBreak(code) {
        var candidates
        var position
        var guard

        if (code === codes.eof) {
          return true
        }

        candidates = constructMap[code]

        if (!candidates) {
          return false
        }

        for (position = 0; position < candidates.length; position++) {
          guard = candidates[position].previous

          // A construct without a `previous` guard can always start here.
          if (!guard || guard.call(tokenizer, tokenizer.previous)) {
            return true
          }
        }

        return false
      }
    }
  }
  /**
   * Create a `resolveAll` function that merges adjacent `data` events.
   *
   * @param {Function} [extraResolver]
   *   Optional resolver called with `(events, context)` after merging; its
   *   return value becomes the result.
   * @returns {Function}
   *   Resolver taking `(events, context)` and returning the events.
   */
  function createResolver(extraResolver) {
    return resolveAllText

    function resolveAllText(events, context) {
      // Index of the `enter` event that opened the current run of `data`.
      var runStart
      var cursor
      var event
      var isData

      // Deliberately runs one step past the end so that a run of `data`
      // events reaching the last event is still closed and merged.
      for (cursor = 0; cursor <= events.length; cursor++) {
        event = events[cursor]
        isData = Boolean(event) && event[1].type === types.data

        if (runStart === undefined) {
          if (isData) {
            runStart = cursor
            // Skip the matching exit event of this first data token.
            cursor++
          }

          continue
        }

        // Still inside the run.
        if (isData) {
          continue
        }

        // The run ended. A single data token (enter + exit) needs no merging.
        if (cursor !== runStart + 2) {
          events[runStart][1].end = events[cursor - 1][1].end
          events.splice(runStart + 2, cursor - runStart - 2)
          cursor = runStart + 2
        }

        runStart = undefined
      }

      if (extraResolver) {
        return extraResolver(events, context)
      }

      return events
    }
  }
  /**
   * Split trailing spaces and tabs off `data` tokens that sit directly before
   * a line ending or before the end of the events.
   *
   * A rather ugly set of instructions which again looks at chunks in the input
   * stream.
   * The reason to do this here is that it is *much* faster to parse in reverse.
   * And that we can’t hook into `null` to split the line suffix before an EOF.
   * To do: figure out if we can make this into a clean utility, or even in core.
   * As it will be useful for GFMs literal autolink extension (and maybe even
   * tables?)
   *
   * The split-off token is a `hardBreakTrailing` when it precedes a line
   * ending, contains no tab, and has at least
   * `constants.hardBreakPrefixSizeMin` characters; otherwise a `lineSuffix`.
   *
   * @param {Array} events
   *   List of `[kind, token, context]` events; modified in place.
   * @param {Object} context
   *   Tokenize context; only `context.sliceStream(token)` is used here.
   * @returns {Array}
   *   The same `events` array.
   */
  function resolveAllLineSuffixes(events, context) {
    // Start at 0 so the first inspected index is 1: index 0 has no preceding
    // event, and reading `events[-1][1]` would throw for an empty list or for
    // a list that begins with a line ending.
    var eventIndex = 0
    var chunks
    var data
    var chunk
    var index
    var bufferIndex
    var size
    var tabs
    var token

    // Runs up to and including `events.length` to also handle a `data` token
    // that is the very last event.
    while (++eventIndex <= events.length) {
      if (
        (eventIndex === events.length ||
          events[eventIndex][1].type === types.lineEnding) &&
        events[eventIndex - 1][1].type === types.data
      ) {
        data = events[eventIndex - 1][1]
        chunks = context.sliceStream(data)
        index = chunks.length
        bufferIndex = -1
        size = 0
        tabs = undefined

        // Walk the chunks backwards, counting trailing whitespace.
        while (index--) {
          chunk = chunks[index]

          if (typeof chunk === 'string') {
            bufferIndex = chunk.length

            while (chunk.charCodeAt(bufferIndex - 1) === codes.space) {
              size++
              bufferIndex--
            }

            // Something other than spaces remains in this chunk: done.
            if (bufferIndex) break
            bufferIndex = -1
          }
          // Number
          else if (chunk === codes.horizontalTab) {
            tabs = true
            size++
          } else if (chunk !== codes.virtualSpace) {
            // Replacement character, exit.
            // (Virtual spaces are skipped without being counted.)
            index++
            break
          }
        }

        if (size) {
          token = {
            type:
              eventIndex === events.length ||
              tabs ||
              size < constants.hardBreakPrefixSizeMin
                ? types.lineSuffix
                : types.hardBreakTrailing,
            start: {
              line: data.end.line,
              column: data.end.column - size,
              offset: data.end.offset - size,
              _index: data.start._index + index,
              _bufferIndex: index
                ? bufferIndex
                : data.start._bufferIndex + bufferIndex
            },
            end: shallow(data.end)
          }
          data.end = shallow(token.start)

          if (data.start.offset === data.end.offset) {
            // The data was whitespace only: turn it into the suffix token.
            assign(data, token)
          } else {
            events.splice(
              eventIndex,
              0,
              ['enter', token, context],
              ['exit', token, context]
            )
            eventIndex += 2
          }
        }

        // Skip the exit event of the line ending.
        eventIndex++
      }
    }

    return events
  }
  // Public API: the bare data-merging resolver and the two content constructs.
  exports.resolver = resolver
  exports.string = string
  exports.text = text