// subtokenize.mjs
  1. export default subtokenize
  2. import assert from 'assert'
  3. import codes from '../character/codes.mjs'
  4. import assign from '../constant/assign.mjs'
  5. import types from '../constant/types.mjs'
  6. import chunkedSplice from './chunked-splice.mjs'
  7. import shallow from './shallow.mjs'
/**
 * Tokenize subcontent.
 *
 * Walks the event list once, expanding every `enter` event whose token has a
 * `contentType` by handing it to `subcontent`, and splicing the resulting
 * child events into `events` in place.  Because expansions change indices, a
 * `jumps` map records where to continue after each expanded region.
 *
 * @param {Array} events - List of `[kind, token, context]` tuples; mutated in
 *   place.
 * @returns {boolean} `true` when nothing was expanded (i.e. the list is
 *   done); `false` when at least one subtokenization happened — callers
 *   presumably loop until this returns `true` (TODO confirm against caller).
 */
function subtokenize(events) {
  var jumps = {}
  var index = -1
  var event
  var lineIndex
  var otherIndex
  var otherEvent
  var parameters
  var subevents
  var more

  while (++index < events.length) {
    // Follow recorded jumps: skip over regions already expanded by
    // `subcontent` (chained, in case a jump target was itself expanded).
    while (index in jumps) {
      index = jumps[index]
    }

    event = events[index]

    // Add a hook for the GFM tasklist extension, which needs to know if text
    // is in the first content of a list item.
    if (
      index &&
      event[1].type === types.chunkFlow &&
      events[index - 1][1].type === types.listItemPrefix
    ) {
      subevents = event[1]._tokenizer.events
      otherIndex = 0

      // Skip over an initial blank line ending (its `enter` + `exit` pair).
      if (
        otherIndex < subevents.length &&
        subevents[otherIndex][1].type === types.lineEndingBlank
      ) {
        otherIndex += 2
      }

      // If the first real thing is content, mark every `chunkText` inside it
      // (up to the next `content` token) as being in the first content of a
      // list item.
      if (
        otherIndex < subevents.length &&
        subevents[otherIndex][1].type === types.content
      ) {
        while (++otherIndex < subevents.length) {
          if (subevents[otherIndex][1].type === types.content) {
            break
          }

          if (subevents[otherIndex][1].type === types.chunkText) {
            subevents[otherIndex][1].isInFirstContentOfListItem = true
            // Skip the matching exit so the same token isn’t visited twice.
            otherIndex++
          }
        }
      }
    }

    // Enter.
    if (event[0] === 'enter') {
      if (event[1].contentType) {
        // Expand this token’s content; `subcontent` returns index gaps that
        // are merged into `jumps` so the outer loop can leap over the
        // freshly spliced-in child events.
        assign(jumps, subcontent(events, index))
        index = jumps[index]
        more = true
      }
    }
    // Exit.
    else if (event[1]._container || event[1]._movePreviousLineEndings) {
      // Scan backwards over any run of (blank) line endings directly before
      // this container exit.
      otherIndex = index
      lineIndex = undefined

      while (otherIndex--) {
        otherEvent = events[otherIndex]

        if (
          otherEvent[1].type === types.lineEnding ||
          otherEvent[1].type === types.lineEndingBlank
        ) {
          if (otherEvent[0] === 'enter') {
            // The previously found ending (if any) becomes blank: only the
            // first (earliest) ending in the run stays a plain line ending.
            if (lineIndex) {
              events[lineIndex][1].type = types.lineEndingBlank
            }

            otherEvent[1].type = types.lineEnding
            lineIndex = otherIndex
          }
        } else {
          break
        }
      }

      if (lineIndex) {
        // Fix position: the container now ends where the first line ending
        // starts.
        event[1].end = shallow(events[lineIndex][1].start)

        // Switch container exit w/ line endings: move the exit before the
        // run of endings, shifting the endings after it.
        parameters = events.slice(lineIndex, index)
        parameters.unshift(event)
        chunkedSplice(events, lineIndex, index - lineIndex + 1, parameters)
      }
    }
  }

  return !more
}
/**
 * Tokenize embedded tokens.
 *
 * Feeds the chain of linked tokens starting at `events[eventIndex][1]` into a
 * (possibly shared) subtokenizer, then splices the resulting child events
 * into `events` in place of each original `enter`/`exit` pair, and returns a
 * map describing how indices shifted.
 *
 * @param {Array} events - The outer event list; mutated in place.
 * @param {number} eventIndex - Index of the `enter` event whose token has a
 *   `contentType` and starts a linked-token chain (via `.next`/`.previous`).
 * @returns {Object} `gaps`: maps an old event index to the index where the
 *   spliced-in child events for that region end, adjusted for the size
 *   changes of earlier splices — consumed by `subtokenize` as jump targets.
 */
function subcontent(events, eventIndex) {
  var token = events[eventIndex][1]
  var context = events[eventIndex][2]
  var startPosition = eventIndex - 1
  var startPositions = []
  // Reuse an existing tokenizer for this chain, or create one for the
  // token’s content type starting at its position.
  var tokenizer =
    token._tokenizer || context.parser[token.contentType](token.start)
  var childEvents = tokenizer.events
  var jumps = []
  var gaps = {}
  var stream
  var previous
  var index
  var entered
  var end
  var adjust

  // Loop forward through the linked tokens to pass them in order to the
  // subtokenizer.
  while (token) {
    // Find the position of the event for this token.
    while (events[++startPosition][1] !== token) {
      // Empty.
    }

    startPositions.push(startPosition)

    // Only feed tokens that haven’t been tokenized yet (no `_tokenizer`).
    if (!token._tokenizer) {
      stream = context.sliceStream(token)

      if (!token.next) {
        // Last link: terminate the stream.
        stream.push(codes.eof)
      }

      if (previous) {
        tokenizer.defineSkip(token.start)
      }

      // Expose the GFM tasklist hook (set by `subtokenize`) to extensions
      // while this chunk is written.
      if (token.isInFirstContentOfListItem) {
        tokenizer._gfmTasklistFirstContentOfListItem = true
      }

      tokenizer.write(stream)

      if (token.isInFirstContentOfListItem) {
        tokenizer._gfmTasklistFirstContentOfListItem = undefined
      }
    }

    // Unravel the next token.
    previous = token
    token = token.next
  }

  // Now, loop back through all events (and linked tokens), to figure out which
  // parts belong where.
  token = previous
  index = childEvents.length

  while (index--) {
    // Make sure we’ve at least seen something (final eol is part of the last
    // token).
    if (childEvents[index][0] === 'enter') {
      entered = true
    } else if (
      // Find a void token that includes a break.
      entered &&
      childEvents[index][1].type === childEvents[index - 1][1].type &&
      childEvents[index][1].start.line !== childEvents[index][1].end.line
    ) {
      // Everything after this break belongs to the next link in the chain.
      add(childEvents.slice(index + 1, end))
      assert(token.previous, 'expected a previous token')
      // Help GC.
      token._tokenizer = token.next = undefined
      token = token.previous
      end = index + 1
    }
  }

  assert(!token.previous, 'expected no previous token')
  // Help GC.
  tokenizer.events = token._tokenizer = token.next = undefined

  // Do head:
  add(childEvents.slice(0, end))

  // Turn the recorded `[from, to]` pairs (which use pre-splice indices) into
  // the `gaps` map, accumulating the net size change of earlier splices.
  index = -1
  adjust = 0

  while (++index < jumps.length) {
    gaps[adjust + jumps[index][0]] = adjust + jumps[index][1]
    adjust += jumps[index][1] - jumps[index][0] - 1
  }

  return gaps

  // Replace one `enter`/`exit` pair (2 events) at the next recorded start
  // position with `slice`, and remember the resulting span in `jumps`.
  // Note: `startPositions` is consumed back-to-front (pop), matching the
  // reverse order in which `add` is called above.
  function add(slice) {
    var start = startPositions.pop()
    jumps.unshift([start, start + slice.length - 1])
    chunkedSplice(events, start, 2, slice)
  }
}