index.js 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581
  1. // Main parser class
  2. 'use strict';
  3. var utils = require('./common/utils');
  4. var helpers = require('./helpers');
  5. var Renderer = require('./renderer');
  6. var ParserCore = require('./parser_core');
  7. var ParserBlock = require('./parser_block');
  8. var ParserInline = require('./parser_inline');
  9. var LinkifyIt = require('linkify-it');
  10. var mdurl = require('mdurl');
  11. var punycode = require('punycode');
  12. var config = {
  13. 'default': require('./presets/default'),
  14. zero: require('./presets/zero'),
  15. commonmark: require('./presets/commonmark')
  16. };
  17. ////////////////////////////////////////////////////////////////////////////////
  18. //
  19. // This validator can prohibit more than really needed to prevent XSS. It's a
  20. // tradeoff to keep code simple and to be secure by default.
  21. //
  22. // If you need different setup - override validator method as you wish. Or
  23. // replace it with dummy function and use external sanitizer.
  24. //
  var BAD_PROTO_RE = /^(vbscript|javascript|file|data):/;
  var GOOD_DATA_RE = /^data:image\/(gif|png|jpeg|webp);/;

  // Decide whether a link url may be emitted.
  //
  // Urls whose schema matches BAD_PROTO_RE are rejected, with one exception:
  // `data:` urls carrying an image type listed in GOOD_DATA_RE. Any other url
  // is accepted. The check is done on the trimmed, lower-cased url.
  //
  // Returns a Boolean.
  function validateLink(url) {
    // url should be normalized at this point, and existing entities are decoded
    var candidate = url.trim().toLowerCase();

    if (!BAD_PROTO_RE.test(candidate)) { return true; }

    // Suspicious schema - only whitelisted embedded images may pass.
    return GOOD_DATA_RE.test(candidate);
  }
  32. ////////////////////////////////////////////////////////////////////////////////
  var RECODE_HOSTNAME_FOR = [ 'http:', 'https:', 'mailto:' ];

  // Convert a link url to its machine-readable form: the hostname is passed
  // through `punycode.toASCII` (when applicable) and the re-assembled url is
  // passed through `mdurl.encode`.
  function normalizeLink(url) {
    var parts = mdurl.parse(url, true);

    // Encode hostnames in urls like:
    // `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
    //
    // We don't encode unknown schemas, because it's likely that we encode
    // something we shouldn't (e.g. `skype:name` treated as `skype:host`)
    var recodable = parts.hostname &&
      (!parts.protocol || RECODE_HOSTNAME_FOR.indexOf(parts.protocol) >= 0);

    if (recodable) {
      try {
        parts.hostname = punycode.toASCII(parts.hostname);
      } catch (er) { /* conversion failed - keep the hostname untouched */ }
    }

    return mdurl.encode(mdurl.format(parts));
  }
  // Convert a link url to its human-readable form: the hostname is passed
  // through `punycode.toUnicode` (when applicable) and the re-assembled url is
  // passed through `mdurl.decode`.
  function normalizeLinkText(url) {
    var link = mdurl.parse(url, true);
    var knownSchema;

    if (link.hostname) {
      // Decode hostnames in urls like:
      // `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
      //
      // Unknown schemas are left alone, because it's likely that we would
      // touch something we shouldn't (e.g. `skype:name` treated as `skype:host`)
      knownSchema = !link.protocol || RECODE_HOSTNAME_FOR.indexOf(link.protocol) !== -1;

      if (knownSchema) {
        try {
          link.hostname = punycode.toUnicode(link.hostname);
        } catch (er) { /* conversion failed - keep the hostname untouched */ }
      }
    }

    return mdurl.decode(mdurl.format(link));
  }
  68. /**
  69. * class MarkdownIt
  70. *
  71. * Main parser/renderer class.
  72. *
  73. * ##### Usage
  74. *
  75. * ```javascript
  76. * // node.js, "classic" way:
  77. * var MarkdownIt = require('markdown-it'),
  78. * md = new MarkdownIt();
  79. * var result = md.render('# markdown-it rulezz!');
  80. *
  81. * // node.js, the same, but with sugar:
  82. * var md = require('markdown-it')();
  83. * var result = md.render('# markdown-it rulezz!');
  84. *
  85. * // browser without AMD, added to "window" on script load
  86. * // Note: there is no dash in the name.
  87. * var md = window.markdownit();
  88. * var result = md.render('# markdown-it rulezz!');
  89. * ```
  90. *
  91. * Single line rendering, without paragraph wrap:
  92. *
  93. * ```javascript
  94. * var md = require('markdown-it')();
  95. * var result = md.renderInline('__markdown-it__ rulezz!');
  96. * ```
  97. **/
  98. /**
  99. * new MarkdownIt([presetName, options])
  100. * - presetName (String): optional, `commonmark` / `zero`
  101. * - options (Object)
  102. *
  103. * Creates parser instance with given config. Can be called without `new`.
  104. *
  105. * ##### presetName
  106. *
  107. * MarkdownIt provides named presets as a convenience to quickly
  108. * enable/disable active syntax rules and options for common use cases.
  109. *
  110. * - ["commonmark"](https://github.com/markdown-it/markdown-it/blob/master/lib/presets/commonmark.js) -
  111. * configures parser to strict [CommonMark](http://commonmark.org/) mode.
  112. * - [default](https://github.com/markdown-it/markdown-it/blob/master/lib/presets/default.js) -
  113. * similar to GFM, used when no preset name given. Enables all available rules,
  114. * but still without html, typographer & autolinker.
  115. * - ["zero"](https://github.com/markdown-it/markdown-it/blob/master/lib/presets/zero.js) -
  116. * all rules disabled. Useful to quickly setup your config via `.enable()`.
  117. * For example, when you need only `bold` and `italic` markup and nothing else.
  118. *
  119. * ##### options:
  120. *
  121. * - __html__ - `false`. Set `true` to enable HTML tags in source. Be careful!
  122. * That's not safe! You may need external sanitizer to protect output from XSS.
  123. * It's better to extend features via plugins, instead of enabling HTML.
  124. * - __xhtmlOut__ - `false`. Set `true` to add '/' when closing single tags
  125. * (`<br />`). This is needed only for full CommonMark compatibility. In real
  126. * world you will need HTML output.
  127. * - __breaks__ - `false`. Set `true` to convert `\n` in paragraphs into `<br>`.
  128. * - __langPrefix__ - `language-`. CSS language class prefix for fenced blocks.
  129. * Can be useful for external highlighters.
  130. * - __linkify__ - `false`. Set `true` to autoconvert URL-like text to links.
  131. * - __typographer__ - `false`. Set `true` to enable [some language-neutral
  132. * replacement](https://github.com/markdown-it/markdown-it/blob/master/lib/rules_core/replacements.js) +
  133. * quotes beautification (smartquotes).
  134. * - __quotes__ - `“”‘’`, String or Array. Double + single quotes replacement
  135. * pairs, when typographer enabled and smartquotes on. For example, you can
  136. * use `'«»„“'` for Russian, `'„“‚‘'` for German, and
  137. * `['«\xA0', '\xA0»', '‹\xA0', '\xA0›']` for French (including nbsp).
  138. * - __highlight__ - `null`. Highlighter function for fenced code blocks.
  139. * Highlighter `function (str, lang)` should return escaped HTML. It can also
  140. * return empty string if the source was not changed and should be escaped
  141. * externally. If result starts with <pre... internal wrapper is skipped.
  142. *
  143. * ##### Example
  144. *
  145. * ```javascript
  146. * // commonmark mode
  147. * var md = require('markdown-it')('commonmark');
  148. *
  149. * // default mode
  150. * var md = require('markdown-it')();
  151. *
  152. * // enable everything
  153. * var md = require('markdown-it')({
  154. * html: true,
  155. * linkify: true,
  156. * typographer: true
  157. * });
  158. * ```
  159. *
  160. * ##### Syntax highlighting
  161. *
  162. * ```js
  163. * var hljs = require('highlight.js') // https://highlightjs.org/
  164. *
  165. * var md = require('markdown-it')({
  166. * highlight: function (str, lang) {
  167. * if (lang && hljs.getLanguage(lang)) {
  168. * try {
  169. * return hljs.highlight(lang, str, true).value;
  170. * } catch (__) {}
  171. * }
  172. *
  173. * return ''; // use external default escaping
  174. * }
  175. * });
  176. * ```
  177. *
  178. * Or with full wrapper override (if you need assign class to `<pre>`):
  179. *
  180. * ```javascript
  181. * var hljs = require('highlight.js') // https://highlightjs.org/
  182. *
  183. * // Actual default values
  184. * var md = require('markdown-it')({
  185. * highlight: function (str, lang) {
  186. * if (lang && hljs.getLanguage(lang)) {
  187. * try {
  188. * return '<pre class="hljs"><code>' +
  189. * hljs.highlight(lang, str, true).value +
  190. * '</code></pre>';
  191. * } catch (__) {}
  192. * }
  193. *
  194. * return '<pre class="hljs"><code>' + md.utils.escapeHtml(str) + '</code></pre>';
  195. * }
  196. * });
  197. * ```
  198. *
  199. **/
  function MarkdownIt(presetName, options) {
    // Allow calling without `new`.
    if (!(this instanceof MarkdownIt)) {
      return new MarkdownIt(presetName, options);
    }

    // Single non-string argument (or no arguments at all): treat it as the
    // options object and fall back to the `default` preset.
    if (!options && !utils.isString(presetName)) {
      options = presetName || {};
      presetName = 'default';
    }

    /**
     * MarkdownIt#inline -> ParserInline
     *
     * Instance of [[ParserInline]]. You may need it to add new rules when
     * writing plugins. For simple rules control use [[MarkdownIt.disable]] and
     * [[MarkdownIt.enable]].
     **/
    this.inline = new ParserInline();

    /**
     * MarkdownIt#block -> ParserBlock
     *
     * Instance of [[ParserBlock]]. You may need it to add new rules when
     * writing plugins. For simple rules control use [[MarkdownIt.disable]] and
     * [[MarkdownIt.enable]].
     **/
    this.block = new ParserBlock();

    /**
     * MarkdownIt#core -> Core
     *
     * Instance of [[Core]] chain executor. You may need it to add new rules when
     * writing plugins. For simple rules control use [[MarkdownIt.disable]] and
     * [[MarkdownIt.enable]].
     **/
    this.core = new ParserCore();

    /**
     * MarkdownIt#renderer -> Renderer
     *
     * Instance of [[Renderer]]. Use it to modify output look. Or to add rendering
     * rules for new token types, generated by plugins.
     *
     * ##### Example
     *
     * ```javascript
     * var md = require('markdown-it')();
     *
     * function myToken(tokens, idx, options, env, self) {
     *   //...
     *   return result;
     * };
     *
     * md.renderer.rules['my_token'] = myToken
     * ```
     *
     * See [[Renderer]] docs and [source code](https://github.com/markdown-it/markdown-it/blob/master/lib/renderer.js).
     **/
    this.renderer = new Renderer();

    /**
     * MarkdownIt#linkify -> LinkifyIt
     *
     * [linkify-it](https://github.com/markdown-it/linkify-it) instance.
     * Used by [linkify](https://github.com/markdown-it/markdown-it/blob/master/lib/rules_core/linkify.js)
     * rule.
     **/
    this.linkify = new LinkifyIt();

    /**
     * MarkdownIt#validateLink(url) -> Boolean
     *
     * Link validation function. CommonMark allows too much in links. By default
     * we disable `javascript:`, `vbscript:`, `file:` schemas, and almost all `data:...` schemas
     * except some embedded image types.
     *
     * You can change this behaviour:
     *
     * ```javascript
     * var md = require('markdown-it')();
     * // enable everything
     * md.validateLink = function () { return true; }
     * ```
     **/
    this.validateLink = validateLink;

    /**
     * MarkdownIt#normalizeLink(url) -> String
     *
     * Function used to encode link url to a machine-readable format,
     * which includes url-encoding, punycode, etc.
     **/
    this.normalizeLink = normalizeLink;

    /**
     * MarkdownIt#normalizeLinkText(url) -> String
     *
     * Function used to decode link url to a human-readable format.
     **/
    this.normalizeLinkText = normalizeLinkText;

    // Expose utils & helpers for easy access from plugins

    /**
     * MarkdownIt#utils -> utils
     *
     * Assorted utility functions, useful to write plugins. See details
     * [here](https://github.com/markdown-it/markdown-it/blob/master/lib/common/utils.js).
     **/
    this.utils = utils;

    /**
     * MarkdownIt#helpers -> helpers
     *
     * Link components parser functions, useful to write plugins. See details
     * [here](https://github.com/markdown-it/markdown-it/blob/master/lib/helpers).
     **/
    // Assigned into a fresh object, so each instance gets its own helpers map.
    this.helpers = utils.assign({}, helpers);

    // Start from an empty options set, load the preset, then apply the
    // explicitly given options on top of it.
    this.options = {};
    this.configure(presetName);

    if (options) {
      this.set(options);
    }
  }
  /** chainable
   * MarkdownIt.set(options)
   *
   * Merge the given options (same format as in the constructor) into the
   * current parser options. You will rarely need it, but it allows changing
   * options after the constructor call.
   *
   * ##### Example
   *
   * ```javascript
   * var md = require('markdown-it')()
   *             .set({ html: true, breaks: true })
   *             .set({ typographer: true });
   * ```
   *
   * __Note:__ To achieve the best possible performance, don't modify a
   * `markdown-it` instance options on the fly. If you need multiple configurations
   * it's best to create multiple instances and initialize each with separate
   * config.
   **/
  MarkdownIt.prototype.set = function (options) {
    var current = this.options;

    // Merged in place: the existing `options` object is kept and updated.
    utils.assign(current, options);

    return this;
  };
  /** chainable, internal
   * MarkdownIt.configure(presets)
   *
   * Batch load of all options and component settings. This is an internal
   * method, and you probably will not need it. But if you wish - see available
   * presets and data structure
   * [here](https://github.com/markdown-it/markdown-it/tree/master/lib/presets)
   *
   * `presets` is either a preset name (looked up in the bundled presets) or a
   * preset object itself. Throws if the name is unknown or the preset is empty.
   *
   * We strongly recommend to use presets instead of direct config loads. That
   * will give better compatibility with next versions.
   **/
  MarkdownIt.prototype.configure = function (presets) {
    var md = this;
    var requestedName;
    var components;

    // A string is a preset name - resolve it to the preset data.
    if (utils.isString(presets)) {
      requestedName = presets;
      presets = config[requestedName];
      if (!presets) {
        throw new Error('Wrong `markdown-it` preset "' + requestedName + '", check name');
      }
    }

    if (!presets) {
      throw new Error('Wrong `markdown-it` preset, can\'t be empty');
    }

    if (presets.options) {
      md.set(presets.options);
    }

    components = presets.components;

    if (components) {
      // Each key names a parser chain of this instance (`md[name]`); only the
      // listed rules stay enabled in its ruler(s).
      Object.keys(components).forEach(function (name) {
        var component = components[name];

        if (component.rules) {
          md[name].ruler.enableOnly(component.rules);
        }
        if (component.rules2) {
          md[name].ruler2.enableOnly(component.rules2);
        }
      });
    }

    return this;
  };
  /** chainable
   * MarkdownIt.enable(list, ignoreInvalid)
   * - list (String|Array): rule name or list of rule names to enable
   * - ignoreInvalid (Boolean): set `true` to ignore errors when rule not found.
   *
   * Enable a list of rules. It will automatically find appropriate components,
   * containing rules with given names. If a rule is not found, and
   * `ignoreInvalid` is not set - throws exception.
   *
   * ##### Example
   *
   * ```javascript
   * var md = require('markdown-it')()
   *             .enable(['sub', 'sup'])
   *             .disable('smartquotes');
   * ```
   **/
  MarkdownIt.prototype.enable = function (list, ignoreInvalid) {
    var names = Array.isArray(list) ? list : [ list ];
    var chains = [ 'core', 'block', 'inline' ];
    var enabled = [];
    var unknown, i;

    // Ask every ruler in turn; each one reports the names it recognized.
    // Rulers are always called in "ignore invalid" mode - a name is an error
    // only when none of them knows it.
    for (i = 0; i < chains.length; i++) {
      enabled = enabled.concat(this[chains[i]].ruler.enable(names, true));
    }
    enabled = enabled.concat(this.inline.ruler2.enable(names, true));

    unknown = names.filter(function (name) {
      return enabled.indexOf(name) === -1;
    });

    if (!ignoreInvalid && unknown.length) {
      throw new Error('MarkdownIt. Failed to enable unknown rule(s): ' + unknown);
    }

    return this;
  };
  /** chainable
   * MarkdownIt.disable(list, ignoreInvalid)
   * - list (String|Array): rule name or list of rule names to disable.
   * - ignoreInvalid (Boolean): set `true` to ignore errors when rule not found.
   *
   * The same as [[MarkdownIt.enable]], but turns the specified rules off.
   **/
  MarkdownIt.prototype.disable = function (list, ignoreInvalid) {
    var names = Array.isArray(list) ? list : [ list ];
    var chains = [ 'core', 'block', 'inline' ];
    var disabled = [];
    var unknown, i;

    // Every ruler reports the names it recognized; rulers are always called
    // in "ignore invalid" mode, unknown names are detected below.
    for (i = 0; i < chains.length; i++) {
      disabled = disabled.concat(this[chains[i]].ruler.disable(names, true));
    }
    disabled = disabled.concat(this.inline.ruler2.disable(names, true));

    unknown = names.filter(function (name) {
      return disabled.indexOf(name) === -1;
    });

    if (!ignoreInvalid && unknown.length) {
      throw new Error('MarkdownIt. Failed to disable unknown rule(s): ' + unknown);
    }

    return this;
  };
  /** chainable
   * MarkdownIt.use(plugin, params)
   *
   * Load specified plugin with given params into current parser instance.
   * It's just a sugar to call `plugin(md, params)` with currying.
   *
   * ##### Example
   *
   * ```javascript
   * var iterator = require('markdown-it-for-inline');
   * var md = require('markdown-it')()
   *             .use(iterator, 'foo_replace', 'text', function (tokens, idx) {
   *               tokens[idx].content = tokens[idx].content.replace(/foo/g, 'bar');
   *             });
   * ```
   **/
  MarkdownIt.prototype.use = function (plugin /*, params, ... */) {
    // Everything after `plugin` is forwarded to it, preceded by this instance.
    var params = Array.prototype.slice.call(arguments, 1);

    plugin.apply(plugin, [ this ].concat(params));

    return this;
  };
  /** internal
   * MarkdownIt.parse(src, env) -> Array
   * - src (String): source string
   * - env (Object): environment sandbox
   *
   * Parse input string and return the list of block tokens (special token type
   * "inline" will contain list of inline tokens). You should not call this
   * method directly, unless you write a custom renderer (for example, to
   * produce AST).
   *
   * `env` is used to pass data between "distributed" rules and return additional
   * metadata like reference info, needed for the renderer. It also can be used to
   * inject data in specific cases. Usually, you will be ok to pass `{}`,
   * and then pass updated object to renderer.
   *
   * Throws if `src` is not a string.
   **/
  MarkdownIt.prototype.parse = function (src, env) {
    var core = this.core;
    var state;

    if (typeof src === 'string') {
      state = new core.State(src, this, env);
      core.process(state);
      return state.tokens;
    }

    throw new Error('Input data should be a String');
  };
  /**
   * MarkdownIt.render(src [, env]) -> String
   * - src (String): source string
   * - env (Object): environment sandbox
   *
   * Render markdown string into html: the source is parsed with
   * [[MarkdownIt.parse]] and the resulting tokens are passed to the renderer
   * together with the current options.
   *
   * `env` can be used to inject additional metadata (`{}` by default).
   * But you will not need it with high probability. See also comment
   * in [[MarkdownIt.parse]].
   **/
  MarkdownIt.prototype.render = function (src, env) {
    // The same sandbox object must reach both the parser and the renderer.
    var sandbox = env || {};
    var tokens = this.parse(src, sandbox);

    return this.renderer.render(tokens, this.options, sandbox);
  };
  /** internal
   * MarkdownIt.parseInline(src, env) -> Array
   * - src (String): source string
   * - env (Object): environment sandbox
   *
   * The same as [[MarkdownIt.parse]] but skip all block rules. It returns the
   * block tokens list with the single `inline` element, containing parsed inline
   * tokens in `children` property. Also updates `env` object.
   *
   * Throws if `src` is not a string, the same way [[MarkdownIt.parse]] does.
   **/
  MarkdownIt.prototype.parseInline = function (src, env) {
    // Same input guard as in `parse`: reject bad input here with a clear
    // message instead of passing it on to the core chain.
    if (typeof src !== 'string') {
      throw new Error('Input data should be a String');
    }

    var state = new this.core.State(src, this, env);

    // Flag checked by the core chain to bypass block-level parsing.
    state.inlineMode = true;
    this.core.process(state);

    return state.tokens;
  };
  /**
   * MarkdownIt.renderInline(src [, env]) -> String
   * - src (String): source string
   * - env (Object): environment sandbox
   *
   * Similar to [[MarkdownIt.render]] but for single paragraph content: the
   * source is parsed with [[MarkdownIt.parseInline]]. Result will NOT be
   * wrapped into `<p>` tags.
   **/
  MarkdownIt.prototype.renderInline = function (src, env) {
    // The same sandbox object must reach both the parser and the renderer.
    var sandbox = env || {};
    var tokens = this.parseInline(src, sandbox);

    return this.renderer.render(tokens, this.options, sandbox);
  };
  502. module.exports = MarkdownIt;