no-misleading-character-class.js 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. /**
  2. * @author Toru Nagashima <https://github.com/mysticatea>
  3. */
  4. "use strict";
  5. const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("eslint-utils");
  6. const { RegExpParser, visitRegExpAST } = require("regexpp");
  7. const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode");
  8. //------------------------------------------------------------------------------
  9. // Helpers
  10. //------------------------------------------------------------------------------
  11. /**
  12. * Iterate character sequences of a given nodes.
  13. *
  14. * CharacterClassRange syntax can steal a part of character sequence,
  15. * so this function reverts CharacterClassRange syntax and restore the sequence.
  16. * @param {regexpp.AST.CharacterClassElement[]} nodes The node list to iterate character sequences.
  17. * @returns {IterableIterator<number[]>} The list of character sequences.
  18. */
  19. function *iterateCharacterSequence(nodes) {
  20. let seq = [];
  21. for (const node of nodes) {
  22. switch (node.type) {
  23. case "Character":
  24. seq.push(node.value);
  25. break;
  26. case "CharacterClassRange":
  27. seq.push(node.min.value);
  28. yield seq;
  29. seq = [node.max.value];
  30. break;
  31. case "CharacterSet":
  32. if (seq.length > 0) {
  33. yield seq;
  34. seq = [];
  35. }
  36. break;
  37. // no default
  38. }
  39. }
  40. if (seq.length > 0) {
  41. yield seq;
  42. }
  43. }
  44. const hasCharacterSequence = {
  45. surrogatePairWithoutUFlag(chars) {
  46. return chars.some((c, i) => i !== 0 && isSurrogatePair(chars[i - 1], c));
  47. },
  48. combiningClass(chars) {
  49. return chars.some((c, i) => (
  50. i !== 0 &&
  51. isCombiningCharacter(c) &&
  52. !isCombiningCharacter(chars[i - 1])
  53. ));
  54. },
  55. emojiModifier(chars) {
  56. return chars.some((c, i) => (
  57. i !== 0 &&
  58. isEmojiModifier(c) &&
  59. !isEmojiModifier(chars[i - 1])
  60. ));
  61. },
  62. regionalIndicatorSymbol(chars) {
  63. return chars.some((c, i) => (
  64. i !== 0 &&
  65. isRegionalIndicatorSymbol(c) &&
  66. isRegionalIndicatorSymbol(chars[i - 1])
  67. ));
  68. },
  69. zwj(chars) {
  70. const lastIndex = chars.length - 1;
  71. return chars.some((c, i) => (
  72. i !== 0 &&
  73. i !== lastIndex &&
  74. c === 0x200d &&
  75. chars[i - 1] !== 0x200d &&
  76. chars[i + 1] !== 0x200d
  77. ));
  78. }
  79. };
  80. const kinds = Object.keys(hasCharacterSequence);
  81. //------------------------------------------------------------------------------
  82. // Rule Definition
  83. //------------------------------------------------------------------------------
  84. module.exports = {
  85. meta: {
  86. type: "problem",
  87. docs: {
  88. description: "disallow characters which are made with multiple code points in character class syntax",
  89. category: "Possible Errors",
  90. recommended: true,
  91. url: "https://eslint.org/docs/rules/no-misleading-character-class"
  92. },
  93. schema: [],
  94. messages: {
  95. surrogatePairWithoutUFlag: "Unexpected surrogate pair in character class. Use 'u' flag.",
  96. combiningClass: "Unexpected combined character in character class.",
  97. emojiModifier: "Unexpected modified Emoji in character class.",
  98. regionalIndicatorSymbol: "Unexpected national flag in character class.",
  99. zwj: "Unexpected joined character sequence in character class."
  100. }
  101. },
  102. create(context) {
  103. const parser = new RegExpParser();
  104. /**
  105. * Verify a given regular expression.
  106. * @param {Node} node The node to report.
  107. * @param {string} pattern The regular expression pattern to verify.
  108. * @param {string} flags The flags of the regular expression.
  109. * @returns {void}
  110. */
  111. function verify(node, pattern, flags) {
  112. const has = {
  113. surrogatePairWithoutUFlag: false,
  114. combiningClass: false,
  115. variationSelector: false,
  116. emojiModifier: false,
  117. regionalIndicatorSymbol: false,
  118. zwj: false
  119. };
  120. let patternNode;
  121. try {
  122. patternNode = parser.parsePattern(
  123. pattern,
  124. 0,
  125. pattern.length,
  126. flags.includes("u")
  127. );
  128. } catch {
  129. // Ignore regular expressions with syntax errors
  130. return;
  131. }
  132. visitRegExpAST(patternNode, {
  133. onCharacterClassEnter(ccNode) {
  134. for (const chars of iterateCharacterSequence(ccNode.elements)) {
  135. for (const kind of kinds) {
  136. has[kind] = has[kind] || hasCharacterSequence[kind](chars);
  137. }
  138. }
  139. }
  140. });
  141. for (const kind of kinds) {
  142. if (has[kind]) {
  143. context.report({ node, messageId: kind });
  144. }
  145. }
  146. }
  147. return {
  148. "Literal[regex]"(node) {
  149. verify(node, node.regex.pattern, node.regex.flags);
  150. },
  151. "Program"() {
  152. const scope = context.getScope();
  153. const tracker = new ReferenceTracker(scope);
  154. /*
  155. * Iterate calls of RegExp.
  156. * E.g., `new RegExp()`, `RegExp()`, `new window.RegExp()`,
  157. * `const {RegExp: a} = window; new a()`, etc...
  158. */
  159. for (const { node } of tracker.iterateGlobalReferences({
  160. RegExp: { [CALL]: true, [CONSTRUCT]: true }
  161. })) {
  162. const [patternNode, flagsNode] = node.arguments;
  163. const pattern = getStringIfConstant(patternNode, scope);
  164. const flags = getStringIfConstant(flagsNode, scope);
  165. if (typeof pattern === "string") {
  166. verify(node, pattern, flags || "");
  167. }
  168. }
  169. }
  170. };
  171. }
  172. };