parser.js 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. "use strict";
  2. /**
  3. * An AST parser for ICU MessageFormat strings
  4. *
  5. * @packageDocumentation
  6. * @example
  7. * ```
  8. * import { parse } from '@messageformat/parser'
  9. *
  10. * parse('So {wow}.')
  11. * [ { type: 'content', value: 'So ' },
  12. * { type: 'argument', arg: 'wow' },
  13. * { type: 'content', value: '.' } ]
  14. *
  15. *
  16. * parse('Such { thing }. { count, selectordinal, one {First} two {Second}' +
  17. * ' few {Third} other {#th} } word.')
  18. * [ { type: 'content', value: 'Such ' },
  19. * { type: 'argument', arg: 'thing' },
  20. * { type: 'content', value: '. ' },
  21. * { type: 'selectordinal',
  22. * arg: 'count',
  23. * cases: [
  24. * { key: 'one', tokens: [ { type: 'content', value: 'First' } ] },
  25. * { key: 'two', tokens: [ { type: 'content', value: 'Second' } ] },
  26. * { key: 'few', tokens: [ { type: 'content', value: 'Third' } ] },
  27. * { key: 'other',
  28. * tokens: [ { type: 'octothorpe' }, { type: 'content', value: 'th' } ] }
  29. * ] },
  30. * { type: 'content', value: ' word.' } ]
  31. *
  32. *
  33. * parse('Many{type,select,plural{ numbers}selectordinal{ counting}' +
  34. * 'select{ choices}other{ some {type}}}.')
  35. * [ { type: 'content', value: 'Many' },
  36. * { type: 'select',
  37. * arg: 'type',
  38. * cases: [
  39. * { key: 'plural', tokens: [ { type: 'content', value: 'numbers' } ] },
  40. * { key: 'selectordinal', tokens: [ { type: 'content', value: 'counting' } ] },
  41. * { key: 'select', tokens: [ { type: 'content', value: 'choices' } ] },
  42. * { key: 'other',
  43. * tokens: [ { type: 'content', value: 'some ' }, { type: 'argument', arg: 'type' } ] }
  44. * ] },
  45. * { type: 'content', value: '.' } ]
  46. *
  47. *
  48. * parse('{Such compliance')
  49. * // ParseError: invalid syntax at line 1 col 7:
  50. * //
  51. * // {Such compliance
  52. * // ^
  53. *
  54. *
  55. * const msg = '{words, plural, zero{No words} one{One word} other{# words}}'
  56. * parse(msg)
  57. * [ { type: 'plural',
  58. * arg: 'words',
  59. * cases: [
  60. * { key: 'zero', tokens: [ { type: 'content', value: 'No words' } ] },
  61. * { key: 'one', tokens: [ { type: 'content', value: 'One word' } ] },
  62. * { key: 'other',
  63. * tokens: [ { type: 'octothorpe' }, { type: 'content', value: ' words' } ] }
  64. * ] } ]
  65. *
  66. *
  67. * parse(msg, { cardinal: [ 'one', 'other' ], ordinal: [ 'one', 'two', 'few', 'other' ] })
  68. * // ParseError: The plural case zero is not valid in this locale at line 1 col 17:
  69. * //
  70. * // {words, plural, zero{
  71. * // ^
  72. * ```
  73. */
  74. Object.defineProperty(exports, "__esModule", { value: true });
  75. exports.parse = exports.ParseError = void 0;
  76. const lexer_js_1 = require("./lexer.js");
  /**
   * Copy the positional fields of a lexer token into a fresh context object.
   *
   * Only `offset`, `line`, `col`, `text` and `lineBreaks` are carried over, so
   * the result can be extended (e.g. by appending to `text`) without touching
   * the token it was built from.
   */
  const getContext = ({ offset, line, col, text, lineBreaks }) => ({
    offset,
    line,
    col,
    text,
    lineBreaks
  });
  /**
   * Tell whether `type` is exactly one of the selector type names:
   * `'plural'`, `'select'` or `'selectordinal'` (case-sensitive).
   */
  const isSelectType = (type) => {
    switch (type) {
      case 'plural':
      case 'select':
      case 'selectordinal':
        return true;
      default:
        return false;
    }
  };
  /**
   * Collapse the parsed parts of a function argument style into a single
   * content token (used by the parser in strict mode).
   *
   * Content parts contribute their `value`; argument, function and octothorpe
   * parts contribute their raw source text. Any other part type is rejected.
   *
   * @param lt - token passed to ParseError when a part is rejected
   * @param param - non-empty array of parsed parts, each carrying a `ctx`
   * @returns a one-element array holding the merged content token; its `value`
   *   is trimmed and its `ctx` is a copy of the first part's context whose
   *   `text` is the concatenated source text of all parts
   * @throws ParseError when a part has an unsupported type
   */
  function strictArgStyleParam(lt, param) {
    // Part types whose raw source text is kept verbatim in the style value.
    const rawTextTypes = ['argument', 'function', 'octothorpe'];
    let sourceText = '';
    let styleValue = '';
    for (const part of param) {
      const partText = part.ctx.text;
      sourceText += partText;
      if (part.type === 'content') {
        styleValue += part.value;
      } else if (rawTextTypes.includes(part.type)) {
        styleValue += partText;
      } else {
        throw new ParseError(lt, `Unsupported part in strict mode function arg style: ${partText}`);
      }
    }
    // Copy the first part's context so the original token is left untouched.
    const mergedCtx = Object.assign({}, param[0].ctx, { text: sourceText });
    return [{ type: 'content', value: styleValue.trim(), ctx: mergedCtx }];
  }
  // Function argument types that the parser accepts when running in strict mode.
  const strictArgTypes = ['number', 'date', 'time', 'spellout', 'ordinal', 'duration'];
  // Plural case keys accepted when the caller supplies no locale-specific list.
  const defaultPluralKeys = [
    'zero',
    'one',
    'two',
    'few',
    'many',
    'other'
  ];
  /**
   * Thrown by {@link parse} on error
   *
   * The error message is produced by the lexer's `formatError`, which is given
   * the offending token (or `null` when none is available) and a description.
   *
   * @public
   */
  class ParseError extends Error {
    /** @internal */
    constructor(lt, msg) {
      const formatted = lexer_js_1.lexer.formatError(lt, msg);
      super(formatted);
    }
  }
  exports.ParseError = ParseError;
  /**
   * Parser that turns the token stream of the lexer from `./lexer.js` into an
   * array of message tokens (content, argument, function, octothorpe and
   * select/plural/selectordinal nodes).
   *
   * The lexer is consumed incrementally: every method continues reading from
   * wherever the previous one stopped.
   */
  class Parser {
    /**
     * @param src - message source the lexer is reset to
     * @param opt - optional settings; nullish fields fall back to defaults:
     *   `cardinal` / `ordinal` to `defaultPluralKeys`, `strict` to `false`,
     *   `strictPluralKeys` to `true`
     */
    constructor(src, opt) {
      const settings = opt === null || opt === undefined ? {} : opt;
      // Only null/undefined trigger the fallback; `false` and `[]` are kept.
      const orDefault = (given, fallback) => given === null || given === undefined ? fallback : given;
      this.lexer = lexer_js_1.lexer.reset(src);
      this.cardinalKeys = orDefault(settings.cardinal, defaultPluralKeys);
      this.ordinalKeys = orDefault(settings.ordinal, defaultPluralKeys);
      this.strict = orDefault(settings.strict, false);
      this.strictPluralKeys = orDefault(settings.strictPluralKeys, true);
    }

    /** Parse the whole message and return its top-level tokens. */
    parse() {
      return this.parseBody(false, true);
    }

    /**
     * Validate a case key against the selector type it appears in.
     *
     * @param lt - token reported in the error
     * @param type - `'select'`, `'plural'` or `'selectordinal'`
     * @param key - case key as written in the message
     * @throws ParseError for `=n` keys inside `select`, and for plural keys
     *   missing from the configured key list when `strictPluralKeys` is on
     */
    checkSelectKey(lt, type, key) {
      const isExactMatch = key[0] === '=';
      if (type === 'select') {
        if (isExactMatch) {
          throw new ParseError(lt, `The case ${key} is not valid with select`);
        }
        return;
      }
      // Exact-match keys (`=0`, `=1`, ...) are never checked against the lists.
      if (isExactMatch || !this.strictPluralKeys) {
        return;
      }
      const allowed = type === 'plural' ? this.cardinalKeys : this.ordinalKeys;
      // An empty list disables the check.
      const rejected = allowed.length > 0 && !allowed.includes(key);
      if (rejected) {
        throw new ParseError(lt, `The ${type} case ${key} is not valid in this locale`);
      }
    }

    /**
     * Parse the cases of a select, plural or selectordinal argument, reading
     * lexer tokens until the matching `end` token.
     *
     * @param argToken - argument token; its `value` becomes the node's `arg`
     * @param inPlural - whether the surrounding body is inside a plural
     * @param ctx - context object of the node; extended in place by an offset
     * @param type - selector type
     * @returns the selector node
     * @throws ParseError on a misplaced offset, an invalid case key, or when
     *   the token stream ends first
     */
    parseSelect({ value: arg }, inPlural, ctx, type) {
      const sel = { type, arg, cases: [], ctx };
      // Plural types always enable `#`; a strict-mode select always disables it.
      let caseInPlural = inPlural;
      if (type === 'plural' || type === 'selectordinal') {
        caseInPlural = true;
      } else if (this.strict) {
        caseInPlural = false;
      }
      for (const token of this.lexer) {
        if (token.type === 'end') {
          return sel;
        }
        if (token.type === 'offset') {
          if (type === 'select') {
            throw new ParseError(token, 'Unexpected plural offset for select');
          }
          if (sel.cases.length > 0) {
            throw new ParseError(token, 'Plural offset must be set before cases');
          }
          sel.pluralOffset = Number(token.value);
          ctx.text += token.text;
          ctx.lineBreaks += token.lineBreaks;
          continue;
        }
        if (token.type === 'case') {
          this.checkSelectKey(token, type, token.value);
          const key = token.value;
          const tokens = this.parseBody(caseInPlural);
          const caseCtx = getContext(token);
          sel.cases.push({ key, tokens, ctx: caseCtx });
          continue;
        }
        /* istanbul ignore next: never happens */
        throw new ParseError(token, `Unexpected lexer token: ${token.type}`);
      }
      throw new ParseError(null, 'Unexpected message end');
    }

    /**
     * Parse what follows an `argument` token: a plain argument, a function
     * with or without an argument style, or a selector.
     *
     * @param lt - the `argument` token that was just read
     * @param inPlural - whether the surrounding body is inside a plural
     * @returns an argument, function or selector node
     * @throws ParseError on invalid type identifiers, on function types not
     *   allowed in strict mode, or when the token stream ends first
     */
    parseArgToken(lt, inPlural) {
      const ctx = getContext(lt);
      const argType = this.lexer.next();
      if (!argType) {
        throw new ParseError(null, 'Unexpected message end');
      }
      ctx.text += argType.text;
      ctx.lineBreaks += argType.lineBreaks;

      const kind = argType.type;
      const isFunction = kind === 'func-simple' || kind === 'func-args';
      if (this.strict && isFunction && !strictArgTypes.includes(argType.value)) {
        throw new ParseError(lt, `Invalid strict mode function arg type: ${argType.value}`);
      }

      if (kind === 'end') {
        return { type: 'argument', arg: lt.value, ctx };
      }

      if (kind === 'func-simple') {
        const closing = this.lexer.next();
        if (!closing) {
          throw new ParseError(null, 'Unexpected message end');
        }
        /* istanbul ignore if: never happens */
        if (closing.type !== 'end') {
          throw new ParseError(closing, `Unexpected lexer token: ${closing.type}`);
        }
        ctx.text += closing.text;
        // Selector names in any letter case are not usable as function names.
        if (isSelectType(argType.value.toLowerCase())) {
          throw new ParseError(argType, `Invalid type identifier: ${argType.value}`);
        }
        return { type: 'function', arg: lt.value, key: argType.value, ctx };
      }

      if (kind === 'func-args') {
        if (isSelectType(argType.value.toLowerCase())) {
          throw new ParseError(argType, `Invalid type identifier: ${argType.value}`);
        }
        const parsed = this.parseBody(this.strict ? false : inPlural);
        // In strict mode a non-empty style is flattened to one content token.
        const param = this.strict && parsed.length > 0 ? strictArgStyleParam(lt, parsed) : parsed;
        return { type: 'function', arg: lt.value, key: argType.value, param, ctx };
      }

      if (kind === 'select') {
        /* istanbul ignore else: never happens */
        if (isSelectType(argType.value)) {
          return this.parseSelect(lt, inPlural, ctx, argType.value);
        }
        throw new ParseError(argType, `Unexpected select type ${argType.value}`);
      }

      /* istanbul ignore next: never happens */
      throw new ParseError(argType, `Unexpected lexer token: ${argType.type}`);
    }

    /**
     * Parse a sequence of tokens up to the closing `end` token, or up to the
     * end of input when `atRoot` is truthy.
     *
     * Adjacent text tokens are merged into a single content token.
     *
     * @param inPlural - when truthy, `octothorpe` tokens become octothorpe
     *   nodes; otherwise they are treated as text
     * @param atRoot - truthy for the top-level body only
     * @returns the parsed tokens
     * @throws ParseError when a nested body runs out of input, or on a quoted
     *   `#` escape containing `{` outside of a plural
     */
    parseBody(inPlural, atRoot) {
      const tokens = [];
      // Content token currently being extended; reset by any non-text token.
      let content = null;
      for (const lt of this.lexer) {
        if (lt.type === 'argument') {
          content = null;
          tokens.push(this.parseArgToken(lt, inPlural));
          continue;
        }
        if (lt.type === 'octothorpe' && inPlural) {
          content = null;
          tokens.push({ type: 'octothorpe', ctx: getContext(lt) });
          continue;
        }
        if (lt.type === 'end' && !atRoot) {
          return tokens;
        }

        let value = lt.value;
        // Outside a plural, a quoted `#...` keeps its raw text (quotes included).
        if (!inPlural && lt.type === 'quoted' && value[0] === '#') {
          if (value.includes('{')) {
            throw new ParseError(lt, `Unsupported escape pattern: ${value}`);
          }
          value = lt.text;
        }

        if (!content) {
          content = { type: 'content', value, ctx: getContext(lt) };
          tokens.push(content);
        } else {
          content.value += value;
          content.ctx.text += lt.text;
          content.ctx.lineBreaks += lt.lineBreaks;
        }
      }
      if (!atRoot) {
        throw new ParseError(null, 'Unexpected message end');
      }
      return tokens;
    }
  }
  /**
   * Parse an input string into an array of tokens
   *
   * @public
   * @remarks
   * The parser only supports the default `DOUBLE_OPTIONAL`
   * {@link http://www.icu-project.org/apiref/icu4c/messagepattern_8h.html#af6e0757e0eb81c980b01ee5d68a9978b | apostrophe mode}.
   *
   * @param src - the message source to parse
   * @param options - optional parser settings: `cardinal` and `ordinal` (the
   *   plural case keys to accept), `strict` and `strictPluralKeys`
   * @returns the top-level tokens of the message
   */
  function parse(src, options = {}) {
    return new Parser(src, options).parse();
  }
  exports.parse = parse;