parser.d.ts 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. /**
  2. * An AST parser for ICU MessageFormat strings
  3. *
  4. * @packageDocumentation
  5. * @example
  6. * ```
  7. * import { parse } from '@messageformat/parser'
  8. *
  9. * parse('So {wow}.')
  10. * [ { type: 'content', value: 'So ' },
  11. * { type: 'argument', arg: 'wow' },
  12. * { type: 'content', value: '.' } ]
  13. *
  14. *
  15. * parse('Such { thing }. { count, selectordinal, one {First} two {Second}' +
  16. * ' few {Third} other {#th} } word.')
  17. * [ { type: 'content', value: 'Such ' },
  18. * { type: 'argument', arg: 'thing' },
  19. * { type: 'content', value: '. ' },
  20. * { type: 'selectordinal',
  21. * arg: 'count',
  22. * cases: [
  23. * { key: 'one', tokens: [ { type: 'content', value: 'First' } ] },
  24. * { key: 'two', tokens: [ { type: 'content', value: 'Second' } ] },
  25. * { key: 'few', tokens: [ { type: 'content', value: 'Third' } ] },
  26. * { key: 'other',
  27. * tokens: [ { type: 'octothorpe' }, { type: 'content', value: 'th' } ] }
  28. * ] },
  29. * { type: 'content', value: ' word.' } ]
  30. *
  31. *
  32. * parse('Many{type,select,plural{ numbers}selectordinal{ counting}' +
  33. * 'select{ choices}other{ some {type}}}.')
  34. * [ { type: 'content', value: 'Many' },
  35. * { type: 'select',
  36. * arg: 'type',
  37. * cases: [
  38. * { key: 'plural', tokens: [ { type: 'content', value: 'numbers' } ] },
  39. * { key: 'selectordinal', tokens: [ { type: 'content', value: 'counting' } ] },
  40. * { key: 'select', tokens: [ { type: 'content', value: 'choices' } ] },
  41. * { key: 'other',
  42. * tokens: [ { type: 'content', value: 'some ' }, { type: 'argument', arg: 'type' } ] }
  43. * ] },
  44. * { type: 'content', value: '.' } ]
  45. *
  46. *
  47. * parse('{Such compliance')
  48. * // ParseError: invalid syntax at line 1 col 7:
  49. * //
  50. * // {Such compliance
  51. * // ^
  52. *
  53. *
  54. * const msg = '{words, plural, zero{No words} one{One word} other{# words}}'
  55. * parse(msg)
  56. * [ { type: 'plural',
  57. * arg: 'words',
  58. * cases: [
  59. * { key: 'zero', tokens: [ { type: 'content', value: 'No words' } ] },
  60. * { key: 'one', tokens: [ { type: 'content', value: 'One word' } ] },
  61. * { key: 'other',
  62. * tokens: [ { type: 'octothorpe' }, { type: 'content', value: ' words' } ] }
  63. * ] } ]
  64. *
  65. *
  66. * parse(msg, { cardinal: [ 'one', 'other' ], ordinal: [ 'one', 'two', 'few', 'other' ] })
  67. * // ParseError: The plural case zero is not valid in this locale at line 1 col 17:
  68. * //
  69. * // {words, plural, zero{
  70. * // ^
  71. * ```
  72. */
  73. import { Token as LexerToken } from 'moo';
  74. /** Union of the five token interfaces declared in this file; the same union is the element type of `FunctionArg.param` and `SelectCase.tokens`. @internal */
  75. export type Token = Content | PlainArg | FunctionArg | Select | Octothorpe;
  76. /**
  77. * Text content of the message
  78. *
  79. * @public
  80. */
  81. export interface Content {
  82. /** Discriminant: always `'content'` for this token */ type: 'content';
  83. /** The text carried by this token */ value: string;
  84. /** Parsing context of this token, see {@link Context} */ ctx: Context;
  85. }
  86. /**
  87. * A simple placeholder
  88. *
  89. * @public
  90. * @remarks
  91. * `arg` identifies an input variable, the value of which is used directly in the output.
  92. */
  93. export interface PlainArg {
  94. /** Discriminant: always `'argument'` for this token */ type: 'argument';
  95. /** Identifier of the input variable whose value is used directly in the output */ arg: string;
  96. /** Parsing context of this token, see {@link Context} */ ctx: Context;
  97. }
  98. /**
  99. * A placeholder for a mapped argument
  100. *
  101. * @public
  102. * @remarks
  103. * `arg` identifies an input variable, the value of which is passed to the function identified by `key`, with `param` as an optional argument.
  104. * The output of the function is used in the output.
  105. *
  106. * In strict mode, `param` (if defined) may only be an array containing one {@link Content} token.
  107. */
  108. export interface FunctionArg {
  109. /** Discriminant: always `'function'` for this token */ type: 'function';
  110. /** Identifier of the input variable whose value is passed to the function */ arg: string;
  111. /** Identifier of the function that `arg` is passed to */ key: string;
  112. /** Optional argument for the function, given as a list of tokens; may be undefined */ param?: Array<Content | PlainArg | FunctionArg | Select | Octothorpe>;
  113. /** Parsing context of this token, see {@link Context} */ ctx: Context;
  114. }
  115. /**
  116. * A selector between multiple variants
  117. *
  118. * @public
  119. * @remarks
  120. * The value of the `arg` input variable determines which of the `cases` is used as the output value of this placeholder.
  121. *
  122. * For `plural` and `selectordinal`, the value of `arg` is expected to be numeric, and will be matched either to an exact case with a key like `=3`,
  123. * or to a case whose key is the plural category of the input number.
  124. */
  125. export interface Select {
  126. /** Discriminant: which of the three selector kinds this is */ type: 'plural' | 'select' | 'selectordinal';
  127. /** Identifier of the input variable that drives the selection */ arg: string;
  128. /** The variants to choose between */ cases: SelectCase[];
  129. /** NOTE(review): presumably the ICU plural `offset:` value; its meaning is not stated in this file, so confirm against the parser implementation */ pluralOffset?: number;
  130. /** Parsing context of this token, see {@link Context} */ ctx: Context;
  131. }
  132. /**
  133. * A case within a {@link Select}
  134. *
  135. * @public
  136. */
  137. export interface SelectCase {
  138. /** Key that identifies this case, e.g. `one`, `other` or an exact match like `=3` */ key: string;
  139. /** Tokens that make up the contents of this case */ tokens: Array<Content | PlainArg | FunctionArg | Select | Octothorpe>;
  140. /** Parsing context of this case, see {@link Context} */ ctx: Context;
  141. }
  142. /**
  143. * Represents the `#` character
  144. *
  145. * @public
  146. * @remarks
  147. * Within a `plural` or `selectordinal` {@link Select}, the `#` character should be replaced with a formatted representation of the Select's input value.
  148. */
  149. export interface Octothorpe {
  150. /** Discriminant: always `'octothorpe'`; this token has no other payload besides `ctx` */ type: 'octothorpe';
  151. /** Parsing context of this token, see {@link Context} */ ctx: Context;
  152. }
  153. /**
  154. * The parsing context for a token, exposed as the `ctx` property of every token interface and of {@link SelectCase}
  155. *
  156. * @public
  157. */
  158. export interface Context {
  159. /** Token start index from the beginning of the input string */
  160. offset: number;
  161. /** Token start line number, starting from 1 */
  162. line: number;
  163. /** Token start column, starting from 1 */
  164. col: number;
  165. /** The raw input source for the token */
  166. text: string;
  167. /** The number of line breaks consumed while parsing the token */
  168. lineBreaks: number;
  169. }
  170. /**
  171. * Thrown by {@link parse} on error
  172. *
  173. * @public
  174. */
  175. export declare class ParseError extends Error {
  176. /** Takes a `moo` lexer token (or `null`) and a message. NOTE(review): presumably the token locates the error in the input; confirm in the implementation. @internal */
  177. constructor(lt: LexerToken | null, msg: string);
  178. }
  179. /**
  180. * One of the valid {@link http://cldr.unicode.org/index/cldr-spec/plural-rules | Unicode CLDR} plural category keys, as accepted by the `cardinal` and `ordinal` options of {@link ParseOptions}
  181. *
  182. * @public
  183. */
  184. export type PluralCategory = 'zero' | 'one' | 'two' | 'few' | 'many' | 'other';
  185. /**
  186. * Options for the parser
  187. *
  188. * @public
  189. */
  190. export interface ParseOptions {
  191. /**
  192. * Array of valid plural categories for the current locale, used to validate `plural` keys.
  193. *
  194. * If undefined, the full set of valid {@link PluralCategory} keys is used.
  195. * To disable this check, pass in an empty array.
  196. */
  197. cardinal?: PluralCategory[];
  198. /**
  199. * Array of valid plural categories for the current locale, used to validate `selectordinal` keys.
  200. *
  201. * If undefined, the full set of valid {@link PluralCategory} keys is used.
  202. * To disable this check, pass in an empty array.
  203. */
  204. ordinal?: PluralCategory[];
  205. /**
  206. * By default, the parsing applies a few relaxations to the ICU MessageFormat spec.
  207. * Setting `strict: true` will disable these relaxations.
  208. *
  209. * @remarks
  210. * - The `argType` of `simpleArg` formatting functions will be restricted to the set of
  211. * `number`, `date`, `time`, `spellout`, `ordinal`, and `duration`,
  212. * rather than accepting any lower-case identifier that does not start with a number.
  213. *
  214. * - The optional `argStyle` of `simpleArg` formatting functions will not be parsed as any other text, but instead as the spec requires:
  215. * "In argStyleText, every single ASCII apostrophe begins and ends quoted literal text, and unquoted \{curly braces\} must occur in matched pairs."
  216. *
  217. * - Inside a `plural` or `selectordinal` statement, a pound symbol (`#`) is replaced with the input number.
  218. * By default, `#` is also parsed as a special character in nested statements, and can be escaped using apostrophes (`'#'`).
  219. * In strict mode, `#` will be parsed as a special character only directly inside a `plural` or `selectordinal` statement.
  220. * Outside those, `#` and `'#'` will be parsed as literal text.
  221. */
  222. strict?: boolean;
  223. /**
  224. * By default, the parser will reject any plural keys that are not valid
  225. * {@link http://cldr.unicode.org/index/cldr-spec/plural-rules | Unicode CLDR}
  226. * plural category keys.
  227. * Setting `strictPluralKeys: false` will disable this check.
  228. */
  229. strictPluralKeys?: boolean;
  230. }
  231. /**
  232. * Parse an input string into an array of tokens, throwing a {@link ParseError} on error
  233. *
  234. * @public
  235. * @remarks
  236. * The parser only supports the default `DOUBLE_OPTIONAL`
  237. * {@link http://www.icu-project.org/apiref/icu4c/messagepattern_8h.html#af6e0757e0eb81c980b01ee5d68a9978b | apostrophe mode}.
  238. */
  239. export declare function parse(src: string, options?: ParseOptions): Array<Content | PlainArg | FunctionArg | Select>;