// typo.js (25 KB)
  1. /* globals chrome: false */
  2. /* globals __dirname: false */
  3. /* globals require: false */
  4. /* globals Buffer: false */
  5. /* globals module: false */
  6. /**
  7. * Typo is a JavaScript implementation of a spellchecker using hunspell-style
  8. * dictionaries.
  9. */
  10. var Typo;
  11. (function () {
  12. "use strict";
  13. /**
  14. * Typo constructor.
  15. *
  16. * @param {String} [dictionary] The locale code of the dictionary being used. e.g.,
  17. * "en_US". This is only used to auto-load dictionaries.
  18. * @param {String} [affData] The data from the dictionary's .aff file. If omitted
  19. * and Typo.js is being used in a Chrome extension, the .aff
  20. * file will be loaded automatically from
  21. * lib/typo/dictionaries/[dictionary]/[dictionary].aff
  22. * In other environments, it will be loaded from
  23. * [settings.dictionaryPath]/dictionaries/[dictionary]/[dictionary].aff
  24. * @param {String} [wordsData] The data from the dictionary's .dic file. If omitted
  25. * and Typo.js is being used in a Chrome extension, the .dic
  26. * file will be loaded automatically from
  27. * lib/typo/dictionaries/[dictionary]/[dictionary].dic
  28. * In other environments, it will be loaded from
  29. * [settings.dictionaryPath]/dictionaries/[dictionary]/[dictionary].dic
  30. * @param {Object} [settings] Constructor settings. Available properties are:
  31. * {String} [dictionaryPath]: path to load dictionary from in non-chrome
  32. * environment.
  33. * {Object} [flags]: flag information.
  34. * {Boolean} [asyncLoad]: If true, affData and wordsData will be loaded
  35. * asynchronously.
  36. * {Function} [loadedCallback]: Called when both affData and wordsData
  37. * have been loaded. Only used if asyncLoad is set to true. The parameter
  38. * is the instantiated Typo object.
  39. *
  40. * @returns {Typo} A Typo object.
  41. */
  42. Typo = function (dictionary, affData, wordsData, settings) {
  43. settings = settings || {};
  44. this.dictionary = null;
  45. this.rules = {};
  46. this.dictionaryTable = {};
  47. this.compoundRules = [];
  48. this.compoundRuleCodes = {};
  49. this.replacementTable = [];
  50. this.flags = settings.flags || {};
  51. this.memoized = {};
  52. this.loaded = false;
  53. var self = this;
  54. var path;
  55. // Loop-control variables.
  56. var i, j, _len, _jlen;
  57. if (dictionary) {
  58. self.dictionary = dictionary;
  59. // If the data is preloaded, just setup the Typo object.
  60. if (affData && wordsData) {
  61. setup();
  62. }
  63. // Loading data for Chrome extentions.
  64. else if (typeof window !== 'undefined' && 'chrome' in window && 'extension' in window.chrome && 'getURL' in window.chrome.extension) {
  65. if (settings.dictionaryPath) {
  66. path = settings.dictionaryPath;
  67. }
  68. else {
  69. path = "typo/dictionaries";
  70. }
  71. if (!affData) readDataFile(chrome.extension.getURL(path + "/" + dictionary + "/" + dictionary + ".aff"), setAffData);
  72. if (!wordsData) readDataFile(chrome.extension.getURL(path + "/" + dictionary + "/" + dictionary + ".dic"), setWordsData);
  73. }
  74. else {
  75. if (settings.dictionaryPath) {
  76. path = settings.dictionaryPath;
  77. }
  78. else if (typeof __dirname !== 'undefined') {
  79. path = __dirname + '/dictionaries';
  80. }
  81. else {
  82. path = './dictionaries';
  83. }
  84. if (!affData) readDataFile(path + "/" + dictionary + "/" + dictionary + ".aff", setAffData);
  85. if (!wordsData) readDataFile(path + "/" + dictionary + "/" + dictionary + ".dic", setWordsData);
  86. }
  87. }
  88. function readDataFile(url, setFunc) {
  89. var response = self._readFile(url, null, settings.asyncLoad);
  90. if (settings.asyncLoad) {
  91. response.then(function(data) {
  92. setFunc(data);
  93. });
  94. }
  95. else {
  96. setFunc(response);
  97. }
  98. }
  99. function setAffData(data) {
  100. affData = data;
  101. if (wordsData) {
  102. setup();
  103. }
  104. }
  105. function setWordsData(data) {
  106. wordsData = data;
  107. if (affData) {
  108. setup();
  109. }
  110. }
  111. function setup() {
  112. self.rules = self._parseAFF(affData);
  113. // Save the rule codes that are used in compound rules.
  114. self.compoundRuleCodes = {};
  115. for (i = 0, _len = self.compoundRules.length; i < _len; i++) {
  116. var rule = self.compoundRules[i];
  117. for (j = 0, _jlen = rule.length; j < _jlen; j++) {
  118. self.compoundRuleCodes[rule[j]] = [];
  119. }
  120. }
  121. // If we add this ONLYINCOMPOUND flag to self.compoundRuleCodes, then _parseDIC
  122. // will do the work of saving the list of words that are compound-only.
  123. if ("ONLYINCOMPOUND" in self.flags) {
  124. self.compoundRuleCodes[self.flags.ONLYINCOMPOUND] = [];
  125. }
  126. self.dictionaryTable = self._parseDIC(wordsData);
  127. // Get rid of any codes from the compound rule codes that are never used
  128. // (or that were special regex characters). Not especially necessary...
  129. for (i in self.compoundRuleCodes) {
  130. if (self.compoundRuleCodes[i].length === 0) {
  131. delete self.compoundRuleCodes[i];
  132. }
  133. }
  134. // Build the full regular expressions for each compound rule.
  135. // I have a feeling (but no confirmation yet) that this method of
  136. // testing for compound words is probably slow.
  137. for (i = 0, _len = self.compoundRules.length; i < _len; i++) {
  138. var ruleText = self.compoundRules[i];
  139. var expressionText = "";
  140. for (j = 0, _jlen = ruleText.length; j < _jlen; j++) {
  141. var character = ruleText[j];
  142. if (character in self.compoundRuleCodes) {
  143. expressionText += "(" + self.compoundRuleCodes[character].join("|") + ")";
  144. }
  145. else {
  146. expressionText += character;
  147. }
  148. }
  149. self.compoundRules[i] = new RegExp(expressionText, "i");
  150. }
  151. self.loaded = true;
  152. if (settings.asyncLoad && settings.loadedCallback) {
  153. settings.loadedCallback(self);
  154. }
  155. }
  156. return this;
  157. };
  158. Typo.prototype = {
  159. /**
  160. * Loads a Typo instance from a hash of all of the Typo properties.
  161. *
  162. * @param object obj A hash of Typo properties, probably gotten from a JSON.parse(JSON.stringify(typo_instance)).
  163. */
  164. load : function (obj) {
  165. for (var i in obj) {
  166. if (obj.hasOwnProperty(i)) {
  167. this[i] = obj[i];
  168. }
  169. }
  170. return this;
  171. },
  172. /**
  173. * Read the contents of a file.
  174. *
  175. * @param {String} path The path (relative) to the file.
  176. * @param {String} [charset="ISO8859-1"] The expected charset of the file
  177. * @param {Boolean} async If true, the file will be read asynchronously. For node.js this does nothing, all
  178. * files are read synchronously.
  179. * @returns {String} The file data if async is false, otherwise a promise object. If running node.js, the data is
  180. * always returned.
  181. */
  182. _readFile : function (path, charset, async) {
  183. charset = charset || "utf8";
  184. if (typeof XMLHttpRequest !== 'undefined') {
  185. var promise;
  186. var req = new XMLHttpRequest();
  187. req.open("GET", path, async);
  188. if (async) {
  189. promise = new Promise(function(resolve, reject) {
  190. req.onload = function() {
  191. if (req.status === 200) {
  192. resolve(req.responseText);
  193. }
  194. else {
  195. reject(req.statusText);
  196. }
  197. };
  198. req.onerror = function() {
  199. reject(req.statusText);
  200. }
  201. });
  202. }
  203. if (req.overrideMimeType)
  204. req.overrideMimeType("text/plain; charset=" + charset);
  205. req.send(null);
  206. return async ? promise : req.responseText;
  207. }
  208. else if (typeof require !== 'undefined') {
  209. // Node.js
  210. var fs = require("fs");
  211. try {
  212. if (fs.existsSync(path)) {
  213. return fs.readFileSync(path, charset);
  214. }
  215. else {
  216. console.log("Path " + path + " does not exist.");
  217. }
  218. } catch (e) {
  219. console.log(e);
  220. return '';
  221. }
  222. }
  223. },
  224. /**
  225. * Parse the rules out from a .aff file.
  226. *
  227. * @param {String} data The contents of the affix file.
  228. * @returns object The rules from the file.
  229. */
  230. _parseAFF : function (data) {
  231. var rules = {};
  232. var line, subline, numEntries, lineParts;
  233. var i, j, _len, _jlen;
  234. // Remove comment lines
  235. data = this._removeAffixComments(data);
  236. var lines = data.split(/\r?\n/);
  237. for (i = 0, _len = lines.length; i < _len; i++) {
  238. line = lines[i];
  239. var definitionParts = line.split(/\s+/);
  240. var ruleType = definitionParts[0];
  241. if (ruleType == "PFX" || ruleType == "SFX") {
  242. var ruleCode = definitionParts[1];
  243. var combineable = definitionParts[2];
  244. numEntries = parseInt(definitionParts[3], 10);
  245. var entries = [];
  246. for (j = i + 1, _jlen = i + 1 + numEntries; j < _jlen; j++) {
  247. subline = lines[j];
  248. lineParts = subline.split(/\s+/);
  249. var charactersToRemove = lineParts[2];
  250. var additionParts = lineParts[3].split("/");
  251. var charactersToAdd = additionParts[0];
  252. if (charactersToAdd === "0") charactersToAdd = "";
  253. var continuationClasses = this.parseRuleCodes(additionParts[1]);
  254. var regexToMatch = lineParts[4];
  255. var entry = {};
  256. entry.add = charactersToAdd;
  257. if (continuationClasses.length > 0) entry.continuationClasses = continuationClasses;
  258. if (regexToMatch !== ".") {
  259. if (ruleType === "SFX") {
  260. entry.match = new RegExp(regexToMatch + "$");
  261. }
  262. else {
  263. entry.match = new RegExp("^" + regexToMatch);
  264. }
  265. }
  266. if (charactersToRemove != "0") {
  267. if (ruleType === "SFX") {
  268. entry.remove = new RegExp(charactersToRemove + "$");
  269. }
  270. else {
  271. entry.remove = charactersToRemove;
  272. }
  273. }
  274. entries.push(entry);
  275. }
  276. rules[ruleCode] = { "type" : ruleType, "combineable" : (combineable == "Y"), "entries" : entries };
  277. i += numEntries;
  278. }
  279. else if (ruleType === "COMPOUNDRULE") {
  280. numEntries = parseInt(definitionParts[1], 10);
  281. for (j = i + 1, _jlen = i + 1 + numEntries; j < _jlen; j++) {
  282. line = lines[j];
  283. lineParts = line.split(/\s+/);
  284. this.compoundRules.push(lineParts[1]);
  285. }
  286. i += numEntries;
  287. }
  288. else if (ruleType === "REP") {
  289. lineParts = line.split(/\s+/);
  290. if (lineParts.length === 3) {
  291. this.replacementTable.push([ lineParts[1], lineParts[2] ]);
  292. }
  293. }
  294. else {
  295. // ONLYINCOMPOUND
  296. // COMPOUNDMIN
  297. // FLAG
  298. // KEEPCASE
  299. // NEEDAFFIX
  300. this.flags[ruleType] = definitionParts[1];
  301. }
  302. }
  303. return rules;
  304. },
  305. /**
  306. * Removes comment lines and then cleans up blank lines and trailing whitespace.
  307. *
  308. * @param {String} data The data from an affix file.
  309. * @return {String} The cleaned-up data.
  310. */
  311. _removeAffixComments : function (data) {
  312. // Remove comments
  313. // This used to remove any string starting with '#' up to the end of the line,
  314. // but some COMPOUNDRULE definitions include '#' as part of the rule.
  315. // I haven't seen any affix files that use comments on the same line as real data,
  316. // so I don't think this will break anything.
  317. data = data.replace(/^\s*#.*$/mg, "");
  318. // Trim each line
  319. data = data.replace(/^\s\s*/m, '').replace(/\s\s*$/m, '');
  320. // Remove blank lines.
  321. data = data.replace(/\n{2,}/g, "\n");
  322. // Trim the entire string
  323. data = data.replace(/^\s\s*/, '').replace(/\s\s*$/, '');
  324. return data;
  325. },
  326. /**
  327. * Parses the words out from the .dic file.
  328. *
  329. * @param {String} data The data from the dictionary file.
  330. * @returns object The lookup table containing all of the words and
  331. * word forms from the dictionary.
  332. */
  333. _parseDIC : function (data) {
  334. data = this._removeDicComments(data);
  335. var lines = data.split(/\r?\n/);
  336. var dictionaryTable = {};
  337. function addWord(word, rules) {
  338. // Some dictionaries will list the same word multiple times with different rule sets.
  339. if (!dictionaryTable.hasOwnProperty(word)) {
  340. dictionaryTable[word] = null;
  341. }
  342. if (rules.length > 0) {
  343. if (dictionaryTable[word] === null) {
  344. dictionaryTable[word] = [];
  345. }
  346. dictionaryTable[word].push(rules);
  347. }
  348. }
  349. // The first line is the number of words in the dictionary.
  350. for (var i = 1, _len = lines.length; i < _len; i++) {
  351. var line = lines[i];
  352. if (!line) {
  353. // Ignore empty lines.
  354. continue;
  355. }
  356. var parts = line.split("/", 2);
  357. var word = parts[0];
  358. // Now for each affix rule, generate that form of the word.
  359. if (parts.length > 1) {
  360. var ruleCodesArray = this.parseRuleCodes(parts[1]);
  361. // Save the ruleCodes for compound word situations.
  362. if (!("NEEDAFFIX" in this.flags) || ruleCodesArray.indexOf(this.flags.NEEDAFFIX) == -1) {
  363. addWord(word, ruleCodesArray);
  364. }
  365. for (var j = 0, _jlen = ruleCodesArray.length; j < _jlen; j++) {
  366. var code = ruleCodesArray[j];
  367. var rule = this.rules[code];
  368. if (rule) {
  369. var newWords = this._applyRule(word, rule);
  370. for (var ii = 0, _iilen = newWords.length; ii < _iilen; ii++) {
  371. var newWord = newWords[ii];
  372. addWord(newWord, []);
  373. if (rule.combineable) {
  374. for (var k = j + 1; k < _jlen; k++) {
  375. var combineCode = ruleCodesArray[k];
  376. var combineRule = this.rules[combineCode];
  377. if (combineRule) {
  378. if (combineRule.combineable && (rule.type != combineRule.type)) {
  379. var otherNewWords = this._applyRule(newWord, combineRule);
  380. for (var iii = 0, _iiilen = otherNewWords.length; iii < _iiilen; iii++) {
  381. var otherNewWord = otherNewWords[iii];
  382. addWord(otherNewWord, []);
  383. }
  384. }
  385. }
  386. }
  387. }
  388. }
  389. }
  390. if (code in this.compoundRuleCodes) {
  391. this.compoundRuleCodes[code].push(word);
  392. }
  393. }
  394. }
  395. else {
  396. addWord(word.trim(), []);
  397. }
  398. }
  399. return dictionaryTable;
  400. },
  401. /**
  402. * Removes comment lines and then cleans up blank lines and trailing whitespace.
  403. *
  404. * @param {String} data The data from a .dic file.
  405. * @return {String} The cleaned-up data.
  406. */
  407. _removeDicComments : function (data) {
  408. // I can't find any official documentation on it, but at least the de_DE
  409. // dictionary uses tab-indented lines as comments.
  410. // Remove comments
  411. data = data.replace(/^\t.*$/mg, "");
  412. return data;
  413. },
  414. parseRuleCodes : function (textCodes) {
  415. if (!textCodes) {
  416. return [];
  417. }
  418. else if (!("FLAG" in this.flags)) {
  419. return textCodes.split("");
  420. }
  421. else if (this.flags.FLAG === "long") {
  422. var flags = [];
  423. for (var i = 0, _len = textCodes.length; i < _len; i += 2) {
  424. flags.push(textCodes.substr(i, 2));
  425. }
  426. return flags;
  427. }
  428. else if (this.flags.FLAG === "num") {
  429. return textCodes.split(",");
  430. }
  431. },
  432. /**
  433. * Applies an affix rule to a word.
  434. *
  435. * @param {String} word The base word.
  436. * @param {Object} rule The affix rule.
  437. * @returns {String[]} The new words generated by the rule.
  438. */
  439. _applyRule : function (word, rule) {
  440. var entries = rule.entries;
  441. var newWords = [];
  442. for (var i = 0, _len = entries.length; i < _len; i++) {
  443. var entry = entries[i];
  444. if (!entry.match || word.match(entry.match)) {
  445. var newWord = word;
  446. if (entry.remove) {
  447. newWord = newWord.replace(entry.remove, "");
  448. }
  449. if (rule.type === "SFX") {
  450. newWord = newWord + entry.add;
  451. }
  452. else {
  453. newWord = entry.add + newWord;
  454. }
  455. newWords.push(newWord);
  456. if ("continuationClasses" in entry) {
  457. for (var j = 0, _jlen = entry.continuationClasses.length; j < _jlen; j++) {
  458. var continuationRule = this.rules[entry.continuationClasses[j]];
  459. if (continuationRule) {
  460. newWords = newWords.concat(this._applyRule(newWord, continuationRule));
  461. }
  462. /*
  463. else {
  464. // This shouldn't happen, but it does, at least in the de_DE dictionary.
  465. // I think the author mistakenly supplied lower-case rule codes instead
  466. // of upper-case.
  467. }
  468. */
  469. }
  470. }
  471. }
  472. }
  473. return newWords;
  474. },
  475. /**
  476. * Checks whether a word or a capitalization variant exists in the current dictionary.
  477. * The word is trimmed and several variations of capitalizations are checked.
  478. * If you want to check a word without any changes made to it, call checkExact()
  479. *
  480. * @see http://blog.stevenlevithan.com/archives/faster-trim-javascript re:trimming function
  481. *
  482. * @param {String} aWord The word to check.
  483. * @returns {Boolean}
  484. */
  485. check : function (aWord) {
  486. if (!this.loaded) {
  487. throw "Dictionary not loaded.";
  488. }
  489. // Remove leading and trailing whitespace
  490. var trimmedWord = aWord.replace(/^\s\s*/, '').replace(/\s\s*$/, '');
  491. if (this.checkExact(trimmedWord)) {
  492. return true;
  493. }
  494. // The exact word is not in the dictionary.
  495. if (trimmedWord.toUpperCase() === trimmedWord) {
  496. // The word was supplied in all uppercase.
  497. // Check for a capitalized form of the word.
  498. var capitalizedWord = trimmedWord[0] + trimmedWord.substring(1).toLowerCase();
  499. if (this.hasFlag(capitalizedWord, "KEEPCASE")) {
  500. // Capitalization variants are not allowed for this word.
  501. return false;
  502. }
  503. if (this.checkExact(capitalizedWord)) {
  504. return true;
  505. }
  506. }
  507. var lowercaseWord = trimmedWord.toLowerCase();
  508. if (lowercaseWord !== trimmedWord) {
  509. if (this.hasFlag(lowercaseWord, "KEEPCASE")) {
  510. // Capitalization variants are not allowed for this word.
  511. return false;
  512. }
  513. // Check for a lowercase form
  514. if (this.checkExact(lowercaseWord)) {
  515. return true;
  516. }
  517. }
  518. return false;
  519. },
  520. /**
  521. * Checks whether a word exists in the current dictionary.
  522. *
  523. * @param {String} word The word to check.
  524. * @returns {Boolean}
  525. */
  526. checkExact : function (word) {
  527. if (!this.loaded) {
  528. throw "Dictionary not loaded.";
  529. }
  530. var ruleCodes = this.dictionaryTable[word];
  531. var i, _len;
  532. if (typeof ruleCodes === 'undefined') {
  533. // Check if this might be a compound word.
  534. if ("COMPOUNDMIN" in this.flags && word.length >= this.flags.COMPOUNDMIN) {
  535. for (i = 0, _len = this.compoundRules.length; i < _len; i++) {
  536. if (word.match(this.compoundRules[i])) {
  537. return true;
  538. }
  539. }
  540. }
  541. }
  542. else if (ruleCodes === null) {
  543. // a null (but not undefined) value for an entry in the dictionary table
  544. // means that the word is in the dictionary but has no flags.
  545. return true;
  546. }
  547. else if (typeof ruleCodes === 'object') { // this.dictionary['hasOwnProperty'] will be a function.
  548. for (i = 0, _len = ruleCodes.length; i < _len; i++) {
  549. if (!this.hasFlag(word, "ONLYINCOMPOUND", ruleCodes[i])) {
  550. return true;
  551. }
  552. }
  553. }
  554. return false;
  555. },
  556. /**
  557. * Looks up whether a given word is flagged with a given flag.
  558. *
  559. * @param {String} word The word in question.
  560. * @param {String} flag The flag in question.
  561. * @return {Boolean}
  562. */
  563. hasFlag : function (word, flag, wordFlags) {
  564. if (!this.loaded) {
  565. throw "Dictionary not loaded.";
  566. }
  567. if (flag in this.flags) {
  568. if (typeof wordFlags === 'undefined') {
  569. wordFlags = Array.prototype.concat.apply([], this.dictionaryTable[word]);
  570. }
  571. if (wordFlags && wordFlags.indexOf(this.flags[flag]) !== -1) {
  572. return true;
  573. }
  574. }
  575. return false;
  576. },
  577. /**
  578. * Returns a list of suggestions for a misspelled word.
  579. *
  580. * @see http://www.norvig.com/spell-correct.html for the basis of this suggestor.
  581. * This suggestor is primitive, but it works.
  582. *
  583. * @param {String} word The misspelling.
  584. * @param {Number} [limit=5] The maximum number of suggestions to return.
  585. * @returns {String[]} The array of suggestions.
  586. */
  587. alphabet : "",
  588. suggest : function (word, limit) {
  589. if (!this.loaded) {
  590. throw "Dictionary not loaded.";
  591. }
  592. limit = limit || 5;
  593. if (this.memoized.hasOwnProperty(word)) {
  594. var memoizedLimit = this.memoized[word]['limit'];
  595. // Only return the cached list if it's big enough or if there weren't enough suggestions
  596. // to fill a smaller limit.
  597. if (limit <= memoizedLimit || this.memoized[word]['suggestions'].length < memoizedLimit) {
  598. return this.memoized[word]['suggestions'].slice(0, limit);
  599. }
  600. }
  601. if (this.check(word)) return [];
  602. // Check the replacement table.
  603. for (var i = 0, _len = this.replacementTable.length; i < _len; i++) {
  604. var replacementEntry = this.replacementTable[i];
  605. if (word.indexOf(replacementEntry[0]) !== -1) {
  606. var correctedWord = word.replace(replacementEntry[0], replacementEntry[1]);
  607. if (this.check(correctedWord)) {
  608. return [ correctedWord ];
  609. }
  610. }
  611. }
  612. var self = this;
  613. self.alphabet = "abcdefghijklmnopqrstuvwxyz";
  614. /*
  615. if (!self.alphabet) {
  616. // Use the alphabet as implicitly defined by the words in the dictionary.
  617. var alphaHash = {};
  618. for (var i in self.dictionaryTable) {
  619. for (var j = 0, _len = i.length; j < _len; j++) {
  620. alphaHash[i[j]] = true;
  621. }
  622. }
  623. for (var i in alphaHash) {
  624. self.alphabet += i;
  625. }
  626. var alphaArray = self.alphabet.split("");
  627. alphaArray.sort();
  628. self.alphabet = alphaArray.join("");
  629. }
  630. */
  631. /**
  632. * Returns a hash keyed by all of the strings that can be made by making a single edit to the word (or words in) `words`
  633. * The value of each entry is the number of unique ways that the resulting word can be made.
  634. *
  635. * @arg mixed words Either a hash keyed by words or a string word to operate on.
  636. * @arg bool known_only Whether this function should ignore strings that are not in the dictionary.
  637. */
  638. function edits1(words, known_only) {
  639. var rv = {};
  640. var i, j, _iilen, _len, _jlen, _edit;
  641. if (typeof words == 'string') {
  642. var word = words;
  643. words = {};
  644. words[word] = true;
  645. }
  646. for (var word in words) {
  647. for (i = 0, _len = word.length + 1; i < _len; i++) {
  648. var s = [ word.substring(0, i), word.substring(i) ];
  649. if (s[1]) {
  650. _edit = s[0] + s[1].substring(1);
  651. if (!known_only || self.check(_edit)) {
  652. if (!(_edit in rv)) {
  653. rv[_edit] = 1;
  654. }
  655. else {
  656. rv[_edit] += 1;
  657. }
  658. }
  659. }
  660. // Eliminate transpositions of identical letters
  661. if (s[1].length > 1 && s[1][1] !== s[1][0]) {
  662. _edit = s[0] + s[1][1] + s[1][0] + s[1].substring(2);
  663. if (!known_only || self.check(_edit)) {
  664. if (!(_edit in rv)) {
  665. rv[_edit] = 1;
  666. }
  667. else {
  668. rv[_edit] += 1;
  669. }
  670. }
  671. }
  672. if (s[1]) {
  673. for (j = 0, _jlen = self.alphabet.length; j < _jlen; j++) {
  674. // Eliminate replacement of a letter by itself
  675. if (self.alphabet[j] != s[1].substring(0,1)){
  676. _edit = s[0] + self.alphabet[j] + s[1].substring(1);
  677. if (!known_only || self.check(_edit)) {
  678. if (!(_edit in rv)) {
  679. rv[_edit] = 1;
  680. }
  681. else {
  682. rv[_edit] += 1;
  683. }
  684. }
  685. }
  686. }
  687. }
  688. if (s[1]) {
  689. for (j = 0, _jlen = self.alphabet.length; j < _jlen; j++) {
  690. _edit = s[0] + self.alphabet[j] + s[1];
  691. if (!known_only || self.check(_edit)) {
  692. if (!(_edit in rv)) {
  693. rv[_edit] = 1;
  694. }
  695. else {
  696. rv[_edit] += 1;
  697. }
  698. }
  699. }
  700. }
  701. }
  702. }
  703. return rv;
  704. }
  705. function correct(word) {
  706. // Get the edit-distance-1 and edit-distance-2 forms of this word.
  707. var ed1 = edits1(word);
  708. var ed2 = edits1(ed1, true);
  709. // Sort the edits based on how many different ways they were created.
  710. var weighted_corrections = ed2;
  711. for (var ed1word in ed1) {
  712. if (!self.check(ed1word)) {
  713. continue;
  714. }
  715. if (ed1word in weighted_corrections) {
  716. weighted_corrections[ed1word] += ed1[ed1word];
  717. }
  718. else {
  719. weighted_corrections[ed1word] = ed1[ed1word];
  720. }
  721. }
  722. var i, _len;
  723. var sorted_corrections = [];
  724. for (i in weighted_corrections) {
  725. if (weighted_corrections.hasOwnProperty(i)) {
  726. sorted_corrections.push([ i, weighted_corrections[i] ]);
  727. }
  728. }
  729. function sorter(a, b) {
  730. var a_val = a[1];
  731. var b_val = b[1];
  732. if (a_val < b_val) {
  733. return -1;
  734. } else if (a_val > b_val) {
  735. return 1;
  736. }
  737. // @todo If a and b are equally weighted, add our own weight based on something like the key locations on this language's default keyboard.
  738. return b[0].localeCompare(a[0]);
  739. }
  740. sorted_corrections.sort(sorter).reverse();
  741. var rv = [];
  742. var capitalization_scheme = "lowercase";
  743. if (word.toUpperCase() === word) {
  744. capitalization_scheme = "uppercase";
  745. }
  746. else if (word.substr(0, 1).toUpperCase() + word.substr(1).toLowerCase() === word) {
  747. capitalization_scheme = "capitalized";
  748. }
  749. var working_limit = limit;
  750. for (i = 0; i < Math.min(working_limit, sorted_corrections.length); i++) {
  751. if ("uppercase" === capitalization_scheme) {
  752. sorted_corrections[i][0] = sorted_corrections[i][0].toUpperCase();
  753. }
  754. else if ("capitalized" === capitalization_scheme) {
  755. sorted_corrections[i][0] = sorted_corrections[i][0].substr(0, 1).toUpperCase() + sorted_corrections[i][0].substr(1);
  756. }
  757. if (!self.hasFlag(sorted_corrections[i][0], "NOSUGGEST") && rv.indexOf(sorted_corrections[i][0]) == -1) {
  758. rv.push(sorted_corrections[i][0]);
  759. }
  760. else {
  761. // If one of the corrections is not eligible as a suggestion , make sure we still return the right number of suggestions.
  762. working_limit++;
  763. }
  764. }
  765. return rv;
  766. }
  767. this.memoized[word] = {
  768. 'suggestions': correct(word),
  769. 'limit': limit
  770. };
  771. return this.memoized[word]['suggestions'];
  772. }
  773. };
  774. })();
  775. // Support for use as a node.js module.
  776. if (typeof module !== 'undefined') {
  777. module.exports = Typo;
  778. }