// typo.js (26 KB)
  1. /* globals chrome: false */
  2. /* globals __dirname: false */
  3. /* globals require: false */
  4. /* globals Buffer: false */
  5. /* globals module: false */
  6. /**
  7. * Typo is a JavaScript implementation of a spellchecker using hunspell-style
  8. * dictionaries.
  9. */
  10. var Typo;
  11. (function () {
  12. "use strict";
  /**
   * Typo constructor.
   *
   * When `dictionary` is given, the affix (.aff) and word (.dic) data are taken
   * from the arguments or read through `this._readFile`, and the lookup tables
   * are built as soon as both are available. When `dictionary` is omitted
   * nothing is loaded and `loaded` stays false.
   *
   * Missing data is read from [path]/[dictionary]/[dictionary].aff (or .dic),
   * where [path] is:
   *   - settings.dictionaryPath, when set;
   *   - otherwise "typo/dictionaries" inside a Chrome extension (the file name is
   *     passed through chrome.extension.getURL there, also when dictionaryPath
   *     is set);
   *   - otherwise __dirname + "/dictionaries" when __dirname is defined;
   *   - otherwise "./dictionaries".
   *
   * @param {String} [dictionary] The locale code of the dictionary being used, e.g.
   *     "en_US".
   * @param {String} [affData] The data from the dictionary's .aff file.
   * @param {String} [wordsData] The data from the dictionary's .dic file.
   * @param {Object} [settings] Constructor settings. Available properties are:
   *     {String} [dictionaryPath]: path to load the dictionary from.
   *     {Object} [flags]: flag information; used as the initial `flags` table.
   *     {Boolean} [asyncLoad]: If true, affData and wordsData will be loaded
   *         asynchronously.
   *     {Function} [loadedCallback]: Called when both affData and wordsData
   *         have been loaded. Only used if asyncLoad is set to true. The parameter
   *         is the instantiated Typo object.
   *
   * @returns {Typo} A Typo object.
   */
  Typo = function (dictionary, affData, wordsData, settings) {
    settings = settings || {};

    this.dictionary = null;
    this.rules = {};
    this.dictionaryTable = {};
    this.compoundRules = [];
    this.compoundRuleCodes = {};
    this.replacementTable = [];
    this.flags = settings.flags || {};
    this.memoized = {};
    this.loaded = false;

    var self = this;

    if (dictionary) {
      self.dictionary = dictionary;

      if (affData && wordsData) {
        // The data is preloaded: just set up the Typo object.
        setup();
      }
      else {
        var inChromeExtension = typeof window !== 'undefined' && 'chrome' in window &&
          'extension' in window.chrome && 'getURL' in window.chrome.extension;

        var basePath;
        if (settings.dictionaryPath) {
          basePath = settings.dictionaryPath;
        }
        else if (inChromeExtension) {
          basePath = "typo/dictionaries";
        }
        else if (typeof __dirname !== 'undefined') {
          basePath = __dirname + '/dictionaries';
        }
        else {
          basePath = './dictionaries';
        }

        // Builds the location of one dictionary file; extension URLs are only
        // resolved for the files that actually need to be read.
        var locate = function (extension) {
          var file = basePath + "/" + dictionary + "/" + dictionary + extension;
          return inChromeExtension ? chrome.extension.getURL(file) : file;
        };

        if (!affData) {
          readDataFile(locate(".aff"), setAffData);
        }
        if (!wordsData) {
          readDataFile(locate(".dic"), setWordsData);
        }
      }
    }

    // Reads one dictionary file and hands its contents to setFunc.
    function readDataFile(url, setFunc) {
      var response = self._readFile(url, null, settings.asyncLoad);

      if (settings.asyncLoad) {
        // _readFile returns the data itself rather than a promise when it reads
        // synchronously (node.js), so normalise before chaining.
        Promise.resolve(response).then(function (data) {
          setFunc(data);
        });
      }
      else {
        setFunc(response);
      }
    }

    // Stores the affix data and finishes setup once the word data is present too.
    function setAffData(data) {
      affData = data;

      if (wordsData) {
        setup();
      }
    }

    // Stores the word data and finishes setup once the affix data is present too.
    function setWordsData(data) {
      wordsData = data;

      if (affData) {
        setup();
      }
    }

    // Parses both files and builds the rule, word and compound-rule tables.
    function setup() {
      var i, j, code;

      self.rules = self._parseAFF(affData);

      // Save the rule codes that are used in compound rules.
      self.compoundRuleCodes = {};

      for (i = 0; i < self.compoundRules.length; i++) {
        var rule = self.compoundRules[i];

        for (j = 0; j < rule.length; j++) {
          self.compoundRuleCodes[rule[j]] = [];
        }
      }

      // If we add this ONLYINCOMPOUND flag to self.compoundRuleCodes, then _parseDIC
      // will do the work of saving the list of words that are compound-only.
      if ("ONLYINCOMPOUND" in self.flags) {
        self.compoundRuleCodes[self.flags.ONLYINCOMPOUND] = [];
      }

      self.dictionaryTable = self._parseDIC(wordsData);

      // Get rid of any codes from the compound rule codes that are never used
      // (or that were special regex characters).
      for (code in self.compoundRuleCodes) {
        if (self.compoundRuleCodes[code].length === 0) {
          delete self.compoundRuleCodes[code];
        }
      }

      // Build the full regular expression for each compound rule: every rule code
      // becomes an alternation of the words carrying it, everything else is kept
      // as regex syntax.
      for (i = 0; i < self.compoundRules.length; i++) {
        var ruleText = self.compoundRules[i];
        var expressionText = "";

        for (j = 0; j < ruleText.length; j++) {
          var character = ruleText[j];

          if (character in self.compoundRuleCodes) {
            expressionText += "(" + self.compoundRuleCodes[character].join("|") + ")";
          }
          else {
            expressionText += character;
          }
        }

        // Anchor the expression so that the rule has to describe the whole word,
        // not just some part of it.
        self.compoundRules[i] = new RegExp("^(?:" + expressionText + ")$", "i");
      }

      self.loaded = true;

      if (settings.asyncLoad && settings.loadedCallback) {
        settings.loadedCallback(self);
      }
    }

    return this;
  };
  Typo.prototype = {
    /**
     * Loads a Typo instance from a hash of all of the Typo properties.
     *
     * @param {Object} obj A hash of Typo properties, probably gotten from a
     *     JSON.parse(JSON.stringify(typo_instance)). Only its own properties
     *     are copied.
     * @returns {Typo} This instance.
     */
    load : function (obj) {
      for (var key in obj) {
        if (Object.prototype.hasOwnProperty.call(obj, key)) {
          this[key] = obj[key];
        }
      }

      return this;
    },

    /**
     * Read the contents of a file.
     *
     * Uses XMLHttpRequest when it exists, otherwise node's `fs` when `require`
     * exists.
     *
     * @param {String} path The path (relative) to the file.
     * @param {String} [charset="utf8"] The expected charset of the file
     * @param {Boolean} async If true, the file will be read asynchronously. For node.js this does nothing, all
     *     files are read synchronously.
     * @returns {String} The file data if async is false, otherwise a promise object. If running node.js, the data is
     *     always returned. An empty string is returned when the file could not
     *     be read outside of XMLHttpRequest.
     */
    _readFile : function (path, charset, async) {
      charset = charset || "utf8";

      if (typeof XMLHttpRequest !== 'undefined') {
        var promise;
        var req = new XMLHttpRequest();
        req.open("GET", path, async);

        if (async) {
          promise = new Promise(function (resolve, reject) {
            req.onload = function () {
              if (req.status === 200) {
                resolve(req.responseText);
              }
              else {
                reject(req.statusText);
              }
            };

            req.onerror = function () {
              reject(req.statusText);
            };
          });
        }

        if (req.overrideMimeType) {
          req.overrideMimeType("text/plain; charset=" + charset);
        }

        req.send(null);

        return async ? promise : req.responseText;
      }

      if (typeof require !== 'undefined') {
        // Node.js
        var fs = require("fs");

        try {
          if (fs.existsSync(path)) {
            return fs.readFileSync(path, charset);
          }

          console.log("Path " + path + " does not exist.");
        } catch (e) {
          console.log(e);
        }
      }

      // Every failure path yields the same (falsy) value.
      return '';
    },

    /**
     * Parse the rules out from a .aff file.
     *
     * Besides returning the PFX/SFX rules this fills `this.compoundRules`
     * (COMPOUNDRULE), `this.replacementTable` (REP) and stores every other
     * directive in `this.flags`.
     *
     * @param {String} data The contents of the affix file.
     * @returns object The rules from the file.
     */
    _parseAFF : function (data) {
      var rules = {};
      var lines = data.split(/\r?\n/);
      var i, j, numEntries, lastEntry, lineParts;

      // A missing, malformed or negative entry count is treated as "no entries"
      // so that it can neither stall nor abort the main loop.
      function parseCount(text) {
        var count = parseInt(text, 10);
        return count > 0 ? count : 0;
      }

      for (i = 0; i < lines.length; i++) {
        // Remove comment lines
        var line = this._removeAffixComments(lines[i]).trim();

        if (!line) {
          continue;
        }

        var definitionParts = line.split(/\s+/);
        var ruleType = definitionParts[0];

        if (ruleType === "PFX" || ruleType === "SFX") {
          var ruleCode = definitionParts[1];
          var combineable = definitionParts[2];
          var entries = [];

          numEntries = parseCount(definitionParts[3]);
          // Never read past the end of a truncated file.
          lastEntry = Math.min(i + numEntries, lines.length - 1);

          for (j = i + 1; j <= lastEntry; j++) {
            lineParts = lines[j].trim().split(/\s+/);

            if (lineParts.length < 4) {
              // Blank or incomplete entry line.
              continue;
            }

            var charactersToRemove = lineParts[2];
            var additionParts = lineParts[3].split("/");

            var charactersToAdd = additionParts[0];
            if (charactersToAdd === "0") {
              charactersToAdd = "";
            }

            var continuationClasses = this.parseRuleCodes(additionParts[1]);

            // A missing condition field means "no condition", like ".".
            var regexToMatch = lineParts[4] || ".";

            var entry = {};
            entry.add = charactersToAdd;

            if (continuationClasses.length > 0) {
              entry.continuationClasses = continuationClasses;
            }

            if (regexToMatch !== ".") {
              if (ruleType === "SFX") {
                entry.match = new RegExp(regexToMatch + "$");
              }
              else {
                entry.match = new RegExp("^" + regexToMatch);
              }
            }

            if (charactersToRemove !== "0") {
              if (ruleType === "SFX") {
                entry.remove = new RegExp(charactersToRemove + "$");
              }
              else {
                entry.remove = charactersToRemove;
              }
            }

            entries.push(entry);
          }

          rules[ruleCode] = { "type" : ruleType, "combineable" : (combineable === "Y"), "entries" : entries };

          i += numEntries;
        }
        else if (ruleType === "COMPOUNDRULE") {
          numEntries = parseCount(definitionParts[1]);
          lastEntry = Math.min(i + numEntries, lines.length - 1);

          for (j = i + 1; j <= lastEntry; j++) {
            lineParts = lines[j].trim().split(/\s+/);

            if (lineParts[1]) {
              this.compoundRules.push(lineParts[1]);
            }
          }

          i += numEntries;
        }
        else if (ruleType === "REP") {
          if (definitionParts.length === 3) {
            this.replacementTable.push([ definitionParts[1], definitionParts[2] ]);
          }
        }
        else {
          // ONLYINCOMPOUND
          // COMPOUNDMIN
          // FLAG
          // KEEPCASE
          // NEEDAFFIX
          this.flags[ruleType] = definitionParts[1];
        }
      }

      return rules;
    },

    /**
     * Removes comments.
     *
     * Only whole-line comments are removed, because some COMPOUNDRULE
     * definitions include '#' as part of the rule.
     *
     * @param {String} line A line from an affix file.
     * @return {String} An empty string when the line is a comment (a '#',
     *     optionally preceded by whitespace), otherwise the unchanged line.
     */
    _removeAffixComments : function (line) {
      return /^\s*#/.test(line) ? '' : line;
    },

    /**
     * Parses the words out from the .dic file.
     *
     * Reads `this.rules`, `this.flags` and `this.compoundRuleCodes`, and
     * appends to the lists in `this.compoundRuleCodes`.
     *
     * @param {String} data The data from the dictionary file.
     * @returns object The lookup table containing all of the words and
     *     word forms from the dictionary. A value of null means "known, no
     *     flags"; otherwise the value is an array of rule-code arrays.
     */
    _parseDIC : function (data) {
      data = this._removeDicComments(data);

      var lines = data.split(/\r?\n/);
      var dictionaryTable = {};
      // Called through the prototype so that a dictionary word can never
      // shadow the method on the table itself.
      var hasOwn = Object.prototype.hasOwnProperty;

      function addWord(word, rules) {
        // Some dictionaries will list the same word multiple times with different rule sets.
        if (!hasOwn.call(dictionaryTable, word)) {
          dictionaryTable[word] = null;
        }

        if (rules.length > 0) {
          if (dictionaryTable[word] === null) {
            dictionaryTable[word] = [];
          }

          dictionaryTable[word].push(rules);
        }
      }

      // The first line is the number of words in the dictionary.
      for (var i = 1; i < lines.length; i++) {
        var line = lines[i];

        if (!line) {
          // Ignore empty lines.
          continue;
        }

        var parts = line.split("/", 2);
        var word = parts[0];

        if (parts.length < 2) {
          addWord(word.trim(), []);
          continue;
        }

        var ruleCodesArray = this.parseRuleCodes(parts[1]);
        var codeCount = ruleCodesArray.length;

        // Save the ruleCodes for compound word situations.
        if (!("NEEDAFFIX" in this.flags) || ruleCodesArray.indexOf(this.flags.NEEDAFFIX) === -1) {
          addWord(word, ruleCodesArray);
        }

        // Now for each affix rule, generate that form of the word.
        for (var j = 0; j < codeCount; j++) {
          var code = ruleCodesArray[j];
          var rule = this.rules[code];

          if (rule) {
            var newWords = this._applyRule(word, rule);

            for (var n = 0; n < newWords.length; n++) {
              var newWord = newWords[n];

              addWord(newWord, []);

              if (!rule.combineable) {
                continue;
              }

              // Combine with every later rule of the other affix type.
              for (var k = j + 1; k < codeCount; k++) {
                var combineRule = this.rules[ruleCodesArray[k]];

                if (combineRule && combineRule.combineable && rule.type !== combineRule.type) {
                  var otherNewWords = this._applyRule(newWord, combineRule);

                  for (var m = 0; m < otherNewWords.length; m++) {
                    addWord(otherNewWords[m], []);
                  }
                }
              }
            }
          }

          if (code in this.compoundRuleCodes) {
            this.compoundRuleCodes[code].push(word);
          }
        }
      }

      return dictionaryTable;
    },

    /**
     * Removes comment lines.
     *
     * Tab-indented lines are treated as comments (at least the de_DE
     * dictionary uses them that way) and are emptied.
     *
     * @param {String} data The data from a .dic file.
     * @return {String} The cleaned-up data.
     */
    _removeDicComments : function (data) {
      return data.replace(/^\t.*$/mg, "");
    },

    /**
     * Splits a string of rule codes according to the FLAG directive stored in
     * `this.flags`.
     *
     * @param {String} [textCodes] The codes as written in the file.
     * @returns {String[]} The individual codes: two-character codes for "long",
     *     comma-separated codes for "num", one code per Unicode character for
     *     "UTF-8" and one code per character otherwise. Always an array.
     */
    parseRuleCodes : function (textCodes) {
      if (!textCodes) {
        return [];
      }

      var flagType = this.flags.FLAG;

      if (flagType === "long") {
        var flags = [];

        for (var i = 0; i < textCodes.length; i += 2) {
          flags.push(textCodes.substr(i, 2));
        }

        return flags;
      }

      if (flagType === "num") {
        return textCodes.split(",");
      }

      if (flagType === "UTF-8") {
        return Array.from(textCodes);
      }

      return textCodes.split("");
    },

    /**
     * Applies an affix rule to a word.
     *
     * @param {String} word The base word.
     * @param {Object} rule The affix rule.
     * @returns {String[]} The new words generated by the rule, including the
     *     words generated by the continuation classes of its entries.
     */
    _applyRule : function (word, rule) {
      var entries = rule.entries;
      var newWords = [];

      for (var i = 0; i < entries.length; i++) {
        var entry = entries[i];

        if (entry.match && !word.match(entry.match)) {
          continue;
        }

        var newWord = word;

        if (entry.remove) {
          newWord = newWord.replace(entry.remove, "");
        }

        if (rule.type === "SFX") {
          newWord = newWord + entry.add;
        }
        else {
          newWord = entry.add + newWord;
        }

        newWords.push(newWord);

        if ("continuationClasses" in entry) {
          for (var j = 0; j < entry.continuationClasses.length; j++) {
            var continuationRule = this.rules[entry.continuationClasses[j]];

            // Unknown continuation classes are ignored; this happens at least
            // in the de_DE dictionary.
            if (continuationRule) {
              newWords = newWords.concat(this._applyRule(newWord, continuationRule));
            }
          }
        }
      }

      return newWords;
    },

    /**
     * Checks whether a word or a capitalization variant exists in the current dictionary.
     * The word is trimmed and several variations of capitalizations are checked.
     * If you want to check a word without any changes made to it, call checkExact()
     *
     * @see http://blog.stevenlevithan.com/archives/faster-trim-javascript re:trimming function
     *
     * @param {String} aWord The word to check.
     * @returns {Boolean} false for an empty or whitespace-only word.
     */
    check : function (aWord) {
      if (!this.loaded) {
        throw "Dictionary not loaded.";
      }

      if (!aWord) {
        return false;
      }

      // Remove leading and trailing whitespace
      var trimmedWord = aWord.replace(/^\s\s*/, '').replace(/\s\s*$/, '');

      if (!trimmedWord) {
        // There is no first letter to build capitalization variants from.
        return false;
      }

      if (this.checkExact(trimmedWord)) {
        return true;
      }

      // The exact word is not in the dictionary.
      if (trimmedWord.toUpperCase() === trimmedWord) {
        // The word was supplied in all uppercase.
        // Check for a capitalized form of the word.
        var capitalizedWord = trimmedWord[0] + trimmedWord.substring(1).toLowerCase();

        if (this.hasFlag(capitalizedWord, "KEEPCASE")) {
          // Capitalization variants are not allowed for this word.
          return false;
        }

        if (this.checkExact(capitalizedWord)) {
          // The all-caps word is a capitalized word spelled correctly.
          return true;
        }

        if (this.checkExact(trimmedWord.toLowerCase())) {
          // The all-caps is a lowercase word spelled correctly.
          return true;
        }
      }

      var uncapitalizedWord = trimmedWord[0].toLowerCase() + trimmedWord.substring(1);

      if (uncapitalizedWord !== trimmedWord) {
        if (this.hasFlag(uncapitalizedWord, "KEEPCASE")) {
          // Capitalization variants are not allowed for this word.
          return false;
        }

        // Check for an uncapitalized form
        if (this.checkExact(uncapitalizedWord)) {
          // The word is spelled correctly but with the first letter capitalized.
          return true;
        }
      }

      return false;
    },

    /**
     * Checks whether a word exists in the current dictionary.
     *
     * @param {String} word The word to check.
     * @returns {Boolean}
     */
    checkExact : function (word) {
      if (!this.loaded) {
        throw "Dictionary not loaded.";
      }

      var ruleCodes = this.dictionaryTable[word];
      var i;

      if (typeof ruleCodes === 'undefined') {
        // Check if this might be a compound word.
        if ("COMPOUNDMIN" in this.flags && word.length >= this.flags.COMPOUNDMIN) {
          for (i = 0; i < this.compoundRules.length; i++) {
            if (word.match(this.compoundRules[i])) {
              return true;
            }
          }
        }

        return false;
      }

      if (ruleCodes === null) {
        // a null (but not undefined) value for an entry in the dictionary table
        // means that the word is in the dictionary but has no flags.
        return true;
      }

      // Values inherited from Object.prototype (e.g. for the word
      // "hasOwnProperty") are functions and therefore skipped here.
      if (typeof ruleCodes === 'object') {
        for (i = 0; i < ruleCodes.length; i++) {
          if (!this.hasFlag(word, "ONLYINCOMPOUND", ruleCodes[i])) {
            return true;
          }
        }
      }

      return false;
    },

    /**
     * Looks up whether a given word is flagged with a given flag.
     *
     * @param {String} word The word in question.
     * @param {String} flag The flag in question.
     * @param {String[]} [wordFlags] The rule codes to inspect. When omitted, all
     *     rule codes stored for the word in the dictionary table are used.
     * @return {Boolean}
     */
    hasFlag : function (word, flag, wordFlags) {
      if (!this.loaded) {
        throw "Dictionary not loaded.";
      }

      if (!(flag in this.flags)) {
        return false;
      }

      if (typeof wordFlags === 'undefined') {
        // Flatten the list of rule-code arrays stored for the word.
        wordFlags = Array.prototype.concat.apply([], this.dictionaryTable[word]);
      }

      if (wordFlags && wordFlags.indexOf(this.flags[flag]) !== -1) {
        return true;
      }

      return false;
    },

    /**
     * The letters used to build candidate edits. Assigned by suggest().
     */
    alphabet : "",

    /**
     * Returns a list of suggestions for a misspelled word.
     *
     * @see http://www.norvig.com/spell-correct.html for the basis of this suggestor.
     * This suggestor is primitive, but it works.
     *
     * Results are memoized per word in `this.memoized`.
     *
     * @param {String} word The misspelling.
     * @param {Number} [limit=5] The maximum number of suggestions to return.
     * @returns {String[]} The array of suggestions.
     */
    suggest : function (word, limit) {
      if (!this.loaded) {
        throw "Dictionary not loaded.";
      }

      limit = limit || 5;

      var hasOwn = Object.prototype.hasOwnProperty;

      if (hasOwn.call(this.memoized, word)) {
        var memoizedLimit = this.memoized[word]['limit'];

        // Only return the cached list if it's big enough or if there weren't enough suggestions
        // to fill a smaller limit.
        if (limit <= memoizedLimit || this.memoized[word]['suggestions'].length < memoizedLimit) {
          return this.memoized[word]['suggestions'].slice(0, limit);
        }
      }

      if (this.check(word)) {
        return [];
      }

      // Check the replacement table.
      for (var r = 0; r < this.replacementTable.length; r++) {
        var replacementEntry = this.replacementTable[r];

        if (word.indexOf(replacementEntry[0]) !== -1) {
          var correctedWord = word.replace(replacementEntry[0], replacementEntry[1]);

          if (this.check(correctedWord)) {
            return [ correctedWord ];
          }
        }
      }

      var self = this;
      // Candidate edits are generated from the lowercase Latin alphabet.
      self.alphabet = "abcdefghijklmnopqrstuvwxyz";

      // True when the character (or empty string) has no lowercase form in it.
      function isUpperCase(text) {
        return text.toUpperCase() === text;
      }

      // Counts one more way of producing `edit`. Own-property checks keep
      // inherited names such as "constructor" from corrupting the count.
      function countEdit(table, edit, known_only) {
        if (known_only && !self.check(edit)) {
          return;
        }

        table[edit] = hasOwn.call(table, edit) ? table[edit] + 1 : 1;
      }

      /**
       * Returns a hash keyed by all of the strings that can be made by making a single edit to the word (or words in) `words`
       * The value of each entry is the number of unique ways that the resulting word can be made.
       *
       * @arg mixed words Either a hash keyed by words or a string word to operate on.
       * @arg bool known_only Whether this function should ignore strings that are not in the dictionary.
       */
      function edits1(words, known_only) {
        var rv = {};
        var alphabetLength = self.alphabet.length;
        var i, j, letter;

        if (typeof words === 'string') {
          var single = words;
          words = {};
          words[single] = true;
        }

        for (var candidate in words) {
          // Every edit works on the character at position i, so the position
          // after the last character produces nothing.
          for (i = 0; i < candidate.length; i++) {
            var head = candidate.substring(0, i);
            var tail = candidate.substring(i);
            var current = tail.charAt(0);

            // Remove a letter.
            countEdit(rv, head + tail.substring(1), known_only);

            // Transpose letters
            // Eliminate transpositions of identical letters
            if (tail.length > 1 && tail[1] !== tail[0]) {
              countEdit(rv, head + tail[1] + tail[0] + tail.substring(2), known_only);
            }

            // Replace a letter with another letter, keeping the case of the
            // letter being replaced.
            var replaceUpper = isUpperCase(current);

            for (j = 0; j < alphabetLength; j++) {
              letter = replaceUpper ? self.alphabet[j].toUpperCase() : self.alphabet[j];

              // Eliminate replacement of a letter by itself
              if (letter !== current) {
                countEdit(rv, head + letter + tail.substring(1), known_only);
              }
            }

            // Add a letter between each letter.
            // If the letters on each side are capitalized, capitalize the new letter.
            var insertUpper = isUpperCase(head.slice(-1)) && isUpperCase(current);

            for (j = 0; j < alphabetLength; j++) {
              letter = insertUpper ? self.alphabet[j].toUpperCase() : self.alphabet[j];

              countEdit(rv, head + letter + tail, known_only);
            }
          }
        }

        return rv;
      }

      function correct(word) {
        // Get the edit-distance-1 and edit-distance-2 forms of this word.
        var ed1 = edits1(word);
        var ed2 = edits1(ed1, true);

        // Sort the edits based on how many different ways they were created.
        var weighted_corrections = ed2;

        for (var ed1word in ed1) {
          if (!self.check(ed1word)) {
            continue;
          }

          if (hasOwn.call(weighted_corrections, ed1word)) {
            weighted_corrections[ed1word] += ed1[ed1word];
          }
          else {
            weighted_corrections[ed1word] = ed1[ed1word];
          }
        }

        var i;
        var sorted_corrections = [];

        for (i in weighted_corrections) {
          if (hasOwn.call(weighted_corrections, i)) {
            sorted_corrections.push([ i, weighted_corrections[i] ]);
          }
        }

        function sorter(a, b) {
          var a_val = a[1];
          var b_val = b[1];

          if (a_val < b_val) {
            return -1;
          } else if (a_val > b_val) {
            return 1;
          }

          // @todo If a and b are equally weighted, add our own weight based on something like the key locations on this language's default keyboard.
          return b[0].localeCompare(a[0]);
        }

        // Heaviest first.
        sorted_corrections.sort(sorter).reverse();

        var rv = [];

        var capitalization_scheme = "lowercase";

        if (word.toUpperCase() === word) {
          capitalization_scheme = "uppercase";
        }
        else if (word.substr(0, 1).toUpperCase() + word.substr(1).toLowerCase() === word) {
          capitalization_scheme = "capitalized";
        }

        var working_limit = limit;

        for (i = 0; i < Math.min(working_limit, sorted_corrections.length); i++) {
          var suggestion = sorted_corrections[i][0];

          if ("uppercase" === capitalization_scheme) {
            suggestion = suggestion.toUpperCase();
          }
          else if ("capitalized" === capitalization_scheme) {
            suggestion = suggestion.substr(0, 1).toUpperCase() + suggestion.substr(1);
          }

          sorted_corrections[i][0] = suggestion;

          if (!self.hasFlag(suggestion, "NOSUGGEST") && rv.indexOf(suggestion) === -1) {
            rv.push(suggestion);
          }
          else {
            // If one of the corrections is not eligible as a suggestion, make sure we still return the right number of suggestions.
            working_limit++;
          }
        }

        return rv;
      }

      this.memoized[word] = {
        'suggestions': correct(word),
        'limit': limit
      };

      return this.memoized[word]['suggestions'];
    }
  };
  794. })();
  // When a CommonJS-style `module` object exists (e.g. under node.js), expose
  // the Typo constructor as the module's export.
  if (typeof module !== 'undefined') { module.exports = Typo; }