123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670 |
/**
 * Unidecode takes UTF-8 data and tries to represent it in US-ASCII characters (i.e., the universally displayable characters between 0x00 and 0x7F).
 * The representation is almost always an attempt at transliteration -- i.e., conveying, in Roman letters, the pronunciation expressed by the text in
 * some other writing system.
 *
 * The tables used (in data) are converted from the tables provided in the Perl library Text::Unidecode (http://search.cpan.org/dist/Text-Unidecode/lib/Text/Unidecode.pm)
 * and are distributed under the Perl license.
 *
 * @author Francois-Guillaume Ribreau
 *
 * Based on the port of unidecode for PHP.
 */
- 'use strict';
- var tr = {};
- var utf8_rx = /(?![\x00-\x7F]|[\xC0-\xDF][\x80-\xBF]|[\xE0-\xEF][\x80-\xBF]{2}|[\xF0-\xF7][\x80-\xBF]{3})./g;
- module.exports = function (str) {
- return str.replace(utf8_rx, unidecode_internal_replace);
- };
/**
 * Loads the transliteration table for one high byte.
 *
 * Every path is a literal `require` so that bundlers relying on static
 * analysis can still discover the data files.
 *
 * @param {number} h High byte (0x00-0xff) of the character code.
 * @returns {*} Whatever the matching ./data/xNN module exports, or null when
 *   no data file is listed for `h`.
 */
function unidecode_load_table(h) {
  switch (h) {
    case 0x00: return require('./data/x00');
    case 0x01: return require('./data/x01');
    case 0x02: return require('./data/x02');
    case 0x03: return require('./data/x03');
    case 0x04: return require('./data/x04');
    case 0x05: return require('./data/x05');
    case 0x06: return require('./data/x06');
    case 0x07: return require('./data/x07');
    case 0x09: return require('./data/x09');
    case 0x0a: return require('./data/x0a');
    case 0x0b: return require('./data/x0b');
    case 0x0c: return require('./data/x0c');
    case 0x0d: return require('./data/x0d');
    case 0x0e: return require('./data/x0e');
    case 0x0f: return require('./data/x0f');
    case 0x10: return require('./data/x10');
    case 0x11: return require('./data/x11');
    case 0x12: return require('./data/x12');
    case 0x13: return require('./data/x13');
    case 0x14: return require('./data/x14');
    case 0x15: return require('./data/x15');
    case 0x16: return require('./data/x16');
    case 0x17: return require('./data/x17');
    case 0x18: return require('./data/x18');
    case 0x1e: return require('./data/x1e');
    case 0x1f: return require('./data/x1f');
    case 0x20: return require('./data/x20');
    case 0x21: return require('./data/x21');
    case 0x22: return require('./data/x22');
    case 0x23: return require('./data/x23');
    case 0x24: return require('./data/x24');
    case 0x25: return require('./data/x25');
    case 0x26: return require('./data/x26');
    case 0x27: return require('./data/x27');
    case 0x28: return require('./data/x28');
    case 0x2e: return require('./data/x2e');
    case 0x2f: return require('./data/x2f');
    case 0x30: return require('./data/x30');
    case 0x31: return require('./data/x31');
    case 0x32: return require('./data/x32');
    case 0x33: return require('./data/x33');
    case 0x4d: return require('./data/x4d');
    case 0x4e: return require('./data/x4e');
    case 0x4f: return require('./data/x4f');
    case 0x50: return require('./data/x50');
    case 0x51: return require('./data/x51');
    case 0x52: return require('./data/x52');
    case 0x53: return require('./data/x53');
    case 0x54: return require('./data/x54');
    case 0x55: return require('./data/x55');
    case 0x56: return require('./data/x56');
    case 0x57: return require('./data/x57');
    case 0x58: return require('./data/x58');
    case 0x59: return require('./data/x59');
    case 0x5a: return require('./data/x5a');
    case 0x5b: return require('./data/x5b');
    case 0x5c: return require('./data/x5c');
    case 0x5d: return require('./data/x5d');
    case 0x5e: return require('./data/x5e');
    case 0x5f: return require('./data/x5f');
    case 0x60: return require('./data/x60');
    case 0x61: return require('./data/x61');
    case 0x62: return require('./data/x62');
    case 0x63: return require('./data/x63');
    case 0x64: return require('./data/x64');
    case 0x65: return require('./data/x65');
    case 0x66: return require('./data/x66');
    case 0x67: return require('./data/x67');
    case 0x68: return require('./data/x68');
    case 0x69: return require('./data/x69');
    case 0x6a: return require('./data/x6a');
    case 0x6b: return require('./data/x6b');
    case 0x6c: return require('./data/x6c');
    case 0x6d: return require('./data/x6d');
    case 0x6e: return require('./data/x6e');
    case 0x6f: return require('./data/x6f');
    case 0x70: return require('./data/x70');
    case 0x71: return require('./data/x71');
    case 0x72: return require('./data/x72');
    case 0x73: return require('./data/x73');
    case 0x74: return require('./data/x74');
    case 0x75: return require('./data/x75');
    case 0x76: return require('./data/x76');
    case 0x77: return require('./data/x77');
    case 0x78: return require('./data/x78');
    case 0x79: return require('./data/x79');
    case 0x7a: return require('./data/x7a');
    case 0x7b: return require('./data/x7b');
    case 0x7c: return require('./data/x7c');
    case 0x7d: return require('./data/x7d');
    case 0x7e: return require('./data/x7e');
    case 0x7f: return require('./data/x7f');
    case 0x80: return require('./data/x80');
    case 0x81: return require('./data/x81');
    case 0x82: return require('./data/x82');
    case 0x83: return require('./data/x83');
    case 0x84: return require('./data/x84');
    case 0x85: return require('./data/x85');
    case 0x86: return require('./data/x86');
    case 0x87: return require('./data/x87');
    case 0x88: return require('./data/x88');
    case 0x89: return require('./data/x89');
    case 0x8a: return require('./data/x8a');
    case 0x8b: return require('./data/x8b');
    case 0x8c: return require('./data/x8c');
    case 0x8d: return require('./data/x8d');
    case 0x8e: return require('./data/x8e');
    case 0x8f: return require('./data/x8f');
    case 0x90: return require('./data/x90');
    case 0x91: return require('./data/x91');
    case 0x92: return require('./data/x92');
    case 0x93: return require('./data/x93');
    case 0x94: return require('./data/x94');
    case 0x95: return require('./data/x95');
    case 0x96: return require('./data/x96');
    case 0x97: return require('./data/x97');
    case 0x98: return require('./data/x98');
    case 0x99: return require('./data/x99');
    case 0x9a: return require('./data/x9a');
    case 0x9b: return require('./data/x9b');
    case 0x9c: return require('./data/x9c');
    case 0x9d: return require('./data/x9d');
    case 0x9e: return require('./data/x9e');
    case 0x9f: return require('./data/x9f');
    case 0xa0: return require('./data/xa0');
    case 0xa1: return require('./data/xa1');
    case 0xa2: return require('./data/xa2');
    case 0xa3: return require('./data/xa3');
    case 0xa4: return require('./data/xa4');
    case 0xac: return require('./data/xac');
    case 0xad: return require('./data/xad');
    case 0xae: return require('./data/xae');
    case 0xaf: return require('./data/xaf');
    case 0xb0: return require('./data/xb0');
    case 0xb1: return require('./data/xb1');
    case 0xb2: return require('./data/xb2');
    case 0xb3: return require('./data/xb3');
    case 0xb4: return require('./data/xb4');
    case 0xb5: return require('./data/xb5');
    case 0xb6: return require('./data/xb6');
    case 0xb7: return require('./data/xb7');
    case 0xb8: return require('./data/xb8');
    case 0xb9: return require('./data/xb9');
    case 0xba: return require('./data/xba');
    case 0xbb: return require('./data/xbb');
    case 0xbc: return require('./data/xbc');
    case 0xbd: return require('./data/xbd');
    case 0xbe: return require('./data/xbe');
    case 0xbf: return require('./data/xbf');
    case 0xc0: return require('./data/xc0');
    case 0xc1: return require('./data/xc1');
    case 0xc2: return require('./data/xc2');
    case 0xc3: return require('./data/xc3');
    case 0xc4: return require('./data/xc4');
    case 0xc5: return require('./data/xc5');
    case 0xc6: return require('./data/xc6');
    case 0xc7: return require('./data/xc7');
    case 0xc8: return require('./data/xc8');
    case 0xc9: return require('./data/xc9');
    case 0xca: return require('./data/xca');
    case 0xcb: return require('./data/xcb');
    case 0xcc: return require('./data/xcc');
    case 0xcd: return require('./data/xcd');
    case 0xce: return require('./data/xce');
    case 0xcf: return require('./data/xcf');
    case 0xd0: return require('./data/xd0');
    case 0xd1: return require('./data/xd1');
    case 0xd2: return require('./data/xd2');
    case 0xd3: return require('./data/xd3');
    case 0xd4: return require('./data/xd4');
    case 0xd5: return require('./data/xd5');
    case 0xd6: return require('./data/xd6');
    case 0xd7: return require('./data/xd7');
    case 0xf9: return require('./data/xf9');
    case 0xfa: return require('./data/xfa');
    case 0xfb: return require('./data/xfb');
    case 0xfc: return require('./data/xfc');
    case 0xfd: return require('./data/xfd');
    case 0xfe: return require('./data/xfe');
    case 0xff: return require('./data/xff');
    default: return null;
  }
}

/**
 * Replacement callback for String.prototype.replace: maps one matched
 * character to its ASCII transliteration.
 *
 * The code returned by utf8_to_utf16 is split into a high byte `h`, which
 * selects a table under ./data (loaded on first use and cached in `tr`), and
 * a low byte `l`, which indexes into that table.
 *
 * @param {string} match Text matched by the caller's regular expression.
 * @returns {string} '_' for codes above 0xFFFF; '' when the high byte has no
 *   table or the table has no entry at `l`; otherwise the table entry.
 */
function unidecode_internal_replace(match) {
  var utf16 = utf8_to_utf16(match);

  // Tables are indexed by a single high byte, so nothing above 0xFFFF can
  // be looked up.
  if (utf16 > 0xFFFF) {
    return '_';
  }

  var h = utf16 >> 8;
  var l = utf16 & 0xFF;

  // High bytes 0x19-0x1d (between 0x18 and 0x1e) are dropped.
  if (h > 0x18 && h < 0x1e) {
    return '';
  }

  // High bytes 0xd8-0xf8 (between 0xd7 and 0xf9) are not supported.
  if (h > 0xd7 && h < 0xf9) {
    return '';
  }

  var table = tr[h];
  if (!table) {
    table = unidecode_load_table(h);
    if (!table) {
      return '';
    }
    tr[h] = table;
  }

  // A replace() callback's result is converted to a string, so a missing
  // entry would otherwise be inserted as the literal text "undefined".
  var replacement = table[l];
  return replacement == null ? '' : replacement;
}
/**
 * Formats a byte value as two lowercase hexadecimal digits.
 *
 * @param {number} i Integer to format; 0-255 yields '00'-'ff'.
 * @returns {string} The last two hexadecimal digits of `i + 0x100`.
 */
function dec2hex(i) {
  // Adding 0x100 forces a third digit for byte values, so the final two
  // characters are always zero-padded (e.g. 0x0a -> '10a' -> '0a').
  return (i + 0x100).toString(16).slice(-2);
}
/**
 * Converts a one-character string, or a string whose characters each hold
 * one byte of a UTF-8 sequence, into a numeric character code.
 *
 * Nested arrays are unwrapped to their first element before decoding.
 *
 * @param {string|Array} raw The character, or a 2- to 4-byte UTF-8 sequence
 *   stored one byte per character.
 * @returns {number} The code of the single character for length 1,
 *   otherwise the code point decoded from the byte sequence. Any length
 *   other than 1, 2 or 3 is decoded as a 4-byte sequence.
 * See http://en.wikipedia.org/wiki/UTF-8 for the byte layout.
 */
function utf8_to_utf16(raw) {
  var b1, b2, b3, b4,
    x, y, z;

  while (Array.isArray(raw)) raw = raw[0];

  switch (raw.length) {
    case 1:
      return (raw + '').charCodeAt(0);

    case 2:
      // 110yyyxx 10xxxxxx
      b1 = raw.charCodeAt(0);
      b2 = raw.charCodeAt(1);
      x = ((b1 & 0x03) << 6) | (b2 & 0x3F);
      y = (b1 & 0x1C) >> 2;
      return (y << 8) | x;

    case 3:
      // 1110yyyy 10yyyyxx 10xxxxxx
      b1 = raw.charCodeAt(0);
      b2 = raw.charCodeAt(1);
      b3 = raw.charCodeAt(2);
      x = ((b2 & 0x03) << 6) | (b3 & 0x3F);
      y = ((b1 & 0x0F) << 4) | ((b2 & 0x3C) >> 2);
      return (y << 8) | x;

    default:
      // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
      // A missing byte reads as NaN, which the bitwise masks turn into 0.
      b1 = raw.charCodeAt(0);
      b2 = raw.charCodeAt(1);
      b3 = raw.charCodeAt(2);
      b4 = raw.charCodeAt(3);
      x = ((b3 & 0x03) << 6) | (b4 & 0x3F);
      y = ((b2 & 0x0F) << 4) | ((b3 & 0x3C) >> 2);
      // The lead byte's three payload bits sit directly above the two bits
      // taken from the second byte, hence a shift of 2.
      z = ((b1 & 0x07) << 2) | ((b2 & 0x30) >> 4);
      return (z << 16) | (y << 8) | x;
  }
}
/* From php.js */
/**
 * Returns the code point of the first character of a string.
 *
 * version: 1109.2015
 * discuss at: http://phpjs.org/functions/ord
 * original by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
 * bugfixed by: Onno Marsman
 * improved by: Brett Zamir (http://brett-zamir.me)
 * input by: incidence
 *
 * example 1: ord('K') returns 75
 * example 2: ord('\uD800\uDC00') returns 65536 (a surrogate pair forming a
 *            single Unicode character)
 *
 * @param {*} string Value to inspect; it is converted to a string first.
 * @returns {number} The code point of a leading surrogate pair, otherwise
 *   the first UTF-16 code unit (NaN for an empty string). An unpaired
 *   surrogate is returned as its own code unit value.
 */
function ord(string) {
  var str = string + '';
  var code = str.charCodeAt(0);

  // Only combine a high surrogate with a genuine low surrogate; pairing it
  // with any other following character would produce a bogus code point.
  if (0xD800 <= code && code <= 0xDBFF && str.length > 1) {
    var low = str.charCodeAt(1);
    if (0xDC00 <= low && low <= 0xDFFF) {
      return ((code - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
    }
  }

  return code;
}
|