DecodedBitStreamParser.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. <?php
  2. /*
  3. * Copyright 2007 ZXing authors
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. namespace Zxing\Qrcode\Decoder;
  18. use Zxing\DecodeHintType;
  19. use Zxing\FormatException;
  20. use Zxing\Common\BitSource;
  21. use Zxing\Common\CharacterSetECI;
  22. use Zxing\Common\DecoderResult;
  23. use Zxing\Common\StringUtils;
  24. /**
  25. * <p>QR Codes can encode text as bits in one of several modes, and can use multiple modes
  26. * in one QR Code. This class decodes the bits back into text.</p>
  27. *
  28. * <p>See ISO 18004:2006, 6.4.3 - 6.4.7</p>
  29. *
  30. * @author Sean Owen
  31. */
  32. final class DecodedBitStreamParser
  33. {
  34. /**
  35. * See ISO 18004:2006, 6.4.4 Table 5
  36. */
  37. private static $ALPHANUMERIC_CHARS = [
  38. '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B',
  39. 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
  40. 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
  41. ' ', '$', '%', '*', '+', '-', '.', '/', ':',
  42. ];
  43. private static $GB2312_SUBSET = 1;
  44. public static function decode($bytes,
  45. $version,
  46. $ecLevel,
  47. $hints)
  48. {
  49. $bits = new BitSource($bytes);
  50. $result = '';//new StringBuilder(50);
  51. $byteSegments = [];
  52. $symbolSequence = -1;
  53. $parityData = -1;
  54. try {
  55. $currentCharacterSetECI = null;
  56. $fc1InEffect = false;
  57. $mode = '';
  58. do {
  59. // While still another segment to read...
  60. if ($bits->available() < 4) {
  61. // OK, assume we're done. Really, a TERMINATOR mode should have been recorded here
  62. $mode = Mode::$TERMINATOR;
  63. } else {
  64. $mode = Mode::forBits($bits->readBits(4)); // mode is encoded by 4 bits
  65. }
  66. if ($mode != Mode::$TERMINATOR) {
  67. if ($mode == Mode::$FNC1_FIRST_POSITION || $mode == Mode::$FNC1_SECOND_POSITION) {
  68. // We do little with FNC1 except alter the parsed result a bit according to the spec
  69. $fc1InEffect = true;
  70. } else if ($mode == Mode::$STRUCTURED_APPEND) {
  71. if ($bits->available() < 16) {
  72. throw FormatException::getFormatInstance();
  73. }
  74. // sequence number and parity is added later to the result metadata
  75. // Read next 8 bits (symbol sequence #) and 8 bits (parity data), then continue
  76. $symbolSequence = $bits->readBits(8);
  77. $parityData = $bits->readBits(8);
  78. } else if ($mode == Mode::$ECI) {
  79. // Count doesn't apply to ECI
  80. $value = self::parseECIValue($bits);
  81. $currentCharacterSetECI = CharacterSetECI::getCharacterSetECIByValue($value);
  82. if ($currentCharacterSetECI == null) {
  83. throw FormatException::getFormatInstance();
  84. }
  85. } else {
  86. // First handle Hanzi mode which does not start with character count
  87. if ($mode == Mode::$HANZI) {
  88. //chinese mode contains a sub set indicator right after mode indicator
  89. $subset = $bits->readBits(4);
  90. $countHanzi = $bits->readBits($mode->getCharacterCountBits($version));
  91. if ($subset == self::$GB2312_SUBSET) {
  92. self::decodeHanziSegment($bits, $result, $countHanzi);
  93. }
  94. } else {
  95. // "Normal" QR code modes:
  96. // How many characters will follow, encoded in this mode?
  97. $count = $bits->readBits($mode->getCharacterCountBits($version));
  98. if ($mode == Mode::$NUMERIC) {
  99. self::decodeNumericSegment($bits, $result, $count);
  100. } else if ($mode == Mode::$ALPHANUMERIC) {
  101. self::decodeAlphanumericSegment($bits, $result, $count, $fc1InEffect);
  102. } else if ($mode == Mode::$BYTE) {
  103. self::decodeByteSegment($bits, $result, $count, $currentCharacterSetECI, $byteSegments, $hints);
  104. } else if ($mode == Mode::$KANJI) {
  105. self::decodeKanjiSegment($bits, $result, $count);
  106. } else {
  107. throw FormatException::getFormatInstance();
  108. }
  109. }
  110. }
  111. }
  112. } while ($mode != Mode::$TERMINATOR);
  113. } catch (\InvalidArgumentException $iae) {
  114. // from readBits() calls
  115. throw FormatException::getFormatInstance();
  116. }
  117. return new DecoderResult($bytes,
  118. $result,
  119. empty($byteSegments) ? null : $byteSegments,
  120. $ecLevel == null ? null : 'L',//ErrorCorrectionLevel::toString($ecLevel),
  121. $symbolSequence,
  122. $parityData);
  123. }
  124. private static function parseECIValue($bits)
  125. {
  126. $firstByte = $bits->readBits(8);
  127. if (($firstByte & 0x80) == 0) {
  128. // just one byte
  129. return $firstByte & 0x7F;
  130. }
  131. if (($firstByte & 0xC0) == 0x80) {
  132. // two bytes
  133. $secondByte = $bits->readBits(8);
  134. return (($firstByte & 0x3F) << 8) | $secondByte;
  135. }
  136. if (($firstByte & 0xE0) == 0xC0) {
  137. // three bytes
  138. $secondThirdBytes = $bits->readBits(16);
  139. return (($firstByte & 0x1F) << 16) | $secondThirdBytes;
  140. }
  141. throw FormatException::getFormatInstance();
  142. }
  143. /**
  144. * See specification GBT 18284-2000
  145. */
  146. private static function decodeHanziSegment($bits,
  147. &$result,
  148. $count)
  149. {
  150. // Don't crash trying to read more bits than we have available.
  151. if ($count * 13 > $bits->available()) {
  152. throw FormatException::getFormatInstance();
  153. }
  154. // Each character will require 2 bytes. Read the characters as 2-byte pairs
  155. // and decode as GB2312 afterwards
  156. $buffer = fill_array(0, 2 * $count, 0);
  157. $offset = 0;
  158. while ($count > 0) {
  159. // Each 13 bits encodes a 2-byte character
  160. $twoBytes = $bits->readBits(13);
  161. $assembledTwoBytes = (($twoBytes / 0x060) << 8) | ($twoBytes % 0x060);
  162. if ($assembledTwoBytes < 0x003BF) {
  163. // In the 0xA1A1 to 0xAAFE range
  164. $assembledTwoBytes += 0x0A1A1;
  165. } else {
  166. // In the 0xB0A1 to 0xFAFE range
  167. $assembledTwoBytes += 0x0A6A1;
  168. }
  169. $buffer[$offset] = (($assembledTwoBytes >> 8) & 0xFF);//(byte)
  170. $buffer[$offset + 1] = ($assembledTwoBytes & 0xFF);//(byte)
  171. $offset += 2;
  172. $count--;
  173. }
  174. $result .= iconv('GB2312', 'UTF-8', implode($buffer));
  175. }
  176. private static function decodeNumericSegment($bits,
  177. &$result,
  178. $count)
  179. {
  180. // Read three digits at a time
  181. while ($count >= 3) {
  182. // Each 10 bits encodes three digits
  183. if ($bits->available() < 10) {
  184. throw FormatException::getFormatInstance();
  185. }
  186. $threeDigitsBits = $bits->readBits(10);
  187. if ($threeDigitsBits >= 1000) {
  188. throw FormatException::getFormatInstance();
  189. }
  190. $result .= (self::toAlphaNumericChar($threeDigitsBits / 100));
  191. $result .= (self::toAlphaNumericChar(($threeDigitsBits / 10) % 10));
  192. $result .= (self::toAlphaNumericChar($threeDigitsBits % 10));
  193. $count -= 3;
  194. }
  195. if ($count == 2) {
  196. // Two digits left over to read, encoded in 7 bits
  197. if ($bits->available() < 7) {
  198. throw FormatException::getFormatInstance();
  199. }
  200. $twoDigitsBits = $bits->readBits(7);
  201. if ($twoDigitsBits >= 100) {
  202. throw FormatException::getFormatInstance();
  203. }
  204. $result .= (self::toAlphaNumericChar($twoDigitsBits / 10));
  205. $result .= (self::toAlphaNumericChar($twoDigitsBits % 10));
  206. } else if ($count == 1) {
  207. // One digit left over to read
  208. if ($bits->available() < 4) {
  209. throw FormatException::getFormatInstance();
  210. }
  211. $digitBits = $bits->readBits(4);
  212. if ($digitBits >= 10) {
  213. throw FormatException::getFormatInstance();
  214. }
  215. $result .= (self::toAlphaNumericChar($digitBits));
  216. }
  217. }
  218. private static function toAlphaNumericChar($value)
  219. {
  220. if ($value >= count(self::$ALPHANUMERIC_CHARS)) {
  221. throw FormatException::getFormatInstance();
  222. }
  223. return self::$ALPHANUMERIC_CHARS[$value];
  224. }
  225. private static function decodeAlphanumericSegment($bits,
  226. &$result,
  227. $count,
  228. $fc1InEffect)
  229. {
  230. // Read two characters at a time
  231. $start = strlen($result);
  232. while ($count > 1) {
  233. if ($bits->available() < 11) {
  234. throw FormatException::getFormatInstance();
  235. }
  236. $nextTwoCharsBits = $bits->readBits(11);
  237. $result .= (self::toAlphaNumericChar($nextTwoCharsBits / 45));
  238. $result .= (self::toAlphaNumericChar($nextTwoCharsBits % 45));
  239. $count -= 2;
  240. }
  241. if ($count == 1) {
  242. // special case: one character left
  243. if ($bits->available() < 6) {
  244. throw FormatException::getFormatInstance();
  245. }
  246. $result .= self::toAlphaNumericChar($bits->readBits(6));
  247. }
  248. // See section 6.4.8.1, 6.4.8.2
  249. if ($fc1InEffect) {
  250. // We need to massage the result a bit if in an FNC1 mode:
  251. for ($i = $start; $i < strlen($result); $i++) {
  252. if ($result[$i] == '%') {
  253. if ($i < strlen($result) - 1 && $result[$i + 1] == '%') {
  254. // %% is rendered as %
  255. $result = substr_replace($result, '', $i + 1, 1);//deleteCharAt(i + 1);
  256. } else {
  257. // In alpha mode, % should be converted to FNC1 separator 0x1D
  258. $result . setCharAt($i, chr(0x1D));
  259. }
  260. }
  261. }
  262. }
  263. }
  264. private static function decodeByteSegment($bits,
  265. &$result,
  266. $count,
  267. $currentCharacterSetECI,
  268. &$byteSegments,
  269. $hints)
  270. {
  271. // Don't crash trying to read more bits than we have available.
  272. if (8 * $count > $bits->available()) {
  273. throw FormatException::getFormatInstance();
  274. }
  275. $readBytes = fill_array(0, $count, 0);
  276. for ($i = 0; $i < $count; $i++) {
  277. $readBytes[$i] = $bits->readBits(8);//(byte)
  278. }
  279. $text = implode(array_map('chr', $readBytes));
  280. $encoding = '';
  281. if ($currentCharacterSetECI == null) {
  282. // The spec isn't clear on this mode; see
  283. // section 6.4.5: t does not say which encoding to assuming
  284. // upon decoding. I have seen ISO-8859-1 used as well as
  285. // Shift_JIS -- without anything like an ECI designator to
  286. // give a hint.
  287. $encoding = mb_detect_encoding($text, $hints);
  288. } else {
  289. $encoding = $currentCharacterSetECI->name();
  290. }
  291. // $result.= mb_convert_encoding($text ,$encoding);//(new String(readBytes, encoding));
  292. $result .= $text;//(new String(readBytes, encoding));
  293. $byteSegments = array_merge($byteSegments, $readBytes);
  294. }
  295. private static function decodeKanjiSegment($bits,
  296. &$result,
  297. $count)
  298. {
  299. // Don't crash trying to read more bits than we have available.
  300. if ($count * 13 > $bits->available()) {
  301. throw FormatException::getFormatInstance();
  302. }
  303. // Each character will require 2 bytes. Read the characters as 2-byte pairs
  304. // and decode as Shift_JIS afterwards
  305. $buffer = [0, 2 * $count, 0];
  306. $offset = 0;
  307. while ($count > 0) {
  308. // Each 13 bits encodes a 2-byte character
  309. $twoBytes = $bits->readBits(13);
  310. $assembledTwoBytes = (($twoBytes / 0x0C0) << 8) | ($twoBytes % 0x0C0);
  311. if ($assembledTwoBytes < 0x01F00) {
  312. // In the 0x8140 to 0x9FFC range
  313. $assembledTwoBytes += 0x08140;
  314. } else {
  315. // In the 0xE040 to 0xEBBF range
  316. $assembledTwoBytes += 0x0C140;
  317. }
  318. $buffer[$offset] = ($assembledTwoBytes >> 8);//(byte)
  319. $buffer[$offset + 1] = $assembledTwoBytes; //(byte)
  320. $offset += 2;
  321. $count--;
  322. }
  323. // Shift_JIS may not be supported in some environments:
  324. $result .= iconv('shift-jis', 'utf-8', implode($buffer));
  325. }
  326. private function DecodedBitStreamParser()
  327. {
  328. }
  329. }