DecodedBitStreamParser.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364
  1. <?php
  2. /*
  3. * Copyright 2007 ZXing authors
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. namespace Zxing\Qrcode\Decoder;
  18. use Zxing\Common\BitSource;
  19. use Zxing\Common\CharacterSetECI;
  20. use Zxing\Common\DecoderResult;
  21. use Zxing\FormatException;
  22. /**
  23. * <p>QR Codes can encode text as bits in one of several modes, and can use multiple modes
  24. * in one QR Code. This class decodes the bits back into text.</p>
  25. *
  26. * <p>See ISO 18004:2006, 6.4.3 - 6.4.7</p>
  27. *
  28. * @author Sean Owen
  29. */
  30. final class DecodedBitStreamParser
  31. {
  32. /**
  33. * See ISO 18004:2006, 6.4.4 Table 5
  34. */
  35. private static array $ALPHANUMERIC_CHARS = [
  36. '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B',
  37. 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
  38. 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
  39. ' ', '$', '%', '*', '+', '-', '.', '/', ':',
  40. ];
  41. private static int $GB2312_SUBSET = 1;
  42. public static function decode(
  43. $bytes,
  44. $version,
  45. $ecLevel,
  46. $hints
  47. ): \Zxing\Common\DecoderResult
  48. {
  49. $bits = new BitSource($bytes);
  50. $result = '';//new StringBuilder(50);
  51. $byteSegments = [];
  52. $symbolSequence = -1;
  53. $parityData = -1;
  54. try {
  55. $currentCharacterSetECI = null;
  56. $fc1InEffect = false;
  57. $mode = '';
  58. do {
  59. // While still another segment to read...
  60. if ($bits->available() < 4) {
  61. // OK, assume we're done. Really, a TERMINATOR mode should have been recorded here
  62. $mode = Mode::$TERMINATOR;
  63. } else {
  64. $mode = Mode::forBits($bits->readBits(4)); // mode is encoded by 4 bits
  65. }
  66. if ($mode != Mode::$TERMINATOR) {
  67. if ($mode == Mode::$FNC1_FIRST_POSITION || $mode == Mode::$FNC1_SECOND_POSITION) {
  68. // We do little with FNC1 except alter the parsed result a bit according to the spec
  69. $fc1InEffect = true;
  70. } elseif ($mode == Mode::$STRUCTURED_APPEND) {
  71. if ($bits->available() < 16) {
  72. throw FormatException::getFormatInstance();
  73. }
  74. // sequence number and parity is added later to the result metadata
  75. // Read next 8 bits (symbol sequence #) and 8 bits (parity data), then continue
  76. $symbolSequence = $bits->readBits(8);
  77. $parityData = $bits->readBits(8);
  78. } elseif ($mode == Mode::$ECI) {
  79. // Count doesn't apply to ECI
  80. $value = self::parseECIValue($bits);
  81. $currentCharacterSetECI = CharacterSetECI::getCharacterSetECIByValue($value);
  82. if ($currentCharacterSetECI == null) {
  83. throw FormatException::getFormatInstance();
  84. }
  85. } else {
  86. // First handle Hanzi mode which does not start with character count
  87. if ($mode == Mode::$HANZI) {
  88. //chinese mode contains a sub set indicator right after mode indicator
  89. $subset = $bits->readBits(4);
  90. $countHanzi = $bits->readBits($mode->getCharacterCountBits($version));
  91. if ($subset == self::$GB2312_SUBSET) {
  92. self::decodeHanziSegment($bits, $result, $countHanzi);
  93. }
  94. } else {
  95. // "Normal" QR code modes:
  96. // How many characters will follow, encoded in this mode?
  97. $count = $bits->readBits($mode->getCharacterCountBits($version));
  98. if ($mode == Mode::$NUMERIC) {
  99. self::decodeNumericSegment($bits, $result, $count);
  100. } elseif ($mode == Mode::$ALPHANUMERIC) {
  101. self::decodeAlphanumericSegment($bits, $result, $count, $fc1InEffect);
  102. } elseif ($mode == Mode::$BYTE) {
  103. self::decodeByteSegment($bits, $result, $count, $currentCharacterSetECI, $byteSegments, $hints);
  104. } elseif ($mode == Mode::$KANJI) {
  105. self::decodeKanjiSegment($bits, $result, $count);
  106. } else {
  107. throw FormatException::getFormatInstance();
  108. }
  109. }
  110. }
  111. }
  112. } while ($mode != Mode::$TERMINATOR);
  113. } catch (\InvalidArgumentException) {
  114. // from readBits() calls
  115. throw FormatException::getFormatInstance();
  116. }
  117. return new DecoderResult(
  118. $bytes,
  119. $result,
  120. empty($byteSegments) ? null : $byteSegments,
  121. $ecLevel == null ? null : 'L',//ErrorCorrectionLevel::toString($ecLevel),
  122. $symbolSequence,
  123. $parityData
  124. );
  125. }
  126. private static function parseECIValue($bits)
  127. {
  128. $firstByte = $bits->readBits(8);
  129. if (($firstByte & 0x80) == 0) {
  130. // just one byte
  131. return $firstByte & 0x7F;
  132. }
  133. if (($firstByte & 0xC0) == 0x80) {
  134. // two bytes
  135. $secondByte = $bits->readBits(8);
  136. return (($firstByte & 0x3F) << 8) | $secondByte;
  137. }
  138. if (($firstByte & 0xE0) == 0xC0) {
  139. // three bytes
  140. $secondThirdBytes = $bits->readBits(16);
  141. return (($firstByte & 0x1F) << 16) | $secondThirdBytes;
  142. }
  143. throw FormatException::getFormatInstance();
  144. }
  145. /**
  146. * See specification GBT 18284-2000
  147. */
  148. private static function decodeHanziSegment(
  149. $bits,
  150. &$result,
  151. $count
  152. )
  153. {
  154. // Don't crash trying to read more bits than we have available.
  155. if ($count * 13 > $bits->available()) {
  156. throw FormatException::getFormatInstance();
  157. }
  158. // Each character will require 2 bytes. Read the characters as 2-byte pairs
  159. // and decode as GB2312 afterwards
  160. $buffer = fill_array(0, 2 * $count, 0);
  161. $offset = 0;
  162. while ($count > 0) {
  163. // Each 13 bits encodes a 2-byte character
  164. $twoBytes = $bits->readBits(13);
  165. $assembledTwoBytes = (($twoBytes / 0x060) << 8) | ($twoBytes % 0x060);
  166. if ($assembledTwoBytes < 0x003BF) {
  167. // In the 0xA1A1 to 0xAAFE range
  168. $assembledTwoBytes += 0x0A1A1;
  169. } else {
  170. // In the 0xB0A1 to 0xFAFE range
  171. $assembledTwoBytes += 0x0A6A1;
  172. }
  173. $buffer[$offset] = (($assembledTwoBytes >> 8) & 0xFF);//(byte)
  174. $buffer[$offset + 1] = ($assembledTwoBytes & 0xFF);//(byte)
  175. $offset += 2;
  176. $count--;
  177. }
  178. $result .= iconv('GB2312', 'UTF-8', implode($buffer));
  179. }
  180. private static function decodeNumericSegment(
  181. $bits,
  182. &$result,
  183. $count
  184. )
  185. {
  186. // Read three digits at a time
  187. while ($count >= 3) {
  188. // Each 10 bits encodes three digits
  189. if ($bits->available() < 10) {
  190. throw FormatException::getFormatInstance();
  191. }
  192. $threeDigitsBits = $bits->readBits(10);
  193. if ($threeDigitsBits >= 1000) {
  194. throw FormatException::getFormatInstance();
  195. }
  196. $result .= (self::toAlphaNumericChar($threeDigitsBits / 100));
  197. $result .= (self::toAlphaNumericChar(($threeDigitsBits / 10) % 10));
  198. $result .= (self::toAlphaNumericChar($threeDigitsBits % 10));
  199. $count -= 3;
  200. }
  201. if ($count == 2) {
  202. // Two digits left over to read, encoded in 7 bits
  203. if ($bits->available() < 7) {
  204. throw FormatException::getFormatInstance();
  205. }
  206. $twoDigitsBits = $bits->readBits(7);
  207. if ($twoDigitsBits >= 100) {
  208. throw FormatException::getFormatInstance();
  209. }
  210. $result .= (self::toAlphaNumericChar($twoDigitsBits / 10));
  211. $result .= (self::toAlphaNumericChar($twoDigitsBits % 10));
  212. } elseif ($count == 1) {
  213. // One digit left over to read
  214. if ($bits->available() < 4) {
  215. throw FormatException::getFormatInstance();
  216. }
  217. $digitBits = $bits->readBits(4);
  218. if ($digitBits >= 10) {
  219. throw FormatException::getFormatInstance();
  220. }
  221. $result .= (self::toAlphaNumericChar($digitBits));
  222. }
  223. }
  224. private static function toAlphaNumericChar($value)
  225. {
  226. if ($value >= count(self::$ALPHANUMERIC_CHARS)) {
  227. throw FormatException::getFormatInstance();
  228. }
  229. return self::$ALPHANUMERIC_CHARS[$value];
  230. }
  231. private static function decodeAlphanumericSegment(
  232. $bits,
  233. &$result,
  234. $count,
  235. $fc1InEffect
  236. )
  237. {
  238. // Read two characters at a time
  239. $start = strlen((string) $result);
  240. while ($count > 1) {
  241. if ($bits->available() < 11) {
  242. throw FormatException::getFormatInstance();
  243. }
  244. $nextTwoCharsBits = $bits->readBits(11);
  245. $result .= (self::toAlphaNumericChar($nextTwoCharsBits / 45));
  246. $result .= (self::toAlphaNumericChar($nextTwoCharsBits % 45));
  247. $count -= 2;
  248. }
  249. if ($count == 1) {
  250. // special case: one character left
  251. if ($bits->available() < 6) {
  252. throw FormatException::getFormatInstance();
  253. }
  254. $result .= self::toAlphaNumericChar($bits->readBits(6));
  255. }
  256. // See section 6.4.8.1, 6.4.8.2
  257. if ($fc1InEffect) {
  258. // We need to massage the result a bit if in an FNC1 mode:
  259. for ($i = $start; $i < strlen((string) $result); $i++) {
  260. if ($result[$i] == '%') {
  261. if ($i < strlen((string) $result) - 1 && $result[$i + 1] == '%') {
  262. // %% is rendered as %
  263. $result = substr_replace($result, '', $i + 1, 1);//deleteCharAt(i + 1);
  264. } else {
  265. // In alpha mode, % should be converted to FNC1 separator 0x1D
  266. $result . setCharAt($i, chr(0x1D));
  267. }
  268. }
  269. }
  270. }
  271. }
  272. private static function decodeByteSegment(
  273. $bits,
  274. &$result,
  275. $count,
  276. $currentCharacterSetECI,
  277. &$byteSegments,
  278. $hints
  279. )
  280. {
  281. // Don't crash trying to read more bits than we have available.
  282. if (8 * $count > $bits->available()) {
  283. throw FormatException::getFormatInstance();
  284. }
  285. $readBytes = fill_array(0, $count, 0);
  286. for ($i = 0; $i < $count; $i++) {
  287. $readBytes[$i] = $bits->readBits(8);//(byte)
  288. }
  289. $text = implode(array_map('chr', $readBytes));
  290. $encoding = '';
  291. if ($currentCharacterSetECI == null) {
  292. // The spec isn't clear on this mode; see
  293. // section 6.4.5: t does not say which encoding to assuming
  294. // upon decoding. I have seen ISO-8859-1 used as well as
  295. // Shift_JIS -- without anything like an ECI designator to
  296. // give a hint.
  297. $encoding = mb_detect_encoding($text, $hints);
  298. } else {
  299. $encoding = $currentCharacterSetECI->name();
  300. }
  301. // $result.= mb_convert_encoding($text ,$encoding);//(new String(readBytes, encoding));
  302. $result .= $text;//(new String(readBytes, encoding));
  303. $byteSegments = array_merge($byteSegments, $readBytes);
  304. }
  305. private static function decodeKanjiSegment(
  306. $bits,
  307. &$result,
  308. $count
  309. )
  310. {
  311. // Don't crash trying to read more bits than we have available.
  312. if ($count * 13 > $bits->available()) {
  313. throw FormatException::getFormatInstance();
  314. }
  315. // Each character will require 2 bytes. Read the characters as 2-byte pairs
  316. // and decode as Shift_JIS afterwards
  317. $buffer = [0, 2 * $count, 0];
  318. $offset = 0;
  319. while ($count > 0) {
  320. // Each 13 bits encodes a 2-byte character
  321. $twoBytes = $bits->readBits(13);
  322. $assembledTwoBytes = (($twoBytes / 0x0C0) << 8) | ($twoBytes % 0x0C0);
  323. if ($assembledTwoBytes < 0x01F00) {
  324. // In the 0x8140 to 0x9FFC range
  325. $assembledTwoBytes += 0x08140;
  326. } else {
  327. // In the 0xE040 to 0xEBBF range
  328. $assembledTwoBytes += 0x0C140;
  329. }
  330. $buffer[$offset] = ($assembledTwoBytes >> 8);//(byte)
  331. $buffer[$offset + 1] = $assembledTwoBytes; //(byte)
  332. $offset += 2;
  333. $count--;
  334. }
  335. // Shift_JIS may not be supported in some environments:
  336. $result .= iconv('shift-jis', 'utf-8', implode($buffer));
  337. }
  338. private function DecodedBitStreamParser(): void
  339. {
  340. }
  341. }