123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355 |
- <?php
- /*
- * Copyright 2007 ZXing authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- namespace Zxing\Qrcode\Decoder;
- use Zxing\DecodeHintType;
- use Zxing\FormatException;
- use Zxing\Common\BitSource;
- use Zxing\Common\CharacterSetECI;
- use Zxing\Common\DecoderResult;
- use Zxing\Common\StringUtils;
- /**
- * <p>QR Codes can encode text as bits in one of several modes, and can use multiple modes
- * in one QR Code. This class decodes the bits back into text.</p>
- *
- * <p>See ISO 18004:2006, 6.4.3 - 6.4.7</p>
- *
- * @author Sean Owen
- */
- final class DecodedBitStreamParser
- {
- /**
- * See ISO 18004:2006, 6.4.4 Table 5
- */
- private static $ALPHANUMERIC_CHARS = [
- '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B',
- 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
- 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
- ' ', '$', '%', '*', '+', '-', '.', '/', ':',
- ];
- private static $GB2312_SUBSET = 1;
- public static function decode($bytes,
- $version,
- $ecLevel,
- $hints)
- {
- $bits = new BitSource($bytes);
- $result = '';//new StringBuilder(50);
- $byteSegments = [];
- $symbolSequence = -1;
- $parityData = -1;
- try {
- $currentCharacterSetECI = null;
- $fc1InEffect = false;
- $mode = '';
- do {
- // While still another segment to read...
- if ($bits->available() < 4) {
- // OK, assume we're done. Really, a TERMINATOR mode should have been recorded here
- $mode = Mode::$TERMINATOR;
- } else {
- $mode = Mode::forBits($bits->readBits(4)); // mode is encoded by 4 bits
- }
- if ($mode != Mode::$TERMINATOR) {
- if ($mode == Mode::$FNC1_FIRST_POSITION || $mode == Mode::$FNC1_SECOND_POSITION) {
- // We do little with FNC1 except alter the parsed result a bit according to the spec
- $fc1InEffect = true;
- } else if ($mode == Mode::$STRUCTURED_APPEND) {
- if ($bits->available() < 16) {
- throw FormatException::getFormatInstance();
- }
- // sequence number and parity is added later to the result metadata
- // Read next 8 bits (symbol sequence #) and 8 bits (parity data), then continue
- $symbolSequence = $bits->readBits(8);
- $parityData = $bits->readBits(8);
- } else if ($mode == Mode::$ECI) {
- // Count doesn't apply to ECI
- $value = self::parseECIValue($bits);
- $currentCharacterSetECI = CharacterSetECI::getCharacterSetECIByValue($value);
- if ($currentCharacterSetECI == null) {
- throw FormatException::getFormatInstance();
- }
- } else {
- // First handle Hanzi mode which does not start with character count
- if ($mode == Mode::$HANZI) {
- //chinese mode contains a sub set indicator right after mode indicator
- $subset = $bits->readBits(4);
- $countHanzi = $bits->readBits($mode->getCharacterCountBits($version));
- if ($subset == self::$GB2312_SUBSET) {
- self::decodeHanziSegment($bits, $result, $countHanzi);
- }
- } else {
- // "Normal" QR code modes:
- // How many characters will follow, encoded in this mode?
- $count = $bits->readBits($mode->getCharacterCountBits($version));
- if ($mode == Mode::$NUMERIC) {
- self::decodeNumericSegment($bits, $result, $count);
- } else if ($mode == Mode::$ALPHANUMERIC) {
- self::decodeAlphanumericSegment($bits, $result, $count, $fc1InEffect);
- } else if ($mode == Mode::$BYTE) {
- self::decodeByteSegment($bits, $result, $count, $currentCharacterSetECI, $byteSegments, $hints);
- } else if ($mode == Mode::$KANJI) {
- self::decodeKanjiSegment($bits, $result, $count);
- } else {
- throw FormatException::getFormatInstance();
- }
- }
- }
- }
- } while ($mode != Mode::$TERMINATOR);
- } catch (\InvalidArgumentException $iae) {
- // from readBits() calls
- throw FormatException::getFormatInstance();
- }
- return new DecoderResult($bytes,
- $result,
- empty($byteSegments) ? null : $byteSegments,
- $ecLevel == null ? null : 'L',//ErrorCorrectionLevel::toString($ecLevel),
- $symbolSequence,
- $parityData);
- }
- private static function parseECIValue($bits)
- {
- $firstByte = $bits->readBits(8);
- if (($firstByte & 0x80) == 0) {
- // just one byte
- return $firstByte & 0x7F;
- }
- if (($firstByte & 0xC0) == 0x80) {
- // two bytes
- $secondByte = $bits->readBits(8);
- return (($firstByte & 0x3F) << 8) | $secondByte;
- }
- if (($firstByte & 0xE0) == 0xC0) {
- // three bytes
- $secondThirdBytes = $bits->readBits(16);
- return (($firstByte & 0x1F) << 16) | $secondThirdBytes;
- }
- throw FormatException::getFormatInstance();
- }
- /**
- * See specification GBT 18284-2000
- */
- private static function decodeHanziSegment($bits,
- &$result,
- $count)
- {
- // Don't crash trying to read more bits than we have available.
- if ($count * 13 > $bits->available()) {
- throw FormatException::getFormatInstance();
- }
- // Each character will require 2 bytes. Read the characters as 2-byte pairs
- // and decode as GB2312 afterwards
- $buffer = fill_array(0, 2 * $count, 0);
- $offset = 0;
- while ($count > 0) {
- // Each 13 bits encodes a 2-byte character
- $twoBytes = $bits->readBits(13);
- $assembledTwoBytes = (($twoBytes / 0x060) << 8) | ($twoBytes % 0x060);
- if ($assembledTwoBytes < 0x003BF) {
- // In the 0xA1A1 to 0xAAFE range
- $assembledTwoBytes += 0x0A1A1;
- } else {
- // In the 0xB0A1 to 0xFAFE range
- $assembledTwoBytes += 0x0A6A1;
- }
- $buffer[$offset] = (($assembledTwoBytes >> 8) & 0xFF);//(byte)
- $buffer[$offset + 1] = ($assembledTwoBytes & 0xFF);//(byte)
- $offset += 2;
- $count--;
- }
- $result .= iconv('GB2312', 'UTF-8', implode($buffer));
- }
- private static function decodeNumericSegment($bits,
- &$result,
- $count)
- {
- // Read three digits at a time
- while ($count >= 3) {
- // Each 10 bits encodes three digits
- if ($bits->available() < 10) {
- throw FormatException::getFormatInstance();
- }
- $threeDigitsBits = $bits->readBits(10);
- if ($threeDigitsBits >= 1000) {
- throw FormatException::getFormatInstance();
- }
- $result .= (self::toAlphaNumericChar($threeDigitsBits / 100));
- $result .= (self::toAlphaNumericChar(($threeDigitsBits / 10) % 10));
- $result .= (self::toAlphaNumericChar($threeDigitsBits % 10));
- $count -= 3;
- }
- if ($count == 2) {
- // Two digits left over to read, encoded in 7 bits
- if ($bits->available() < 7) {
- throw FormatException::getFormatInstance();
- }
- $twoDigitsBits = $bits->readBits(7);
- if ($twoDigitsBits >= 100) {
- throw FormatException::getFormatInstance();
- }
- $result .= (self::toAlphaNumericChar($twoDigitsBits / 10));
- $result .= (self::toAlphaNumericChar($twoDigitsBits % 10));
- } else if ($count == 1) {
- // One digit left over to read
- if ($bits->available() < 4) {
- throw FormatException::getFormatInstance();
- }
- $digitBits = $bits->readBits(4);
- if ($digitBits >= 10) {
- throw FormatException::getFormatInstance();
- }
- $result .= (self::toAlphaNumericChar($digitBits));
- }
- }
- private static function toAlphaNumericChar($value)
- {
- if ($value >= count(self::$ALPHANUMERIC_CHARS)) {
- throw FormatException::getFormatInstance();
- }
- return self::$ALPHANUMERIC_CHARS[$value];
- }
- private static function decodeAlphanumericSegment($bits,
- &$result,
- $count,
- $fc1InEffect)
- {
- // Read two characters at a time
- $start = strlen($result);
- while ($count > 1) {
- if ($bits->available() < 11) {
- throw FormatException::getFormatInstance();
- }
- $nextTwoCharsBits = $bits->readBits(11);
- $result .= (self::toAlphaNumericChar($nextTwoCharsBits / 45));
- $result .= (self::toAlphaNumericChar($nextTwoCharsBits % 45));
- $count -= 2;
- }
- if ($count == 1) {
- // special case: one character left
- if ($bits->available() < 6) {
- throw FormatException::getFormatInstance();
- }
- $result .= self::toAlphaNumericChar($bits->readBits(6));
- }
- // See section 6.4.8.1, 6.4.8.2
- if ($fc1InEffect) {
- // We need to massage the result a bit if in an FNC1 mode:
- for ($i = $start; $i < strlen($result); $i++) {
- if ($result[$i] == '%') {
- if ($i < strlen($result) - 1 && $result[$i + 1] == '%') {
- // %% is rendered as %
- $result = substr_replace($result, '', $i + 1, 1);//deleteCharAt(i + 1);
- } else {
- // In alpha mode, % should be converted to FNC1 separator 0x1D
- $result . setCharAt($i, chr(0x1D));
- }
- }
- }
- }
- }
- private static function decodeByteSegment($bits,
- &$result,
- $count,
- $currentCharacterSetECI,
- &$byteSegments,
- $hints)
- {
- // Don't crash trying to read more bits than we have available.
- if (8 * $count > $bits->available()) {
- throw FormatException::getFormatInstance();
- }
- $readBytes = fill_array(0, $count, 0);
- for ($i = 0; $i < $count; $i++) {
- $readBytes[$i] = $bits->readBits(8);//(byte)
- }
- $text = implode(array_map('chr', $readBytes));
- $encoding = '';
- if ($currentCharacterSetECI == null) {
- // The spec isn't clear on this mode; see
- // section 6.4.5: t does not say which encoding to assuming
- // upon decoding. I have seen ISO-8859-1 used as well as
- // Shift_JIS -- without anything like an ECI designator to
- // give a hint.
- $encoding = mb_detect_encoding($text, $hints);
- } else {
- $encoding = $currentCharacterSetECI->name();
- }
- // $result.= mb_convert_encoding($text ,$encoding);//(new String(readBytes, encoding));
- $result .= $text;//(new String(readBytes, encoding));
- $byteSegments = array_merge($byteSegments, $readBytes);
- }
- private static function decodeKanjiSegment($bits,
- &$result,
- $count)
- {
- // Don't crash trying to read more bits than we have available.
- if ($count * 13 > $bits->available()) {
- throw FormatException::getFormatInstance();
- }
- // Each character will require 2 bytes. Read the characters as 2-byte pairs
- // and decode as Shift_JIS afterwards
- $buffer = [0, 2 * $count, 0];
- $offset = 0;
- while ($count > 0) {
- // Each 13 bits encodes a 2-byte character
- $twoBytes = $bits->readBits(13);
- $assembledTwoBytes = (($twoBytes / 0x0C0) << 8) | ($twoBytes % 0x0C0);
- if ($assembledTwoBytes < 0x01F00) {
- // In the 0x8140 to 0x9FFC range
- $assembledTwoBytes += 0x08140;
- } else {
- // In the 0xE040 to 0xEBBF range
- $assembledTwoBytes += 0x0C140;
- }
- $buffer[$offset] = ($assembledTwoBytes >> 8);//(byte)
- $buffer[$offset + 1] = $assembledTwoBytes; //(byte)
- $offset += 2;
- $count--;
- }
- // Shift_JIS may not be supported in some environments:
- $result .= iconv('shift-jis', 'utf-8', implode($buffer));
- }
- private function DecodedBitStreamParser()
- {
- }
- }
|