<?php
/**
 * Zend Framework (http://framework.zend.com/)
 *
 * @link      http://github.com/zendframework/zf2 for the canonical source repository
 * @copyright Copyright (c) 2005-2014 Zend Technologies USA Inc. (http://www.zend.com)
 * @license   http://framework.zend.com/license/new-bsd New BSD License
 */

namespace Zend\Json;

use stdClass;
use Zend\Json\Exception\InvalidArgumentException;
use Zend\Json\Exception\RuntimeException;

/**
 * Decode JSON encoded string to PHP variable constructs
 *
 * The decoder is a small hand-written tokenizer ({@link _getNextToken()})
 * driven by a recursive-descent parser ({@link _decodeValue()},
 * {@link _decodeObject()}, {@link _decodeArray()}).
 */
class Decoder
{
    /**
     * Parse tokens used to decode the JSON object. These are not
     * for public consumption, they are just used internally to the
     * class.
     */
    const EOF      = 0;
    const DATUM    = 1;
    const LBRACE   = 2;
    const LBRACKET = 3;
    const RBRACE   = 4;
    const RBRACKET = 5;
    const COMMA    = 6;
    const COLON    = 7;

    /**
     * The source being decoded (after {@link decodeUnicodeString()} has run)
     *
     * @var string
     */
    protected $source;

    /**
     * Caches the source length (in bytes)
     *
     * @var int
     */
    protected $sourceLength;

    /**
     * The byte offset within the source being decoded
     *
     * @var int
     */
    protected $offset;

    /**
     * The current token being considered in the parser cycle
     *
     * @var int
     */
    protected $token;

    /**
     * Flag indicating how objects should be decoded
     *
     * @var int
     */
    protected $decodeType;

    /**
     * Value carried by the current token when it is a DATUM
     * (string, int, float, bool or null); null for every other token.
     *
     * @var mixed
     */
    protected $tokenValue;

    /**
     * Constructor
     *
     * Normalises the source, validates the decode type and positions the
     * tokenizer on the first token.
     *
     * @param string $source     String source to decode
     * @param int    $decodeType How objects should be decoded -- see
     * {@link Zend\Json\Json::TYPE_ARRAY} and {@link Zend\Json\Json::TYPE_OBJECT} for
     * valid values
     * @throws InvalidArgumentException when $decodeType is not one of the Json::TYPE_* constants
     */
    protected function __construct($source, $decodeType)
    {
        // Set defaults
        $this->source       = self::decodeUnicodeString($source);
        $this->sourceLength = strlen($this->source);
        $this->token        = self::EOF;
        $this->offset       = 0;

        switch ($decodeType) {
            case Json::TYPE_ARRAY:
            case Json::TYPE_OBJECT:
                $this->decodeType = $decodeType;
                break;
            default:
                throw new InvalidArgumentException(
                    "Unknown decode type '{$decodeType}', please use one of the constants Json::TYPE_*"
                );
        }

        // Set pointer at first token
        $this->_getNextToken();
    }

    /**
     * Decode a JSON source string
     *
     * Decodes a JSON encoded string. The value returned will be one of the
     * following:
     *        - integer
     *        - float
     *        - boolean
     *        - null
     *        - string
     *        - stdClass
     *        - array
     *        - array of one or more of the above types
     *
     * By default, decoded objects are returned as stdClass instances; to
     * get associative arrays instead, pass {@link Zend\Json\Json::TYPE_ARRAY}
     * as the $objectDecodeType parameter.
     *
     * @param string $source           String to be decoded
     * @param int    $objectDecodeType How objects should be decoded; should be
     * either {@link Zend\Json\Json::TYPE_ARRAY} or
     * {@link Zend\Json\Json::TYPE_OBJECT}; defaults to TYPE_OBJECT
     * @return mixed
     * @throws InvalidArgumentException when $objectDecodeType is not a known type
     * @throws RuntimeException when the source cannot be tokenized or parsed
     */
    public static function decode($source, $objectDecodeType = Json::TYPE_OBJECT)
    {
        $decoder = new static($source, $objectDecodeType);
        return $decoder->_decodeValue();
    }

    /**
     * Recursive driving routine for supported top-level types
     *
     * Dispatches on the current token. A scalar DATUM is consumed and
     * returned; "{" and "[" delegate to the object/array decoders. Any other
     * token yields null without consuming anything.
     *
     * @return mixed
     */
    protected function _decodeValue()
    {
        switch ($this->token) {
            case self::DATUM:
                $result = $this->tokenValue;
                $this->_getNextToken();
                return $result;
            case self::LBRACE:
                return $this->_decodeObject();
            case self::LBRACKET:
                return $this->_decodeArray();
            default:
                return null;
        }
    }

    /**
     * Decodes an object of the form:
     *  { "attribute" : value, "attribute2" : value,...}
     *
     * Decodes to either an array or stdClass object, based on the value of
     * {@link $decodeType}. If invalid $decodeType present, returns as an
     * array. When decoding to stdClass, an empty-string key is stored under
     * the property name "_empty_".
     *
     * Note: this method gives no special treatment to a "__className"
     * member; it is decoded like any other attribute.
     *
     * The loop also stops at EOF, so an object that is cut off right after
     * "{" or after a "," is returned with the members read so far.
     *
     * @return array|stdClass
     * @throws RuntimeException on a missing key, ":" or ","
     */
    protected function _decodeObject()
    {
        $members = array();
        $tok     = $this->_getNextToken(); // Move past the '{'

        while ($tok !== self::EOF && $tok !== self::RBRACE) {
            if ($tok !== self::DATUM || !is_string($this->tokenValue)) {
                throw new RuntimeException('Missing key in object encoding: ' . $this->source);
            }

            $key = $this->tokenValue;
            $tok = $this->_getNextToken();

            if ($tok !== self::COLON) {
                throw new RuntimeException('Missing ":" in object encoding: ' . $this->source);
            }

            // Step onto the value; _decodeValue() leaves the tokenizer on
            // whatever follows it.
            $this->_getNextToken();
            $members[$key] = $this->_decodeValue();
            $tok = $this->token;

            if ($tok === self::RBRACE) {
                break;
            }

            if ($tok !== self::COMMA) {
                throw new RuntimeException('Missing "," in object encoding: ' . $this->source);
            }

            $tok = $this->_getNextToken();
        }

        switch ($this->decodeType) {
            case Json::TYPE_OBJECT:
                // Create new stdClass and populate with $members
                $result = new stdClass();
                foreach ($members as $key => $value) {
                    if ($key === '') {
                        $key = '_empty_';
                    }
                    $result->$key = $value;
                }
                break;
            case Json::TYPE_ARRAY:
            default:
                $result = $members;
                break;
        }

        // Move past the closing '}'
        $this->_getNextToken();
        return $result;
    }

    /**
     * Decodes a JSON array format:
     *    [element, element2,...,elementN]
     *
     * As with objects, reaching EOF ends the loop, so a truncated array is
     * returned with the elements read so far.
     *
     * @return array
     * @throws RuntimeException when two elements are not separated by ","
     */
    protected function _decodeArray()
    {
        $result = array();
        $tok    = $this->_getNextToken(); // Move past the '['

        while ($tok !== self::EOF && $tok !== self::RBRACKET) {
            $result[] = $this->_decodeValue();

            $tok = $this->token;

            if ($tok === self::RBRACKET || $tok === self::EOF) {
                break;
            }

            if ($tok !== self::COMMA) {
                throw new RuntimeException('Missing "," in array encoding: ' . $this->source);
            }

            $tok = $this->_getNextToken();
        }

        // Move past the closing ']'
        $this->_getNextToken();
        return $result;
    }

    /**
     * Advances the offset past any whitespace at the current position
     *
     * Whitespace here is tab, backspace, form feed, newline, carriage return
     * and space (inside a character class "\b" means backspace).
     */
    protected function _eatWhitespace()
    {
        if (preg_match('/([\t\b\f\n\r ])*/s', $this->source, $matches, PREG_OFFSET_CAPTURE, $this->offset)
            && $matches[0][1] == $this->offset
        ) {
            $this->offset += strlen($matches[0][0]);
        }
    }

    /**
     * Retrieves the next token from the source stream
     *
     * Updates {@link $token}, {@link $tokenValue} and {@link $offset}.
     *
     * @return int Token constant value specified in class definition
     * @throws RuntimeException on an illegal escape sequence, a malformed
     *                          literal or number, or an unrecognised character
     */
    protected function _getNextToken()
    {
        $this->token      = self::EOF;
        $this->tokenValue = null;
        $this->_eatWhitespace();

        if ($this->offset >= $this->sourceLength) {
            return self::EOF;
        }

        $str       = $this->source;
        $strLength = $this->sourceLength;
        $i         = $this->offset;
        $start     = $i;

        switch ($str[$i]) {
            case '{':
                $this->token = self::LBRACE;
                break;
            case '}':
                $this->token = self::RBRACE;
                break;
            case '[':
                $this->token = self::LBRACKET;
                break;
            case ']':
                $this->token = self::RBRACKET;
                break;
            case ',':
                $this->token = self::COMMA;
                break;
            case ':':
                $this->token = self::COLON;
                break;
            case '"':
                // Note: running out of input before the closing quote simply
                // ends the loop; the characters read so far become the value.
                $result = '';
                do {
                    $i++;
                    if ($i >= $strLength) {
                        break;
                    }

                    $chr = $str[$i];

                    if ($chr === '\\') {
                        $i++;
                        if ($i >= $strLength) {
                            break;
                        }
                        $chr = $str[$i];
                        switch ($chr) {
                            case '"':
                                $result .= '"';
                                break;
                            case '\\':
                                $result .= '\\';
                                break;
                            case '/':
                                $result .= '/';
                                break;
                            case 'b':
                                $result .= "\x08";
                                break;
                            case 'f':
                                $result .= "\x0c";
                                break;
                            case 'n':
                                $result .= "\x0a";
                                break;
                            case 'r':
                                $result .= "\x0d";
                                break;
                            case 't':
                                $result .= "\x09";
                                break;
                            case '\'':
                                $result .= '\'';
                                break;
                            default:
                                throw new RuntimeException("Illegal escape sequence '{$chr}'");
                        }
                    } elseif ($chr === '"') {
                        break;
                    } else {
                        $result .= $chr;
                    }
                } while ($i < $strLength);

                $this->token      = self::DATUM;
                $this->tokenValue = $result;
                break;
            case 't':
                // A literal must be spelled out in full; anything else is
                // rejected here rather than by reading past the source.
                if (substr($str, $start, 4) !== 'true') {
                    throw new RuntimeException('Illegal Token');
                }
                $this->token      = self::DATUM;
                $this->tokenValue = true;
                $i += 3;
                break;
            case 'f':
                if (substr($str, $start, 5) !== 'false') {
                    throw new RuntimeException('Illegal Token');
                }
                $this->token      = self::DATUM;
                $this->tokenValue = false;
                $i += 4;
                break;
            case 'n':
                if (substr($str, $start, 4) !== 'null') {
                    throw new RuntimeException('Illegal Token');
                }
                $this->token      = self::DATUM;
                $this->tokenValue = null;
                $i += 3;
                break;
        }

        if ($this->token !== self::EOF) {
            $this->offset = $i + 1; // Consume the last token character
            return $this->token;
        }

        // No structural, string or literal token matched: $i still equals
        // $start, and the only remaining possibility is a number.
        $chr = $str[$i];

        if ($chr === '-' || $chr === '.' || ($chr >= '0' && $chr <= '9')) {
            if (preg_match('/-?([0-9])*(\.[0-9]*)?((e|E)((-|\+)?)[0-9]+)?/s', $str, $matches, PREG_OFFSET_CAPTURE, $start)
                && $matches[0][1] == $start
            ) {
                $datum = $matches[0][0];

                if (!is_numeric($datum)) {
                    throw new RuntimeException("Illegal number format: {$datum}");
                }

                if (preg_match('/^0\d+$/', $datum)) {
                    throw new RuntimeException("Octal notation not supported by JSON (value: {$datum})");
                }

                // Yield an int whenever the value survives the int
                // conversion unchanged, a float otherwise.
                $val  = intval($datum);
                $fVal = floatval($datum);
                $this->tokenValue = ($val == $fVal ? $val : $fVal);

                $this->token  = self::DATUM;
                $this->offset = $start + strlen($datum);
            }
        } else {
            throw new RuntimeException('Illegal Token');
        }

        return $this->token;
    }

    /**
     * Decode Unicode Characters from \u0000 ASCII syntax.
     *
     * Every \uXXXX sequence is replaced by its UTF-8 encoding. When the
     * decoded character is one that must stay escaped inside a JSON string
     * (backslash, quote, slash, and the \n \t \r \b \f controls) it is written
     * back in its short escaped form. Printable ASCII and multi-byte UTF-8
     * sequences are copied through unchanged.
     *
     * Bytes that match none of the handled ranges (for example raw control
     * characters below 0x20) are not copied to the result.
     *
     * NOTE(review): the scan does not track escaped backslashes, so the
     * "\uXXXX" tail of an input such as "\\uXXXX" is decoded as well --
     * confirm whether that is intended.
     *
     * This algorithm was originally developed for the
     * Solar Framework by Paul M. Jones
     *
     * @link   http://solarphp.com/
     * @link   https://github.com/solarphp/core/blob/master/Solar/Json.php
     * @param  string $chrs
     * @return string
     */
    public static function decodeUnicodeString($chrs)
    {
        $chrs       = (string) $chrs;
        $utf8       = '';
        $strlenChrs = strlen($chrs);

        for ($i = 0; $i < $strlenChrs; $i++) {
            $ordChrsC = ord($chrs[$i]);

            if (preg_match('/\\\u[0-9A-F]{4}/i', substr($chrs, $i, 6))) {
                // single, escaped unicode character
                $utf16    = chr(hexdec(substr($chrs, ($i + 2), 2)))
                          . chr(hexdec(substr($chrs, ($i + 4), 2)));
                $utf8char = self::_utf162utf8($utf16);
                $search   = array('\\', "\n", "\t", "\r", chr(0x08), chr(0x0C), '"', '\'', '/');
                if (in_array($utf8char, $search, true)) {
                    $replace  = array('\\\\', '\\n', '\\t', '\\r', '\\b', '\\f', '\\"', '\\\'', '\\/');
                    $utf8char = str_replace($search, $replace, $utf8char);
                }
                $utf8 .= $utf8char;
                $i    += 5;
            } elseif ($ordChrsC >= 0x20 && $ordChrsC <= 0x7F) {
                $utf8 .= $chrs[$i];
            } elseif (($ordChrsC & 0xE0) === 0xC0) {
                // characters U-00000080 - U-000007FF, mask 110XXXXX
                // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                $utf8 .= substr($chrs, $i, 2);
                ++$i;
            } elseif (($ordChrsC & 0xF0) === 0xE0) {
                // characters U-00000800 - U-0000FFFF, mask 1110XXXX
                // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                $utf8 .= substr($chrs, $i, 3);
                $i    += 2;
            } elseif (($ordChrsC & 0xF8) === 0xF0) {
                // characters U-00010000 - U-001FFFFF, mask 11110XXX
                // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                $utf8 .= substr($chrs, $i, 4);
                $i    += 3;
            } elseif (($ordChrsC & 0xFC) === 0xF8) {
                // characters U-00200000 - U-03FFFFFF, mask 111110XX
                // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                $utf8 .= substr($chrs, $i, 5);
                $i    += 4;
            } elseif (($ordChrsC & 0xFE) === 0xFC) {
                // characters U-04000000 - U-7FFFFFFF, mask 1111110X
                // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                $utf8 .= substr($chrs, $i, 6);
                $i    += 5;
            }
        }

        return $utf8;
    }

    /**
     * Convert a string from one UTF-16 char to one UTF-8 char.
     *
     * Normally should be handled by mb_convert_encoding, but
     * provides a slower PHP-only method for installations
     * that lack the multibyte string extension.
     *
     * The two input bytes are read high byte first.
     *
     * This method is from the Solar Framework by Paul M. Jones
     *
     * @link   http://solarphp.com
     * @param  string $utf16 UTF-16 character (two bytes)
     * @return string UTF-8 character
     */
    protected static function _utf162utf8($utf16)
    {
        // Check for mb extension otherwise do by hand.
        if (function_exists('mb_convert_encoding')) {
            return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
        }

        $bytes = (ord($utf16[0]) << 8) | ord($utf16[1]);

        switch (true) {
            case ((0x7F & $bytes) == $bytes):
                // this case should never be reached, because we are in ASCII range
                // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                return chr(0x7F & $bytes);

            case (0x07FF & $bytes) == $bytes:
                // return a 2-byte UTF-8 character
                // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                return chr(0xC0 | (($bytes >> 6) & 0x1F))
                     . chr(0x80 | ($bytes & 0x3F));

            case (0xFFFF & $bytes) == $bytes:
                // return a 3-byte UTF-8 character
                // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                return chr(0xE0 | (($bytes >> 12) & 0x0F))
                     . chr(0x80 | (($bytes >> 6) & 0x3F))
                     . chr(0x80 | ($bytes & 0x3F));
        }

        // ignoring UTF-32 for now, sorry
        return '';
    }
}