<?php
/**
* Zend Framework (http://framework.zend.com/)
*
* @link http://github.com/zendframework/zf2 for the canonical source repository
* @copyright Copyright (c) 2005-2014 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
namespace Zend\Dom;
use DOMDocument;
use DOMXPath;
use Zend\Stdlib\ErrorHandler;
/**
 * Query DOM structures based on CSS selectors and/or XPath.
 *
 * A document is registered as a raw string together with a detected (or
 * explicitly chosen) document type. Every query parses that string into a
 * fresh DOMDocument, evaluates the XPath expression against it and wraps the
 * outcome in a NodeList.
 */
class Query
{
    /**#@+
     * Document types
     */
    const DOC_XML   = 'docXml';
    const DOC_HTML  = 'docHtml';
    const DOC_XHTML = 'docXhtml';
    /**#@-*/

    /**
     * Raw document markup; null until a non-empty document is registered
     * @var null|string
     */
    protected $document;

    /**
     * libxml errors collected while parsing for the most recent query, or
     * false when there were none
     * @var false|array
     */
    protected $documentErrors = false;

    /**
     * Document type (one of the DOC_* constants); null until a document is
     * registered
     * @var null|string
     */
    protected $docType;

    /**
     * Document encoding
     * @var null|string
     */
    protected $encoding;

    /**
     * XPath namespaces, iterated as prefix => namespace URI
     * @var array
     */
    protected $xpathNamespaces = array();

    /**
     * XPath PHP Functions: true to register all functions, otherwise the
     * restriction forwarded to DOMXPath::registerPHPFunctions()
     * @var mixed
     */
    protected $xpathPhpFunctions;

    /**
     * Constructor
     *
     * @param null|string $document Markup to query; null/empty registers nothing
     * @param null|string $encoding Document encoding
     */
    public function __construct($document = null, $encoding = null)
    {
        $this->setEncoding($encoding);
        $this->setDocument($document);
    }

    /**
     * Set document encoding
     *
     * @param  null|string $encoding Null clears the encoding; anything else is cast to string
     * @return Query
     */
    public function setEncoding($encoding)
    {
        $this->encoding = (null === $encoding) ? null : (string) $encoding;
        return $this;
    }

    /**
     * Get document encoding
     *
     * @return null|string
     */
    public function getEncoding()
    {
        return $this->encoding;
    }

    /**
     * Set document to query
     *
     * Detects the document type from the markup: a leading XML declaration
     * selects XML (or XHTML when an <html xmlns="..."> element is present),
     * the substring "DTD XHTML" selects XHTML, anything else is HTML.
     * Null or empty input leaves the current state untouched.
     *
     * @param  null|string $document
     * @param  null|string $encoding Document encoding
     * @return Query
     */
    public function setDocument($document, $encoding = null)
    {
        // Cast first so that null (the constructor default) never reaches a
        // string function; passing null there is deprecated as of PHP 8.1.
        $document = (string) $document;
        if ('' === $document) {
            return $this;
        }

        // breaking XML declaration to make syntax highlighting work
        if ('<' . '?xml' === substr(trim($document), 0, 5)) {
            if (preg_match('/<html[^>]*xmlns="([^"]+)"[^>]*>/i', $document, $matches)) {
                // NOTE(review): the namespace is appended under a numeric key,
                // and getNodeList() uses the key as the prefix when calling
                // registerNamespace() - confirm this is the intended prefix.
                $this->xpathNamespaces[] = $matches[1];
                return $this->setDocumentXhtml($document, $encoding);
            }
            return $this->setDocumentXml($document, $encoding);
        }

        if (false !== strpos($document, 'DTD XHTML')) {
            return $this->setDocumentXhtml($document, $encoding);
        }

        return $this->setDocumentHtml($document, $encoding);
    }

    /**
     * Register HTML document
     *
     * @param  string      $document
     * @param  null|string $encoding Document encoding
     * @return Query
     */
    public function setDocumentHtml($document, $encoding = null)
    {
        return $this->registerDocument($document, self::DOC_HTML, $encoding);
    }

    /**
     * Register XHTML document
     *
     * @param  string      $document
     * @param  null|string $encoding Document encoding
     * @return Query
     */
    public function setDocumentXhtml($document, $encoding = null)
    {
        return $this->registerDocument($document, self::DOC_XHTML, $encoding);
    }

    /**
     * Register XML document
     *
     * @param  string      $document
     * @param  null|string $encoding Document encoding
     * @return Query
     */
    public function setDocumentXml($document, $encoding = null)
    {
        return $this->registerDocument($document, self::DOC_XML, $encoding);
    }

    /**
     * Retrieve current document
     *
     * @return null|string
     */
    public function getDocument()
    {
        return $this->document;
    }

    /**
     * Get document type
     *
     * @return null|string
     */
    public function getDocumentType()
    {
        return $this->docType;
    }

    /**
     * Get any DOMDocument errors found
     *
     * @return false|array
     */
    public function getDocumentErrors()
    {
        return $this->documentErrors;
    }

    /**
     * Perform a CSS selector query
     *
     * The selector is translated with Css2Xpath::transform() and then handed
     * to queryXpath() together with the original selector.
     *
     * @param  string $query
     * @return NodeList
     */
    public function execute($query)
    {
        $xpathQuery = Css2Xpath::transform($query);
        return $this->queryXpath($xpathQuery, $query);
    }

    /**
     * Perform an XPath query
     *
     * Parses the registered document into a new DOMDocument (loadXML() for
     * DOC_XML, loadHTML() otherwise), records libxml errors for
     * getDocumentErrors() and evaluates the XPath expression.
     *
     * The process-wide libxml settings touched while parsing are restored to
     * the values the caller had before any exception is thrown.
     *
     * @param  string|array $xpathQuery Cast to string before evaluation
     * @param  string|null  $query      CSS selector query
     * @throws Exception\RuntimeException When no document is registered, an
     *                                    XML document carries a DOCTYPE, or
     *                                    the document cannot be parsed
     * @return NodeList
     */
    public function queryXpath($xpathQuery, $query = null)
    {
        if (null === ($document = $this->getDocument())) {
            throw new Exception\RuntimeException('Cannot query; no document registered');
        }

        $encoding = $this->getEncoding();
        $type     = $this->getDocumentType();

        // Errors belong to the parse performed by this call only.
        $this->documentErrors = false;

        // Remember the caller's libxml configuration so it can be restored
        // instead of being forced to "false" afterwards.
        $previousErrorMode = libxml_use_internal_errors(true);

        // libxml_disable_entity_loader() is deprecated as of PHP 8.0, so it is
        // only toggled on older runtimes.
        $manageEntityLoader   = PHP_VERSION_ID < 80000;
        $previousEntityLoader = $manageEntityLoader ? libxml_disable_entity_loader(true) : false;

        if (null === $encoding) {
            $domDoc = new DOMDocument('1.0');
        } else {
            $domDoc = new DOMDocument('1.0', $encoding);
        }

        $illegalDoctype = false;
        switch ($type) {
            case self::DOC_XML:
                $success = $domDoc->loadXML($document);
                foreach ($domDoc->childNodes as $child) {
                    if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) {
                        // Defer the exception until libxml state is restored.
                        $illegalDoctype = true;
                        break;
                    }
                }
                break;
            case self::DOC_HTML:
            case self::DOC_XHTML:
            default:
                $success = $domDoc->loadHTML($document);
                break;
        }

        $errors = libxml_get_errors();
        if (!empty($errors)) {
            $this->documentErrors = $errors;
            libxml_clear_errors();
        }

        if ($manageEntityLoader) {
            libxml_disable_entity_loader($previousEntityLoader);
        }
        libxml_use_internal_errors($previousErrorMode);

        if ($illegalDoctype) {
            throw new Exception\RuntimeException(
                'Invalid XML: Detected use of illegal DOCTYPE'
            );
        }

        if (!$success) {
            throw new Exception\RuntimeException(sprintf('Error parsing document (type == %s)', $type));
        }

        $nodeList = $this->getNodeList($domDoc, $xpathQuery);
        return new NodeList($query, $xpathQuery, $domDoc, $nodeList);
    }

    /**
     * Register XPath namespaces
     *
     * Replaces the current set; entries are used as prefix => namespace URI.
     *
     * @param  array $xpathNamespaces
     * @return void
     */
    public function registerXpathNamespaces($xpathNamespaces)
    {
        $this->xpathNamespaces = $xpathNamespaces;
    }

    /**
     * Register PHP Functions to use in internal DOMXPath
     *
     * @param  mixed $xpathPhpFunctions True registers all PHP functions; any
     *                                  other truthy value is forwarded to
     *                                  DOMXPath::registerPHPFunctions(); a
     *                                  falsy value disables registration
     * @return void
     */
    public function registerXpathPhpFunctions($xpathPhpFunctions = true)
    {
        $this->xpathPhpFunctions = $xpathPhpFunctions;
    }

    /**
     * Prepare node list
     *
     * Builds a DOMXPath for the document, registers the configured namespaces
     * and PHP functions, and evaluates the query. Errors raised during
     * evaluation are captured through ErrorHandler and rethrown.
     *
     * @param  DOMDocument  $document
     * @param  string|array $xpathQuery Cast to string before evaluation
     *                                  (NOTE(review): an array does not cast
     *                                  to a usable expression - confirm that
     *                                  callers only pass strings)
     * @return \DOMNodeList|false Result of DOMXPath::query()
     */
    protected function getNodeList($document, $xpathQuery)
    {
        $xpath = new DOMXPath($document);

        foreach ($this->xpathNamespaces as $prefix => $namespaceUri) {
            $xpath->registerNamespace($prefix, $namespaceUri);
        }

        if ($this->xpathPhpFunctions) {
            $xpath->registerNamespace("php", "http://php.net/xpath");
            if ($this->xpathPhpFunctions === true) {
                $xpath->registerPHPFunctions();
            } else {
                $xpath->registerPHPFunctions($this->xpathPhpFunctions);
            }
        }

        $xpathQuery = (string) $xpathQuery;

        ErrorHandler::start();
        $nodeList = $xpath->query($xpathQuery);
        $error    = ErrorHandler::stop();
        if ($error) {
            throw $error;
        }

        return $nodeList;
    }

    /**
     * Store the document, its type and (when given) its encoding.
     *
     * Shared implementation of setDocumentHtml(), setDocumentXhtml() and
     * setDocumentXml().
     *
     * @param  string      $document
     * @param  string      $docType  One of the DOC_* constants
     * @param  null|string $encoding Left unchanged when null
     * @return Query
     */
    private function registerDocument($document, $docType, $encoding)
    {
        $this->document = (string) $document;
        $this->docType  = $docType;
        if (null !== $encoding) {
            $this->setEncoding($encoding);
        }
        return $this;
    }
}
// Change history
// # | Change | User | Description | Committed
// #1 | 18334 | Liz Lam | initial add of jambox |