first commit
This commit is contained in:
@@ -0,0 +1,246 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @defgroup xml XML
|
||||
* Implements XML parsing and creation concerns.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file classes/xml/PKPXMLParser.php
|
||||
*
|
||||
* Copyright (c) 2014-2021 Simon Fraser University
|
||||
* Copyright (c) 2000-2021 John Willinsky
|
||||
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
|
||||
*
|
||||
* @class PKPXMLParser
|
||||
*
|
||||
* @ingroup xml
|
||||
*
|
||||
* @brief Generic class for parsing an XML document into a data structure.
|
||||
*/
|
||||
|
||||
namespace PKP\xml;
|
||||
|
||||
use PKP\file\FileManager;
|
||||
use XMLParser;
|
||||
|
||||
class PKPXMLParser
{
    public const XML_PARSER_SOURCE_ENCODING = 'utf-8';

    public const XML_PARSER_TARGET_ENCODING = 'utf-8';

    /** Number of bytes requested from the source stream per read in parse(). */
    private const STREAM_CHUNK_SIZE = 16384;

    /** @var XMLParserHandler handler receiving the parser callbacks; unset until setHandler() or the first parse */
    public $handler;

    /** @var array List of error strings */
    public $errors;

    /**
     * Constructor.
     * Starts with an empty error list; the native parser is created per parse call.
     */
    public function __construct()
    {
        $this->errors = [];
    }

    /**
     * Parse an XML string using the configured handler.
     * If no handler has been set, an XMLParserDOMHandler is installed via setHandler()
     * (and therefore stays installed for later calls).
     *
     * Parse failures do not abort the call: the error message is appended to the
     * error list and whatever the handler collected so far is still returned.
     *
     * @param string $text XML data
     *
     * @return mixed the handler's getResult() value; its type depends on the handler
     */
    public function parseText($text)
    {
        $parser = $this->createParser();
        $defaultHandler = $this->attachHandler($parser);

        if (!xml_parse($parser, $text, true)) {
            $this->addError(xml_error_string(xml_get_error_code($parser)));
        }

        $result = $this->handler->getResult();
        $this->destroyParser($parser);
        if ($defaultHandler !== null) {
            $defaultHandler->destroy();
        }

        return $result;
    }

    /**
     * Parse an XML file using the specified handler.
     * If no handler has been specified, XMLParserDOMHandler is used by default, returning a tree structure representing the document.
     *
     * The file is read in chunks. Parsing stops at the first error, which is
     * appended to the error list; the handler's partial result is still returned.
     *
     * @param string $file full path to the XML file
     *
     * @return ?object|false actual return type depends on the handler; false if the file could not be opened
     */
    public function parse($file)
    {
        $parser = $this->createParser();
        $defaultHandler = $this->attachHandler($parser);

        if (!$stream = FileManager::getStream($file)) {
            // Release the parser created above instead of leaking it.
            $this->destroyParser($parser);
            return false;
        }

        try {
            $finalized = false;
            $failed = false;

            while (($data = $stream->read(self::STREAM_CHUNK_SIZE)) !== '') {
                $finalized = $stream->eof();
                if (!xml_parse($parser, $data, $finalized)) {
                    $this->addError(xml_error_string(xml_get_error_code($parser)));
                    // Once the parser is in an error state every further chunk fails too.
                    $failed = true;
                    break;
                }
            }

            // The loop can end on an empty read without eof() ever having been true
            // for a data chunk (e.g. the file size is a multiple of the chunk size).
            // NOTE(review): assumes feof-like eof() semantics on the stream - confirm.
            // Without this final call an unterminated document would pass silently.
            if (!$failed && !$finalized && !xml_parse($parser, '', true)) {
                $this->addError(xml_error_string(xml_get_error_code($parser)));
            }

            $result = $this->handler->getResult();
        } finally {
            // Runs on exceptions from the stream or the handler as well.
            $stream->close();
            $this->destroyParser($parser);
        }

        if ($defaultHandler !== null) {
            $defaultHandler->destroy();
        }

        return $result;
    }

    /**
     * Add an error to the current error list
     *
     * @param string $error
     */
    public function addError($error)
    {
        $this->errors[] = $error;
    }

    /**
     * Get the current list of errors
     *
     * @return array error strings in the order they were recorded
     */
    public function getErrors()
    {
        return $this->errors;
    }

    /**
     * Determine whether or not the parser encountered an error (false)
     * or completed successfully (true)
     *
     * @return bool
     */
    public function getStatus()
    {
        return empty($this->errors);
    }

    /**
     * Set the handler to use for parse(...) and parseText(...).
     *
     * @param XMLParserHandler $handler
     */
    public function setHandler($handler)
    {
        $this->handler = $handler;
    }

    /**
     * Parse XML data using xml_parse_into_struct and return data in an array.
     * This is best suited for XML documents with fairly simple structure.
     *
     * Only "open" and "complete" entries are reported, so each element appears
     * once. A failure here is NOT added to the error list.
     *
     * @param string $text XML data
     * @param array $tagsToMatch optional, if set tags not in the array will be skipped
     *
     * @return array|null a struct of the form ($TAG => array(array('attributes' => array( ... ), 'value' => $VALUE), ... ), ... ), or null if parsing failed
     */
    public function parseTextStruct($text, $tagsToMatch = [])
    {
        $parser = $this->createParser();
        $result = xml_parse_into_struct($parser, $text, $values, $tags);
        $this->destroyParser($parser);
        if (!$result) {
            return null;
        }

        // Clean up data struct, removing undesired tags if necessary
        $data = [];
        foreach ($tags as $key => $indices) {
            if (!empty($tagsToMatch) && !in_array($key, $tagsToMatch, true)) {
                continue;
            }

            $data[$key] = [];

            foreach ($indices as $index) {
                $type = $values[$index]['type'] ?? null;
                if ($type !== 'open' && $type !== 'complete') {
                    continue;
                }

                $data[$key][] = [
                    'attributes' => $values[$index]['attributes'] ?? [],
                    'value' => $values[$index]['value'] ?? ''
                ];
            }
        }

        return $data;
    }

    /**
     * Parse an XML file using xml_parse_into_struct and return data in an array.
     * This is best suited for XML documents with fairly simple structure.
     *
     * @param string $file full path to the XML file
     * @param array $tagsToMatch optional, if set tags not in the array will be skipped
     *
     * @return bool|array|null false if the file could not be opened or is empty, null if parsing failed,
     *  otherwise a struct as returned by parseTextStruct()
     */
    public function parseStruct($file, $tagsToMatch = [])
    {
        // Same failure contract as parse(): an unopenable file yields false.
        if (!$stream = FileManager::getStream($file)) {
            return false;
        }

        try {
            $fileContents = $stream->getContents();
        } finally {
            $stream->close();
        }

        if (!$fileContents) {
            return false;
        }

        return $this->parseTextStruct($fileContents, $tagsToMatch);
    }

    /**
     * Initialize a new XML parser.
     * Case folding is turned off so tag names keep the case used in the document.
     *
     * @return XMLParser
     */
    public function createParser()
    {
        $parser = xml_parser_create(static::XML_PARSER_SOURCE_ENCODING);
        xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, static::XML_PARSER_TARGET_ENCODING);
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, false);

        return $parser;
    }

    /**
     * Destroy XML parser.
     *
     * @param XMLParser $parser
     */
    public function destroyParser($parser)
    {
        xml_parser_free($parser);
    }

    /**
     * Make sure a handler is set and register its callbacks on the given parser.
     *
     * The callbacks are registered as [object, method] callables rather than via
     * xml_set_object() plus method-name strings, which PHP 8.4 deprecates.
     *
     * @param XMLParser $parser parser returned by createParser()
     *
     * @return ?XMLParserDOMHandler the default handler if one had to be created here, null otherwise
     */
    private function attachHandler($parser)
    {
        $defaultHandler = null;
        if (!isset($this->handler)) {
            // Use default handler for parsing
            $defaultHandler = new XMLParserDOMHandler();
            $this->setHandler($defaultHandler);
        }

        xml_set_element_handler($parser, [$this->handler, 'startElement'], [$this->handler, 'endElement']);
        xml_set_character_data_handler($parser, [$this->handler, 'characterData']);

        return $defaultHandler;
    }
}
|
||||
|
||||
// Outside strict mode the class stays reachable under its legacy global names.
if (!PKP_STRICT_MODE) {
    class_alias('\PKP\xml\PKPXMLParser', '\PKPXMLParser');

    // For PHP < 8.x, this class used to be called XMLParser. Alias for compatibility when possible,
    // i.e. only when no class of that name exists already.
    class_exists('XMLParser') || class_alias('\PKPXMLParser', '\XMLParser');
}
|
||||
@@ -0,0 +1,278 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file classes/xml/XMLNode.php
|
||||
*
|
||||
* Copyright (c) 2014-2021 Simon Fraser University
|
||||
* Copyright (c) 2000-2021 John Willinsky
|
||||
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
|
||||
*
|
||||
* @class XMLNode
|
||||
*
|
||||
* @ingroup xml
|
||||
*
|
||||
* @brief A single element node of the simple DOM-style tree built by XMLParserDOMHandler.
|
||||
* Each node holds its tag name, attributes, text value, parent and child XMLNode objects.
|
||||
*/
|
||||
|
||||
namespace PKP\xml;
|
||||
|
||||
class XMLNode
{
    /** @var string the element (tag) name */
    public $name;

    /** @var XMLNode the parent node (null if this is the root node) */
    public $parent;

    /** @var array the element's attributes */
    public $attributes;

    /** @var string the element's value */
    public $value;

    /** @var array the XMLNode children of this node */
    public $children;

    /**
     * Constructor.
     *
     * @param string $name element/tag name
     */
    public function __construct($name = null)
    {
        $this->name = $name;
        $this->parent = null;
        $this->attributes = [];
        $this->value = null;
        $this->children = [];
    }

    /**
     * Get the tag name, optionally without its namespace prefix.
     *
     * @param bool $includeNamespace false to strip everything up to and including the first ':'
     *
     * @return string
     */
    public function getName($includeNamespace = true)
    {
        // The cast keeps strpos() from receiving null for unnamed nodes (deprecated since PHP 8.1).
        if (
            $includeNamespace ||
            ($i = strpos((string) $this->name, ':')) === false
        ) {
            return $this->name;
        }
        return substr($this->name, $i + 1);
    }

    /**
     * @param string $name
     */
    public function setName($name)
    {
        $this->name = $name;
    }

    /**
     * @return XMLNode
     */
    public function &getParent()
    {
        return $this->parent;
    }

    /**
     * @param XMLNode $parent
     */
    public function setParent(&$parent)
    {
        // Store the object itself, not a reference to the caller's variable:
        // otherwise reassigning that variable later would silently re-parent this node.
        $this->parent = $parent;
    }

    /**
     * @return array all attributes
     */
    public function getAttributes()
    {
        return $this->attributes;
    }

    /**
     * @param string $name attribute name
     *
     * @return ?string attribute value, null if the attribute is not set
     */
    public function getAttribute($name)
    {
        return $this->attributes[$name] ?? null;
    }

    /**
     * @param string $name attribute name
     * @param string $value attribute value
     */
    public function setAttribute($name, $value)
    {
        $this->attributes[$name] = $value;
    }

    /**
     * @param array $attributes
     */
    public function setAttributes($attributes)
    {
        $this->attributes = $attributes;
    }

    /**
     * @return string
     */
    public function &getValue()
    {
        return $this->value;
    }

    /**
     * @param string $value
     */
    public function setValue($value)
    {
        $this->value = $value;
    }

    /**
     * @return array this node's children (XMLNode objects)
     */
    public function &getChildren()
    {
        return $this->children;
    }

    /**
     * @param string|string[] $name one tag name, or a list of acceptable tag names
     * @param int $index
     *
     * @return ?XMLNode the ($index+1)th child matching the specified name
     */
    public function &getChildByName($name, $index = 0)
    {
        $wanted = is_array($name) ? $name : [$name];

        foreach (array_keys($this->children) as $key) {
            $child = & $this->children[$key];
            if (in_array($child->getName(), $wanted, true)) {
                if ($index == 0) {
                    return $child;
                }
                $index--;
            }
            unset($child);
        }

        // The method returns by reference, so "nothing found" must come from a variable.
        $child = null;
        return $child;
    }

    /**
     * Get the value of a child node.
     *
     * @param string $name name of node
     * @param int $index Optional index of child node to find
     *
     * @return ?string value of the matching child, null if there is no such child
     */
    public function &getChildValue($name, $index = 0)
    {
        // $index has to be forwarded, otherwise the first match is always used.
        $node = & $this->getChildByName($name, $index);
        if ($node) {
            $returner = & $node->getValue();
        } else {
            $returner = null;
        }
        return $returner;
    }

    /**
     * @param XMLNode $node the child node to add
     */
    public function addChild(&$node)
    {
        // Append the object itself. Appending a reference to the caller's variable would
        // make every child added from a reused loop variable point at the last node.
        $this->children[] = $node;
    }

    /**
     * Serialize this node and its descendants as XML.
     *
     * A node without a parent is treated as the document root: an XML declaration is
     * emitted from its 'version' attribute and, if a 'type' attribute is present, a
     * DOCTYPE built from 'type', 'dtd' and 'url'. A node named '!--' is written as a comment.
     *
     * @param resource $output file handle to write to, or true for stdout, or null if XML to be returned as string
     *
     * @return ?string the XML when $output is null, otherwise null
     */
    public function &toXml($output = null)
    {
        $nullVar = null;
        $out = '';

        if ($this->parent === null) {
            // This is the root node. Output information about the document.
            $out .= '<?xml version="' . $this->getAttribute('version') . "\" encoding=\"UTF-8\"?>\n";
            if ($this->getAttribute('type') != '') {
                if ($this->getAttribute('url') != '') {
                    $out .= '<!DOCTYPE ' . $this->getAttribute('type') . ' PUBLIC "' . $this->getAttribute('dtd') . '" "' . $this->getAttribute('url') . '">';
                } else {
                    $out .= '<!DOCTYPE ' . $this->getAttribute('type') . ' SYSTEM "' . $this->getAttribute('dtd') . '">';
                }
            }
        }

        if ($this->name !== null) {
            $out .= '<' . $this->name;
            foreach ($this->attributes as $name => $value) {
                $value = self::xmlentities($value);
                $out .= " {$name}=\"{$value}\"";
            }
            if ($this->name !== '!--') {
                $out .= '>';
            }
        }
        $out .= self::xmlentities($this->value, ENT_NOQUOTES);

        foreach ($this->children as $child) {
            if ($output !== null) {
                // Flush what belongs to this node before the child writes its own output.
                self::writeChunk($output, $out);
                $out = '';
            }
            $out .= $child->toXml($output);
        }

        if ($this->name === '!--') {
            $out .= '-->';
        } elseif ($this->name !== null) {
            $out .= '</' . $this->name . '>';
        }

        if ($output !== null) {
            self::writeChunk($output, $out);
            return $nullVar;
        }
        return $out;
    }

    /**
     * Escape a string for use in XML text or attribute values.
     *
     * @param ?string $string text to escape; null is treated as an empty string
     * @param int $quote_style one of the ENT_* quote flags accepted by htmlspecialchars()
     *
     * @return string
     */
    public static function xmlentities($string, $quote_style = ENT_QUOTES)
    {
        // Nodes without a value hold null; passing null to htmlspecialchars() is deprecated since PHP 8.1.
        return htmlspecialchars((string) $string, $quote_style, 'UTF-8');
    }

    /**
     * Drop this node's data and recursively do the same for all descendants.
     */
    public function destroy()
    {
        // Detach the child list first so that repeated calls and cyclic trees terminate.
        $children = $this->children ?? [];
        unset($this->value, $this->attributes, $this->parent, $this->name, $this->children);
        foreach ($children as $child) {
            $child->destroy();
        }
    }

    /**
     * Send a piece of serialized XML to stdout or to a file handle.
     *
     * @param resource|true $output true for stdout, otherwise a writable file handle
     * @param string $chunk
     */
    private static function writeChunk($output, $chunk)
    {
        if ($output === true) {
            echo $chunk;
        } else {
            fwrite($output, $chunk);
        }
    }
}
|
||||
|
||||
// Outside strict mode, also register the class under its legacy global name.
PKP_STRICT_MODE || class_alias('\PKP\xml\XMLNode', '\XMLNode');
|
||||
@@ -0,0 +1,111 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file classes/xml/XMLParserDOMHandler.php
|
||||
*
|
||||
* Copyright (c) 2014-2021 Simon Fraser University
|
||||
* Copyright (c) 2000-2021 John Willinsky
|
||||
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
|
||||
*
|
||||
* @class XMLParserDOMHandler
|
||||
*
|
||||
* @ingroup xml
|
||||
*
|
||||
* @see PKPXMLParser
|
||||
*
|
||||
* @brief Default handler for PKPXMLParser returning a simple DOM-style object.
|
||||
* This handler parses an XML document into a tree structure of XMLNode objects.
|
||||
*
|
||||
*/
|
||||
|
||||
namespace PKP\xml;
|
||||
|
||||
class XMLParserDOMHandler extends XMLParserHandler
{
    /** @var XMLNode first element opened while no element was current, i.e. the document root */
    public $rootNode;

    /** @var XMLNode element that is open right now (null/unset outside of any element) */
    public $currentNode;

    /** @var string character data gathered since the most recent start or end tag */
    public $currentData;

    /** @var XMLNode[] initialised to an empty list; not touched by any method of this class */
    public $rootNodes = [];

    /**
     * Constructor.
     */
    public function __construct()
    {
        $this->currentNode = null;
        $this->rootNodes = [];
    }

    /**
     * Release the handler's links into the node tree.
     * The tree itself is left intact for whoever obtained it from getResult().
     */
    public function destroy()
    {
        unset($this->rootNode);
        unset($this->currentNode);
        unset($this->currentData);
    }

    /**
     * Callback function to act as the start element handler.
     * Creates a node for the element, hangs it below the open element (or makes it
     * the root) and makes it the open element.
     *
     * @param \XMLParser|resource $parser parser handle supplied by the XML extension (unused)
     * @param string $tag
     * @param array $attributes
     */
    public function startElement($parser, $tag, $attributes)
    {
        // Text seen before this start tag is discarded.
        $this->currentData = null;

        $element = new XMLNode($tag);
        $element->setAttributes($attributes);

        if (!isset($this->currentNode)) {
            $this->rootNode = & $element;
        } else {
            $this->currentNode->addChild($element);
            $element->setParent($this->currentNode);
        }

        // Must stay a reference re-binding (=&): XMLNode::setParent() takes its argument
        // by reference, so a plain assignment here could write through into a parent link.
        $this->currentNode = & $element;
    }

    /**
     * Callback function to act as the end element handler.
     * Stores the gathered text as the element's value and steps back up to its parent.
     *
     * @param \XMLParser|resource $parser parser handle supplied by the XML extension (unused)
     * @param string $tag
     */
    public function endElement($parser, $tag)
    {
        $finished = $this->currentNode;
        $finished->setValue($this->currentData);

        // Re-bind (=&) rather than assign, for the same reason as in startElement().
        $this->currentNode = & $finished->getParent();
        $this->currentData = null;
    }

    /**
     * Callback function to act as the character data handler.
     * The parser may deliver an element's text in several pieces; they are concatenated.
     *
     * @param \XMLParser|resource $parser parser handle supplied by the XML extension (unused)
     * @param string $data
     */
    public function characterData($parser, $data)
    {
        $this->currentData .= $data;
    }

    /**
     * Returns the root node of the tree representing the document.
     *
     * @return XMLNode
     */
    public function getResult()
    {
        return $this->rootNode;
    }
}
|
||||
|
||||
// Outside strict mode, also register the class under its legacy global name.
PKP_STRICT_MODE || class_alias('\PKP\xml\XMLParserDOMHandler', '\XMLParserDOMHandler');
|
||||
@@ -0,0 +1,74 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file classes/xml/XMLParserHandler.php
|
||||
*
|
||||
* Copyright (c) 2014-2021 Simon Fraser University
|
||||
* Copyright (c) 2000-2021 John Willinsky
|
||||
* Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
|
||||
*
|
||||
* @class XMLParserHandler
|
||||
*
|
||||
* @ingroup xml
|
||||
*
|
||||
* @brief Interface for handler class used by PKPXMLParser.
|
||||
* All XML parser handler classes must implement these methods.
|
||||
*/
|
||||
|
||||
namespace PKP\xml;
|
||||
|
||||
class XMLParserHandler
{
    /**
     * Start element callback. Does nothing in this base class.
     *
     * @param \XMLParser|resource $parser parser handle supplied by the XML extension
     * @param string $tag name of the element being opened
     * @param array $attributes the element's attributes
     */
    public function startElement($parser, $tag, $attributes) {}

    /**
     * End element callback. Does nothing in this base class.
     *
     * @param \XMLParser|resource $parser parser handle supplied by the XML extension
     * @param string $tag name of the element being closed
     */
    public function endElement($parser, $tag) {}

    /**
     * Character data callback. Does nothing in this base class.
     *
     * @param \XMLParser|resource $parser parser handle supplied by the XML extension
     * @param string $data a piece of text content
     */
    public function characterData($parser, $data) {}

    /**
     * Returns the data structure built from the parsed content.
     * The format is specific to the handler; this base class has nothing to return.
     *
     * @return null
     */
    public function getResult()
    {
        return null;
    }

    /**
     * Clean-up hook for this object. Does nothing in this base class.
     *
     * @deprecated
     */
    public function destroy() {}
}
|
||||
|
||||
// Outside strict mode, also register the class under its legacy global name.
PKP_STRICT_MODE || class_alias('\PKP\xml\XMLParserHandler', '\XMLParserHandler');
|
||||
Reference in New Issue
Block a user