283 lines
8.1 KiB
C++
283 lines
8.1 KiB
C++
//****************************************************************************
|
|
// Filename: XmlParser.h
|
|
// Copyright 1999 Daniel X. Pape. All rights reserved.
|
|
//
|
|
// Description: A set of classes for reading and parsing simple XML files.
|
|
//
|
|
//****************************************************************************
|
|
// Revision History:
|
|
// Thursday, July 08, 1999 - Original. Heavily based on "A Simple XML
|
|
// Parser" by Sebastien Andrivet. See Documentation.
|
|
//****************************************************************************
|
|
|
|
#ifndef _XMLPARSER_H_
|
|
#define _XMLPARSER_H_
|
|
|
|
#ifdef _MSC_VER
|
|
// Disable stupid MSVC warning about identifiers > 255 chars long
|
|
#pragma warning (disable: 4786)
|
|
#endif
|
|
|
|
// STL
|
|
#include <memory>
|
|
#include <vector>
|
|
#include <map>
|
|
#include <string>
|
|
|
|
#include "XmlElements.h"
|
|
|
|
namespace SimpleXMLParser
|
|
{
|
|
|
|
// ***************************************************************************
// Class: XmlException
// Desc: Describes a parsing error by the line and column it refers to.
//       Both values are supplied at construction and are read-only
//       afterwards.
// ***************************************************************************
class XmlException
{
public:
    // Build an exception for the given line / column pair.
    XmlException(int line, int column);

    // Accessors for the stored position.
    int GetLine() const;
    int GetColumn() const;

private:
    int line_;      // Line of the error
    int column_;    // Column of the error
};
|
|
|
|
// ***************************************************************************
|
|
// Class: XmlParser
|
|
// Desc:
|
|
// ***************************************************************************
|
|
class XmlParser
|
|
{
|
|
|
|
public:
|
|
|
|
XmlParser();
|
|
|
|
Element& Parse(const char * szSource, int nSourceSize);
|
|
|
|
private:
|
|
|
|
XmlParser(const XmlParser&);
|
|
XmlParser& operator=(const XmlParser&);
|
|
|
|
// ***********************************************************************
|
|
// Class: Bookmark
|
|
// Desc: record the current position in the document
|
|
// ***********************************************************************
|
|
class Bookmark
|
|
{
|
|
private:
|
|
Bookmark(const Bookmark&);
|
|
Bookmark& operator=(const Bookmark&);
|
|
|
|
XmlParser& parser_; // XmlParser
|
|
const char* sourceCurrent_; // Position recorded
|
|
int line_; // Line recorded
|
|
int column_; // Column recorded
|
|
|
|
public:
|
|
Bookmark(XmlParser& reader);
|
|
// Change back the position
|
|
void Restore();
|
|
// Get the sub-string between the current and
|
|
// the recorded positions
|
|
void GetSubString(std::string& strString, int nNumEndSkip = 0);
|
|
// Record the current position
|
|
void Reset();
|
|
};
|
|
|
|
friend class Bookmark;
|
|
|
|
const char* source_; // XML document
|
|
const char* sourceCurrent_; // Current position
|
|
const char* sourceEnd_; // End of the document
|
|
int line_; // Current line
|
|
int column_; // Current column
|
|
|
|
std::string xmlVersion_; // Version of XML used in doc.
|
|
std::auto_ptr<Element> rootElem_; // Root element
|
|
|
|
// parsing
|
|
|
|
char NextChar();
|
|
void PreviousChar();
|
|
|
|
// All of these following member functions can throw exceptions
|
|
|
|
bool ParseSpaces();
|
|
bool ParseString(const char* pString);
|
|
bool ParseStringNoCase(const char* pString);
|
|
bool ParseNumber(int& nNum);
|
|
bool ParseHexNumber(int& nNum);
|
|
bool ParseChar(char c);
|
|
bool ParseName(std::string& strName);
|
|
|
|
bool ParseDeclBegining(const char * szString);
|
|
bool ParseXMLDecl();
|
|
bool ParseEq();
|
|
bool ParseVersionInfo(std::string& strVersion);
|
|
bool ParseVersionNum(std::string& strVersion);
|
|
bool ParseEncodingDecl();
|
|
bool ParseEncName();
|
|
void ParseMiscs();
|
|
bool ParseReference(char& c);
|
|
bool ParseAttValue(std::string& strValue);
|
|
bool ParseAttribute(ElementTag* pElem);
|
|
bool ParseETag(Element& element);
|
|
void ParseContentETag(ElementTag& element);
|
|
bool ParseMarkup(Element& element);
|
|
bool ParseCDATA(Element& element);
|
|
|
|
ElementComment* ParseComment();
|
|
ElementTag* ParseTagBegining();
|
|
ElementTag* ParseElement();
|
|
Element* ParseDocument();
|
|
|
|
bool MapReferenceName(const std::string& strName, char& c);
|
|
|
|
void SyntaxError();
|
|
};
|
|
|
|
// Store the position (line and column) this exception refers to.
inline XmlException::XmlException(int line, int column)
    : line_(line),
      column_(column)
{
}
|
|
|
|
inline int XmlException::GetLine() const
|
|
{
|
|
return(line_);
|
|
}
|
|
|
|
inline int XmlException::GetColumn() const
|
|
{
|
|
return(column_);
|
|
}
|
|
|
|
// ***************************************************************************
// Function: Bookmark (constructor)
// Desc: Bind the bookmark to a parser and record that parser's current
//       position in the document.
// NOTE(review): line_ and column_ are set to 0 here instead of being
//       copied from the parser, and none of the inline Bookmark members
//       reads them afterwards -- confirm that this is intended.
// ***************************************************************************
inline XmlParser::Bookmark::Bookmark(XmlParser& reader)
    : parser_(reader),
      sourceCurrent_(reader.sourceCurrent_),
      line_(0),
      column_(0)
{
}
|
|
|
|
// ***************************************************************************
|
|
// Function: Restore
|
|
// Desc: Change back the position
|
|
// ***************************************************************************
|
|
inline void XmlParser::Bookmark::Restore()
|
|
{
|
|
parser_.sourceCurrent_ = sourceCurrent_;
|
|
}
|
|
|
|
// ***************************************************************************
|
|
// Function: GetSubString
|
|
// Desc: Get the sub-string between the current and the recorded positions
|
|
// ***************************************************************************
|
|
inline void XmlParser::Bookmark::GetSubString(std::string& strString, int nNumEndSkip)
|
|
{
|
|
// ASSERT(parser_.sourceCurrent_ + nNumEndSkip >= sourceCurrent_);
|
|
strString = std::string(sourceCurrent_, parser_.sourceCurrent_ -
|
|
sourceCurrent_ - nNumEndSkip);
|
|
}
|
|
|
|
inline void XmlParser::Bookmark::Reset()
|
|
{
|
|
sourceCurrent_ = parser_.sourceCurrent_;
|
|
}
|
|
|
|
// ***************************************************************************
// Function: IsSpace
// Desc: True for the four white-space characters: space, tabulation,
//       carriage return and line feed.
// ***************************************************************************
inline bool IsSpace(char c)
{
    switch (c)
    {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
        return true;

    default:
        return false;
    }
}
|
|
|
|
// ***************************************************************************
// Function: IsAlpha
// Desc: True for the unaccented letters [a-zA-Z] only.
// ***************************************************************************
inline bool IsAlpha(char c)
{
    const bool isLower = (c >= 'a' && c <= 'z');
    const bool isUpper = (c >= 'A' && c <= 'Z');
    return isLower || isUpper;
}
|
|
|
|
// ***************************************************************************
// Function: IsDigit
// Desc: True for the decimal digits [0-9].
// ***************************************************************************
inline bool IsDigit(char c)
{
    // Reject everything outside the '0'..'9' range.
    return !(c < '0' || c > '9');
}
|
|
|
|
// ***************************************************************************
// Function: IsHexDigit
// Desc: True for the hexadecimal digits [0-9a-fA-F].
// ***************************************************************************
inline bool IsHexDigit(char c)
{
    if (c >= '0' && c <= '9')
        return true;            // decimal digit

    if (c >= 'a' && c <= 'f')
        return true;            // lower-case hex letter

    return (c >= 'A' && c <= 'F');  // upper-case hex letter, or not hex at all
}
|
|
|
|
// ***************************************************************************
// Function: HexDigitValue
// Desc: Numeric value (0..15) of a hexadecimal digit [0-9a-fA-F].
//       The argument is not validated: any character that is neither a
//       decimal digit nor in 'a'..'f' is computed as if it were an
//       upper-case hex letter, so callers should check IsHexDigit() first.
// ***************************************************************************
inline int HexDigitValue(char c)
{
    if (c >= '0' && c <= '9')
        return c - '0';

    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;

    return c - 'A' + 10;
}
|
|
|
|
// ***************************************************************************
|
|
// Function: IsAlphaDigit
|
|
// Desc: [a-zA-Z0-9]
|
|
// ***************************************************************************
|
|
inline bool IsAlphaDigit(char c)
|
|
{
|
|
return(IsAlpha(c) || IsDigit(c));
|
|
}
|
|
|
|
// ***************************************************************************
|
|
// Function: IsAlphaDigitEx
|
|
// Desc: [a-zA-Z0-9_.:] | '-'
|
|
// ***************************************************************************
|
|
inline bool IsAlphaDigitEx(char c)
|
|
{
|
|
return(IsAlphaDigit(c) || c == '_' || c == '.' || c == ':' || c == '-');
|
|
}
|
|
|
|
// ***************************************************************************
// Function: LowCase
// Desc: Convert an upper-case letter [A-Z] to its lower-case counterpart.
//       Every other character is returned unchanged.
// ***************************************************************************
inline char LowCase(char c)
{
    if (c < 'A' || c > 'Z')
        return c;               // not an upper-case letter: leave as is

    return static_cast<char>(c - 'A' + 'a');
}
|
|
|
|
// ***************************************************************************
// Function: IsXmlChar
// Desc: True when the byte may appear in an XML document: tabulation (0x9),
//       line feed (0xA), carriage return (0xD) or any value from 0x20
//       upwards. The other control characters below 0x20, NUL included,
//       are rejected.
// ***************************************************************************
inline bool IsXmlChar(char c)
{
    // Compare as unsigned. Where plain char is signed, the bytes 0x80..0xFF
    // (Latin-1 letters, UTF-8 lead and continuation bytes) are negative and
    // would fail a signed ">= 0x20" test, so non-ASCII text would be
    // rejected on those platforms only.
    const unsigned char uc = static_cast<unsigned char>(c);

    return(uc == 0x9 || uc == 0xa || uc == 0xd || uc >= 0x20);
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|