Merge pull request #960 from leethomason/kcsaul-pedantic-whitespace
Integrate branch with Pedantic whitespace
This commit is contained in:
4
.github/workflows/test.yml
vendored
4
.github/workflows/test.yml
vendored
@@ -6,7 +6,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
os: [ windows-2019, macos-10.15, ubuntu-20.04 ]
|
os: [ windows-2019, macos-latest, ubuntu-20.04 ]
|
||||||
cmake: [ 3.15, 3.x ]
|
cmake: [ 3.15, 3.x ]
|
||||||
include:
|
include:
|
||||||
- os: windows-2019
|
- os: windows-2019
|
||||||
@@ -17,7 +17,7 @@ jobs:
|
|||||||
- os: ubuntu-20.04
|
- os: ubuntu-20.04
|
||||||
tree: tree
|
tree: tree
|
||||||
|
|
||||||
- os: macos-10.15
|
- os: macos-latest
|
||||||
tree: find
|
tree: find
|
||||||
|
|
||||||
- cmake: 3.15
|
- cmake: 3.15
|
||||||
|
|||||||
18
readme.md
18
readme.md
@@ -1,9 +1,7 @@
|
|||||||
TinyXML-2
|
TinyXML-2
|
||||||
=========
|
=========
|
||||||
|
|
||||||

|
[](https://github.com/leethomason/tinyxml2/actions/workflows/test.yml)
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
TinyXML-2 is a simple, small, efficient, C++ XML parser that can be
|
TinyXML-2 is a simple, small, efficient, C++ XML parser that can be
|
||||||
easily integrated into other programs.
|
easily integrated into other programs.
|
||||||
@@ -93,7 +91,7 @@ by the Document. When the Document is deleted, so are all the nodes it contains.
|
|||||||
|
|
||||||
### White Space
|
### White Space
|
||||||
|
|
||||||
#### Whitespace Preservation (default)
|
#### Whitespace Preservation (default, PRESERVE_WHITESPACE)
|
||||||
|
|
||||||
Microsoft has an excellent article on white space: http://msdn.microsoft.com/en-us/library/ms256097.aspx
|
Microsoft has an excellent article on white space: http://msdn.microsoft.com/en-us/library/ms256097.aspx
|
||||||
|
|
||||||
@@ -125,7 +123,7 @@ valuable. TinyXML-2 sees these as the same XML:
|
|||||||
|
|
||||||
<document><data>1</data><data>2</data><data>3</data></document>
|
<document><data>1</data><data>2</data><data>3</data></document>
|
||||||
|
|
||||||
#### Whitespace Collapse
|
#### Whitespace Collapse (COLLAPSE_WHITESPACE)
|
||||||
|
|
||||||
For some applications, it is preferable to collapse whitespace. Collapsing
|
For some applications, it is preferable to collapse whitespace. Collapsing
|
||||||
whitespace gives you "HTML-like" behavior, which is sometimes more suitable
|
whitespace gives you "HTML-like" behavior, which is sometimes more suitable
|
||||||
@@ -143,7 +141,15 @@ However, you may also use COLLAPSE_WHITESPACE, which will:
|
|||||||
Note that (currently) there is a performance impact for using COLLAPSE_WHITESPACE.
|
Note that (currently) there is a performance impact for using COLLAPSE_WHITESPACE.
|
||||||
It essentially causes the XML to be parsed twice.
|
It essentially causes the XML to be parsed twice.
|
||||||
|
|
||||||
#### Error Reporting
|
#### Pedantic Whitespace (PEDANTIC_WHITESPACE)
|
||||||
|
|
||||||
|
For applications that need to know about text nodes that are composed entirely of
|
||||||
|
whitespace, PEDANTIC_WHITESPACE is available. PEDANTIC_WHITESPACE maintains all the
|
||||||
|
whitespace between elements.
|
||||||
|
|
||||||
|
PEDANTIC_WHITESPACE is a new mode and is not as well tested as the other whitespace modes.
|
||||||
|
|
||||||
|
### Error Reporting
|
||||||
|
|
||||||
TinyXML-2 reports the line number of any errors in an XML document that
|
TinyXML-2 reports the line number of any errors in an XML document that
|
||||||
cannot be parsed correctly. In addition, all nodes (elements, declarations,
|
cannot be parsed correctly. In addition, all nodes (elements, declarations,
|
||||||
|
|||||||
18
tinyxml2.cpp
18
tinyxml2.cpp
@@ -715,7 +715,7 @@ bool XMLUtil::ToUnsigned64(const char* str, uint64_t* value) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
char* XMLDocument::Identify( char* p, XMLNode** node )
|
char* XMLDocument::Identify( char* p, XMLNode** node, bool first )
|
||||||
{
|
{
|
||||||
TIXMLASSERT( node );
|
TIXMLASSERT( node );
|
||||||
TIXMLASSERT( p );
|
TIXMLASSERT( p );
|
||||||
@@ -767,10 +767,20 @@ char* XMLDocument::Identify( char* p, XMLNode** node )
|
|||||||
p += dtdHeaderLen;
|
p += dtdHeaderLen;
|
||||||
}
|
}
|
||||||
else if ( XMLUtil::StringEqual( p, elementHeader, elementHeaderLen ) ) {
|
else if ( XMLUtil::StringEqual( p, elementHeader, elementHeaderLen ) ) {
|
||||||
returnNode = CreateUnlinkedNode<XMLElement>( _elementPool );
|
|
||||||
|
// Preserve whitespace pedantically before closing tag, when it's immediately after opening tag
|
||||||
|
if (WhitespaceMode() == PEDANTIC_WHITESPACE && first && p != start && *(p + elementHeaderLen) == '/') {
|
||||||
|
returnNode = CreateUnlinkedNode<XMLText>(_textPool);
|
||||||
|
returnNode->_parseLineNum = startLine;
|
||||||
|
p = start; // Back it up, all the text counts.
|
||||||
|
_parseCurLineNum = startLine;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
returnNode = CreateUnlinkedNode<XMLElement>(_elementPool);
|
||||||
returnNode->_parseLineNum = _parseCurLineNum;
|
returnNode->_parseLineNum = _parseCurLineNum;
|
||||||
p += elementHeaderLen;
|
p += elementHeaderLen;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
returnNode = CreateUnlinkedNode<XMLText>( _textPool );
|
returnNode = CreateUnlinkedNode<XMLText>( _textPool );
|
||||||
returnNode->_parseLineNum = _parseCurLineNum; // Report line of first non-whitespace character
|
returnNode->_parseLineNum = _parseCurLineNum; // Report line of first non-whitespace character
|
||||||
@@ -1098,14 +1108,16 @@ char* XMLNode::ParseDeep( char* p, StrPair* parentEndTag, int* curLineNumPtr )
|
|||||||
if (_document->Error())
|
if (_document->Error())
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
bool first = true;
|
||||||
while( p && *p ) {
|
while( p && *p ) {
|
||||||
XMLNode* node = 0;
|
XMLNode* node = 0;
|
||||||
|
|
||||||
p = _document->Identify( p, &node );
|
p = _document->Identify( p, &node, first );
|
||||||
TIXMLASSERT( p );
|
TIXMLASSERT( p );
|
||||||
if ( node == 0 ) {
|
if ( node == 0 ) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
first = false;
|
||||||
|
|
||||||
const int initialLineNum = node->_parseLineNum;
|
const int initialLineNum = node->_parseLineNum;
|
||||||
|
|
||||||
|
|||||||
@@ -1710,7 +1710,8 @@ private:
|
|||||||
|
|
||||||
enum Whitespace {
|
enum Whitespace {
|
||||||
PRESERVE_WHITESPACE,
|
PRESERVE_WHITESPACE,
|
||||||
COLLAPSE_WHITESPACE
|
COLLAPSE_WHITESPACE,
|
||||||
|
PEDANTIC_WHITESPACE
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@@ -1921,7 +1922,7 @@ public:
|
|||||||
void DeepCopy(XMLDocument* target) const;
|
void DeepCopy(XMLDocument* target) const;
|
||||||
|
|
||||||
// internal
|
// internal
|
||||||
char* Identify( char* p, XMLNode** node );
|
char* Identify( char* p, XMLNode** node, bool first );
|
||||||
|
|
||||||
// internal
|
// internal
|
||||||
void MarkInUse(const XMLNode* const);
|
void MarkInUse(const XMLNode* const);
|
||||||
|
|||||||
172
xmltest.cpp
172
xmltest.cpp
@@ -1869,6 +1869,178 @@ int main( int argc, const char ** argv )
|
|||||||
XMLTest( "Whitespace all space", true, 0 == doc.FirstChildElement()->FirstChild() );
|
XMLTest( "Whitespace all space", true, 0 == doc.FirstChildElement()->FirstChild() );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ----------- Preserve Whitespace ------------
|
||||||
|
{
|
||||||
|
const char* xml = "<element>This is ' \n\n text '</element>";
|
||||||
|
XMLDocument doc(true, PRESERVE_WHITESPACE);
|
||||||
|
doc.Parse(xml);
|
||||||
|
XMLTest("Parse with whitespace preserved", false, doc.Error());
|
||||||
|
XMLTest("Whitespace preserved", "This is ' \n\n text '", doc.FirstChildElement()->GetText());
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const char* xml = "<element> This \nis ' text ' </element>";
|
||||||
|
XMLDocument doc(true, PRESERVE_WHITESPACE);
|
||||||
|
doc.Parse(xml);
|
||||||
|
XMLTest("Parse with whitespace preserved", false, doc.Error());
|
||||||
|
XMLTest("Whitespace preserved", " This \nis ' text ' ", doc.FirstChildElement()->GetText());
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const char* xml = "<element> \n This is ' text ' \n</element>";
|
||||||
|
XMLDocument doc(true, PRESERVE_WHITESPACE);
|
||||||
|
doc.Parse(xml);
|
||||||
|
XMLTest("Parse with whitespace preserved", false, doc.Error());
|
||||||
|
XMLTest("Whitespace preserved", " \n This is ' text ' \n", doc.FirstChildElement()->GetText());
|
||||||
|
}
|
||||||
|
|
||||||
|
// The following cases cover text that is all whitespace, which is intentionally not preserved
|
||||||
|
{
|
||||||
|
const char* xml = "<element> </element>";
|
||||||
|
XMLDocument doc(true, PRESERVE_WHITESPACE);
|
||||||
|
doc.Parse(xml);
|
||||||
|
XMLTest("Parse with whitespace preserved", false, doc.Error());
|
||||||
|
XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const char* xml = "<element> </element>";
|
||||||
|
XMLDocument doc(true, PRESERVE_WHITESPACE);
|
||||||
|
doc.Parse(xml);
|
||||||
|
XMLTest("Parse with whitespace preserved", false, doc.Error());
|
||||||
|
XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const char* xml = "<element>\n\n</element>";
|
||||||
|
XMLDocument doc(true, PRESERVE_WHITESPACE);
|
||||||
|
doc.Parse(xml);
|
||||||
|
XMLTest("Parse with whitespace preserved", false, doc.Error());
|
||||||
|
XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const char* xml = "<element> \n</element>";
|
||||||
|
XMLDocument doc(true, PRESERVE_WHITESPACE);
|
||||||
|
doc.Parse(xml);
|
||||||
|
XMLTest("Parse with whitespace preserved", false, doc.Error());
|
||||||
|
XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const char* xml = "<element> \n \n </element>";
|
||||||
|
XMLDocument doc(true, PRESERVE_WHITESPACE);
|
||||||
|
doc.Parse(xml);
|
||||||
|
XMLTest("Parse with whitespace preserved", false, doc.Error());
|
||||||
|
XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----------- Pedantic Whitespace ------------
|
||||||
|
{
|
||||||
|
const char* xml = "<element>This is ' \n\n text '</element>";
|
||||||
|
XMLDocument doc(true, PEDANTIC_WHITESPACE);
|
||||||
|
doc.Parse(xml);
|
||||||
|
XMLTest("Parse with pedantic whitespace", false, doc.Error());
|
||||||
|
XMLTest("Pedantic whitespace", "This is ' \n\n text '", doc.FirstChildElement()->GetText());
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const char* xml = "<element> This \nis ' text ' </element>";
|
||||||
|
XMLDocument doc(true, PEDANTIC_WHITESPACE);
|
||||||
|
doc.Parse(xml);
|
||||||
|
XMLTest("Parse with pedantic whitespace", false, doc.Error());
|
||||||
|
XMLTest("Pedantic whitespace", " This \nis ' text ' ", doc.FirstChildElement()->GetText());
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const char* xml = "<element> \n This is ' text ' \n</element>";
|
||||||
|
XMLDocument doc(true, PEDANTIC_WHITESPACE);
|
||||||
|
doc.Parse(xml);
|
||||||
|
XMLTest("Parse with pedantic whitespace", false, doc.Error());
|
||||||
|
XMLTest("Pedantic whitespace", " \n This is ' text ' \n", doc.FirstChildElement()->GetText());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Following cases are for text that is all whitespace which is preserved with pedantic mode
|
||||||
|
{
|
||||||
|
const char* xml = "<element> </element>";
|
||||||
|
XMLDocument doc(true, PEDANTIC_WHITESPACE);
|
||||||
|
doc.Parse(xml);
|
||||||
|
XMLTest("Parse with pedantic whitespace", false, doc.Error());
|
||||||
|
XMLTest("Pedantic whitespace", " ", doc.FirstChildElement()->GetText());
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const char* xml = "<element> </element>";
|
||||||
|
XMLDocument doc(true, PEDANTIC_WHITESPACE);
|
||||||
|
doc.Parse(xml);
|
||||||
|
XMLTest("Parse with pedantic whitespace", false, doc.Error());
|
||||||
|
XMLTest("Pedantic whitespace", " ", doc.FirstChildElement()->GetText());
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const char* xml = "<element>\n\n</element>\n";
|
||||||
|
XMLDocument doc(true, PEDANTIC_WHITESPACE);
|
||||||
|
doc.Parse(xml);
|
||||||
|
XMLTest("Parse with pedantic whitespace", false, doc.Error());
|
||||||
|
XMLTest("Pedantic whitespace", "\n\n", doc.FirstChildElement()->GetText());
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const char* xml = "<element> \n</element> \n ";
|
||||||
|
XMLDocument doc(true, PEDANTIC_WHITESPACE);
|
||||||
|
doc.Parse(xml);
|
||||||
|
XMLTest("Parse with pedantic whitespace", false, doc.Error());
|
||||||
|
XMLTest("Pedantic whitespace", " \n", doc.FirstChildElement()->GetText());
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const char* xml = "<element> \n \n </element> ";
|
||||||
|
XMLDocument doc(true, PEDANTIC_WHITESPACE);
|
||||||
|
doc.Parse(xml);
|
||||||
|
XMLTest("Parse with pedantic whitespace", false, doc.Error());
|
||||||
|
XMLTest("Pedantic whitespace", " \n \n ", doc.FirstChildElement()->GetText());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Following cases are for checking nested elements are still parsed with pedantic whitespace
|
||||||
|
{
|
||||||
|
const char* xml = "<element>\n\t<a> This is nested text </a>\n</element> ";
|
||||||
|
XMLDocument doc(true, PEDANTIC_WHITESPACE);
|
||||||
|
doc.Parse(xml);
|
||||||
|
XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error());
|
||||||
|
XMLTest("Pedantic whitespace", " This is nested text ", doc.RootElement()->FirstChildElement()->GetText());
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const char* xml = "<element> <b> </b> </element>\n";
|
||||||
|
XMLDocument doc(true, PEDANTIC_WHITESPACE);
|
||||||
|
doc.Parse(xml);
|
||||||
|
XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error());
|
||||||
|
XMLTest("Pedantic whitespace", " ", doc.RootElement()->FirstChildElement()->GetText());
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const char* xml = "<element> <c attribute=\"test\"/> </element>\n ";
|
||||||
|
XMLDocument doc(true, PEDANTIC_WHITESPACE);
|
||||||
|
doc.Parse(xml);
|
||||||
|
XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error());
|
||||||
|
XMLTest("Pedantic whitespace", true, 0 == doc.RootElement()->FirstChildElement()->GetText());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check sample xml can be parsed with pedantic mode
|
||||||
|
{
|
||||||
|
XMLDocument doc(true, PEDANTIC_WHITESPACE);
|
||||||
|
doc.LoadFile("resources/dream.xml");
|
||||||
|
XMLTest("Load dream.xml with pedantic whitespace mode", false, doc.Error());
|
||||||
|
|
||||||
|
XMLTest("Dream", "xml version=\"1.0\"",
|
||||||
|
doc.FirstChild()->ToDeclaration()->Value());
|
||||||
|
XMLTest("Dream", true, doc.FirstChild()->NextSibling()->ToUnknown() != 0);
|
||||||
|
XMLTest("Dream", "DOCTYPE PLAY SYSTEM \"play.dtd\"",
|
||||||
|
doc.FirstChild()->NextSibling()->ToUnknown()->Value());
|
||||||
|
XMLTest("Dream", "And Robin shall restore amends.",
|
||||||
|
doc.LastChild()->LastChild()->LastChild()->LastChild()->LastChildElement()->GetText());
|
||||||
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
// An assert should not fire.
|
// An assert should not fire.
|
||||||
const char* xml = "<element/>";
|
const char* xml = "<element/>";
|
||||||
|
|||||||
Reference in New Issue
Block a user