Merge pull request #960 from leethomason/kcsaul-pedantic-whitespace

Integrate branch with Pedantic whitespace
This commit is contained in:
Lee Thomason
2023-11-21 12:12:41 -08:00
committed by GitHub
5 changed files with 206 additions and 15 deletions

View File

@@ -6,7 +6,7 @@ jobs:
strategy: strategy:
fail-fast: false fail-fast: false
matrix: matrix:
os: [ windows-2019, macos-10.15, ubuntu-20.04 ] os: [ windows-2019, macos-latest, ubuntu-20.04 ]
cmake: [ 3.15, 3.x ] cmake: [ 3.15, 3.x ]
include: include:
- os: windows-2019 - os: windows-2019
@@ -17,7 +17,7 @@ jobs:
- os: ubuntu-20.04 - os: ubuntu-20.04
tree: tree tree: tree
- os: macos-10.15 - os: macos-latest
tree: find tree: find
- cmake: 3.15 - cmake: 3.15

View File

@@ -1,9 +1,7 @@
TinyXML-2 TinyXML-2
========= =========
![Build](https://github.com/leethomason/tinyxml2/actions/workflows/test.yml/badge.svg) [![Test](https://github.com/leethomason/tinyxml2/actions/workflows/test.yml/badge.svg)](https://github.com/leethomason/tinyxml2/actions/workflows/test.yml)
![TinyXML-2 Logo](http://www.grinninglizard.com/tinyxml2/TinyXML2_small.png)
TinyXML-2 is a simple, small, efficient, C++ XML parser that can be TinyXML-2 is a simple, small, efficient, C++ XML parser that can be
easily integrated into other programs. easily integrated into other programs.
@@ -93,7 +91,7 @@ by the Document. When the Document is deleted, so are all the nodes it contains.
### White Space ### White Space
#### Whitespace Preservation (default) #### Whitespace Preservation (default, PRESERVE_WHITESPACE)
Microsoft has an excellent article on white space: http://msdn.microsoft.com/en-us/library/ms256097.aspx Microsoft has an excellent article on white space: http://msdn.microsoft.com/en-us/library/ms256097.aspx
@@ -125,7 +123,7 @@ valuable. TinyXML-2 sees these as the same XML:
<document><data>1</data><data>2</data><data>3</data></document> <document><data>1</data><data>2</data><data>3</data></document>
#### Whitespace Collapse #### Whitespace Collapse (COLLAPSE_WHITESPACE)
For some applications, it is preferable to collapse whitespace. Collapsing For some applications, it is preferable to collapse whitespace. Collapsing
whitespace gives you "HTML-like" behavior, which is sometimes more suitable whitespace gives you "HTML-like" behavior, which is sometimes more suitable
@@ -143,7 +141,15 @@ However, you may also use COLLAPSE_WHITESPACE, which will:
Note that (currently) there is a performance impact for using COLLAPSE_WHITESPACE. Note that (currently) there is a performance impact for using COLLAPSE_WHITESPACE.
It essentially causes the XML to be parsed twice. It essentially causes the XML to be parsed twice.
#### Error Reporting #### Pedantic Whitespace (PEDANTIC_WHITESPACE)
For applications that need to know about text nodes that are composed entirely of
whitespace, PEDANTIC_WHITESPACE is available. PEDANTIC_WHITESPACE maintains all the
whitespace between elements.
PEDANTIC_WHITESPACE is a new mode and not as tested as the other whitespace modes.
### Error Reporting
TinyXML-2 reports the line number of any errors in an XML document that TinyXML-2 reports the line number of any errors in an XML document that
cannot be parsed correctly. In addition, all nodes (elements, declarations, cannot be parsed correctly. In addition, all nodes (elements, declarations,

View File

@@ -715,7 +715,7 @@ bool XMLUtil::ToUnsigned64(const char* str, uint64_t* value) {
} }
char* XMLDocument::Identify( char* p, XMLNode** node ) char* XMLDocument::Identify( char* p, XMLNode** node, bool first )
{ {
TIXMLASSERT( node ); TIXMLASSERT( node );
TIXMLASSERT( p ); TIXMLASSERT( p );
@@ -767,10 +767,20 @@ char* XMLDocument::Identify( char* p, XMLNode** node )
p += dtdHeaderLen; p += dtdHeaderLen;
} }
else if ( XMLUtil::StringEqual( p, elementHeader, elementHeaderLen ) ) { else if ( XMLUtil::StringEqual( p, elementHeader, elementHeaderLen ) ) {
// Preserve whitespace pedantically before closing tag, when it's immediately after opening tag
if (WhitespaceMode() == PEDANTIC_WHITESPACE && first && p != start && *(p + elementHeaderLen) == '/') {
returnNode = CreateUnlinkedNode<XMLText>(_textPool);
returnNode->_parseLineNum = startLine;
p = start; // Back it up, all the text counts.
_parseCurLineNum = startLine;
}
else {
returnNode = CreateUnlinkedNode<XMLElement>(_elementPool); returnNode = CreateUnlinkedNode<XMLElement>(_elementPool);
returnNode->_parseLineNum = _parseCurLineNum; returnNode->_parseLineNum = _parseCurLineNum;
p += elementHeaderLen; p += elementHeaderLen;
} }
}
else { else {
returnNode = CreateUnlinkedNode<XMLText>( _textPool ); returnNode = CreateUnlinkedNode<XMLText>( _textPool );
returnNode->_parseLineNum = _parseCurLineNum; // Report line of first non-whitespace character returnNode->_parseLineNum = _parseCurLineNum; // Report line of first non-whitespace character
@@ -1098,14 +1108,16 @@ char* XMLNode::ParseDeep( char* p, StrPair* parentEndTag, int* curLineNumPtr )
if (_document->Error()) if (_document->Error())
return 0; return 0;
bool first = true;
while( p && *p ) { while( p && *p ) {
XMLNode* node = 0; XMLNode* node = 0;
p = _document->Identify( p, &node ); p = _document->Identify( p, &node, first );
TIXMLASSERT( p ); TIXMLASSERT( p );
if ( node == 0 ) { if ( node == 0 ) {
break; break;
} }
first = false;
const int initialLineNum = node->_parseLineNum; const int initialLineNum = node->_parseLineNum;

View File

@@ -1710,7 +1710,8 @@ private:
enum Whitespace { enum Whitespace {
PRESERVE_WHITESPACE, PRESERVE_WHITESPACE,
COLLAPSE_WHITESPACE COLLAPSE_WHITESPACE,
PEDANTIC_WHITESPACE
}; };
@@ -1921,7 +1922,7 @@ public:
void DeepCopy(XMLDocument* target) const; void DeepCopy(XMLDocument* target) const;
// internal // internal
char* Identify( char* p, XMLNode** node ); char* Identify( char* p, XMLNode** node, bool first );
// internal // internal
void MarkInUse(const XMLNode* const); void MarkInUse(const XMLNode* const);

View File

@@ -1869,6 +1869,178 @@ int main( int argc, const char ** argv )
XMLTest( "Whitespace all space", true, 0 == doc.FirstChildElement()->FirstChild() ); XMLTest( "Whitespace all space", true, 0 == doc.FirstChildElement()->FirstChild() );
} }
// ----------- Preserve Whitespace ------------
{
const char* xml = "<element>This is &apos; \n\n text &apos;</element>";
XMLDocument doc(true, PRESERVE_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with whitespace preserved", false, doc.Error());
XMLTest("Whitespace preserved", "This is ' \n\n text '", doc.FirstChildElement()->GetText());
}
{
const char* xml = "<element> This \nis &apos; text &apos; </element>";
XMLDocument doc(true, PRESERVE_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with whitespace preserved", false, doc.Error());
XMLTest("Whitespace preserved", " This \nis ' text ' ", doc.FirstChildElement()->GetText());
}
{
const char* xml = "<element> \n This is &apos; text &apos; \n</element>";
XMLDocument doc(true, PRESERVE_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with whitespace preserved", false, doc.Error());
XMLTest("Whitespace preserved", " \n This is ' text ' \n", doc.FirstChildElement()->GetText());
}
// The following cases cover text that is entirely whitespace, which is intentionally not preserved
{
const char* xml = "<element> </element>";
XMLDocument doc(true, PRESERVE_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with whitespace preserved", false, doc.Error());
XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
}
{
const char* xml = "<element> </element>";
XMLDocument doc(true, PRESERVE_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with whitespace preserved", false, doc.Error());
XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
}
{
const char* xml = "<element>\n\n</element>";
XMLDocument doc(true, PRESERVE_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with whitespace preserved", false, doc.Error());
XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
}
{
const char* xml = "<element> \n</element>";
XMLDocument doc(true, PRESERVE_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with whitespace preserved", false, doc.Error());
XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
}
{
const char* xml = "<element> \n \n </element>";
XMLDocument doc(true, PRESERVE_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with whitespace preserved", false, doc.Error());
XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
}
// ----------- Pedantic Whitespace ------------
{
const char* xml = "<element>This is &apos; \n\n text &apos;</element>";
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with pedantic whitespace", false, doc.Error());
XMLTest("Pedantic whitespace", "This is ' \n\n text '", doc.FirstChildElement()->GetText());
}
{
const char* xml = "<element> This \nis &apos; text &apos; </element>";
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with pedantic whitespace", false, doc.Error());
XMLTest("Pedantic whitespace", " This \nis ' text ' ", doc.FirstChildElement()->GetText());
}
{
const char* xml = "<element> \n This is &apos; text &apos; \n</element>";
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with pedantic whitespace", false, doc.Error());
XMLTest("Pedantic whitespace", " \n This is ' text ' \n", doc.FirstChildElement()->GetText());
}
// Following cases are for text that is all whitespace which is preserved with pedantic mode
{
const char* xml = "<element> </element>";
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with pedantic whitespace", false, doc.Error());
XMLTest("Pedantic whitespace", " ", doc.FirstChildElement()->GetText());
}
{
const char* xml = "<element> </element>";
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with pedantic whitespace", false, doc.Error());
XMLTest("Pedantic whitespace", " ", doc.FirstChildElement()->GetText());
}
{
const char* xml = "<element>\n\n</element>\n";
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with pedantic whitespace", false, doc.Error());
XMLTest("Pedantic whitespace", "\n\n", doc.FirstChildElement()->GetText());
}
{
const char* xml = "<element> \n</element> \n ";
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with pedantic whitespace", false, doc.Error());
XMLTest("Pedantic whitespace", " \n", doc.FirstChildElement()->GetText());
}
{
const char* xml = "<element> \n \n </element> ";
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse with pedantic whitespace", false, doc.Error());
XMLTest("Pedantic whitespace", " \n \n ", doc.FirstChildElement()->GetText());
}
// Following cases are for checking nested elements are still parsed with pedantic whitespace
{
const char* xml = "<element>\n\t<a> This is nested text </a>\n</element> ";
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error());
XMLTest("Pedantic whitespace", " This is nested text ", doc.RootElement()->FirstChildElement()->GetText());
}
{
const char* xml = "<element> <b> </b> </element>\n";
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error());
XMLTest("Pedantic whitespace", " ", doc.RootElement()->FirstChildElement()->GetText());
}
{
const char* xml = "<element> <c attribute=\"test\"/> </element>\n ";
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.Parse(xml);
XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error());
XMLTest("Pedantic whitespace", true, 0 == doc.RootElement()->FirstChildElement()->GetText());
}
// Check sample xml can be parsed with pedantic mode
{
XMLDocument doc(true, PEDANTIC_WHITESPACE);
doc.LoadFile("resources/dream.xml");
XMLTest("Load dream.xml with pedantic whitespace mode", false, doc.Error());
XMLTest("Dream", "xml version=\"1.0\"",
doc.FirstChild()->ToDeclaration()->Value());
XMLTest("Dream", true, doc.FirstChild()->NextSibling()->ToUnknown() != 0);
XMLTest("Dream", "DOCTYPE PLAY SYSTEM \"play.dtd\"",
doc.FirstChild()->NextSibling()->ToUnknown()->Value());
XMLTest("Dream", "And Robin shall restore amends.",
doc.LastChild()->LastChild()->LastChild()->LastChild()->LastChildElement()->GetText());
}
{ {
// An assert should not fire. // An assert should not fire.
const char* xml = "<element/>"; const char* xml = "<element/>";