diff --git a/tinyxml2.cpp b/tinyxml2.cpp
index 1d68cb0..51cf795 100644
--- a/tinyxml2.cpp
+++ b/tinyxml2.cpp
@@ -18,17 +18,29 @@ static const char CR = CARRIAGE_RETURN;
static const char SINGLE_QUOTE = '\'';
static const char DOUBLE_QUOTE = '\"';
-// Bunch of unicode info at:
-// http://www.unicode.org/faq/utf_bom.html
-// ef bb bf (Microsoft "lead bytes") - designates UTF-8
-
-static const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
-static const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
-static const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
+// Bunch of unicode info at:
+// http://www.unicode.org/faq/utf_bom.html
+// ef bb bf (Microsoft "lead bytes") - designates UTF-8
+
+static const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
+static const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
+static const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
-#define DELETE_NODE( node ) { MemPool* pool = node->memPool; node->~XMLNode(); pool->Free( node ); }
-#define DELETE_ATTRIBUTE( attrib ) { MemPool* pool = attrib->memPool; attrib->~XMLAttribute(); pool->Free( attrib ); }
+#define DELETE_NODE( node ) { \
+ if ( node ) { \
+ MemPool* pool = node->memPool; \
+ node->~XMLNode(); \
+ pool->Free( node ); \
+ } \
+}
+#define DELETE_ATTRIBUTE( attrib ) { \
+ if ( attrib ) { \
+ MemPool* pool = attrib->memPool; \
+ attrib->~XMLAttribute(); \
+ pool->Free( attrib ); \
+ } \
+}
struct Entity {
const char* pattern;
@@ -229,114 +241,114 @@ const char* XMLUtil::ReadBOM( const char* p, bool* bom )
}
-void XMLUtil::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
-{
- const unsigned long BYTE_MASK = 0xBF;
- const unsigned long BYTE_MARK = 0x80;
- const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
-
- if (input < 0x80)
- *length = 1;
- else if ( input < 0x800 )
- *length = 2;
- else if ( input < 0x10000 )
- *length = 3;
- else if ( input < 0x200000 )
- *length = 4;
- else
- { *length = 0; return; } // This code won't covert this correctly anyway.
-
- output += *length;
-
- // Scary scary fall throughs.
- switch (*length)
- {
- case 4:
- --output;
- *output = (char)((input | BYTE_MARK) & BYTE_MASK);
- input >>= 6;
- case 3:
- --output;
- *output = (char)((input | BYTE_MARK) & BYTE_MASK);
- input >>= 6;
- case 2:
- --output;
- *output = (char)((input | BYTE_MARK) & BYTE_MASK);
- input >>= 6;
- case 1:
- --output;
- *output = (char)(input | FIRST_BYTE_MARK[*length]);
- }
-}
-
-
-const char* XMLUtil::GetCharacterRef( const char* p, char* value, int* length )
-{
- // Presume an entity, and pull it out.
- *length = 0;
-
- if ( *(p+1) == '#' && *(p+2) )
- {
- unsigned long ucs = 0;
- ptrdiff_t delta = 0;
- unsigned mult = 1;
-
- if ( *(p+2) == 'x' )
- {
- // Hexadecimal.
- if ( !*(p+3) ) return 0;
-
- const char* q = p+3;
- q = strchr( q, ';' );
-
- if ( !q || !*q ) return 0;
-
- delta = q-p;
- --q;
-
- while ( *q != 'x' )
- {
- if ( *q >= '0' && *q <= '9' )
- ucs += mult * (*q - '0');
- else if ( *q >= 'a' && *q <= 'f' )
- ucs += mult * (*q - 'a' + 10);
- else if ( *q >= 'A' && *q <= 'F' )
- ucs += mult * (*q - 'A' + 10 );
- else
- return 0;
- mult *= 16;
- --q;
- }
- }
- else
- {
- // Decimal.
- if ( !*(p+2) ) return 0;
-
- const char* q = p+2;
- q = strchr( q, ';' );
-
- if ( !q || !*q ) return 0;
-
- delta = q-p;
- --q;
-
- while ( *q != '#' )
- {
- if ( *q >= '0' && *q <= '9' )
- ucs += mult * (*q - '0');
- else
- return 0;
- mult *= 10;
- --q;
- }
- }
- // convert the UCS to UTF-8
- ConvertUTF32ToUTF8( ucs, value, length );
- return p + delta + 1;
- }
- return p+1;
-}
+void XMLUtil::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
+{
+ const unsigned long BYTE_MASK = 0xBF;
+ const unsigned long BYTE_MARK = 0x80;
+ const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+ if (input < 0x80)
+ *length = 1;
+ else if ( input < 0x800 )
+ *length = 2;
+ else if ( input < 0x10000 )
+ *length = 3;
+ else if ( input < 0x200000 )
+ *length = 4;
+ else
+ { *length = 0; return; } // This code won't convert this correctly anyway.
+
+ output += *length;
+
+ // Scary scary fall throughs.
+ switch (*length)
+ {
+ case 4:
+ --output;
+ *output = (char)((input | BYTE_MARK) & BYTE_MASK);
+ input >>= 6;
+ case 3:
+ --output;
+ *output = (char)((input | BYTE_MARK) & BYTE_MASK);
+ input >>= 6;
+ case 2:
+ --output;
+ *output = (char)((input | BYTE_MARK) & BYTE_MASK);
+ input >>= 6;
+ case 1:
+ --output;
+ *output = (char)(input | FIRST_BYTE_MARK[*length]);
+ }
+}
+
+
+const char* XMLUtil::GetCharacterRef( const char* p, char* value, int* length )
+{
+ // Presume an entity, and pull it out.
+ *length = 0;
+
+ if ( *(p+1) == '#' && *(p+2) )
+ {
+ unsigned long ucs = 0;
+ ptrdiff_t delta = 0;
+ unsigned mult = 1;
+
+ if ( *(p+2) == 'x' )
+ {
+ // Hexadecimal.
+ if ( !*(p+3) ) return 0;
+
+ const char* q = p+3;
+ q = strchr( q, ';' );
+
+ if ( !q || !*q ) return 0;
+
+ delta = q-p;
+ --q;
+
+ while ( *q != 'x' )
+ {
+ if ( *q >= '0' && *q <= '9' )
+ ucs += mult * (*q - '0');
+ else if ( *q >= 'a' && *q <= 'f' )
+ ucs += mult * (*q - 'a' + 10);
+ else if ( *q >= 'A' && *q <= 'F' )
+ ucs += mult * (*q - 'A' + 10 );
+ else
+ return 0;
+ mult *= 16;
+ --q;
+ }
+ }
+ else
+ {
+ // Decimal.
+ if ( !*(p+2) ) return 0;
+
+ const char* q = p+2;
+ q = strchr( q, ';' );
+
+ if ( !q || !*q ) return 0;
+
+ delta = q-p;
+ --q;
+
+ while ( *q != '#' )
+ {
+ if ( *q >= '0' && *q <= '9' )
+ ucs += mult * (*q - '0');
+ else
+ return 0;
+ mult *= 10;
+ --q;
+ }
+ }
+ // convert the UCS to UTF-8
+ ConvertUTF32ToUTF8( ucs, value, length );
+ return p + delta + 1;
+ }
+ return p+1;
+}
char* XMLDocument::Identify( char* p, XMLNode** node )
@@ -397,6 +409,11 @@ char* XMLDocument::Identify( char* p, XMLNode** node )
returnNode = new (elementPool.Alloc()) XMLElement( this );
returnNode->memPool = &elementPool;
p += elementHeaderLen;
+
+ p = XMLUtil::SkipWhiteSpace( p );
+ if ( p && *p == '/' ) {
+ ((XMLElement*)returnNode)->closingType = XMLElement::CLOSING;
+ }
}
else {
returnNode = new (textPool.Alloc()) XMLText( this );
@@ -587,20 +604,75 @@ const XMLElement* XMLNode::LastChildElement( const char* value ) const
char* XMLNode::ParseDeep( char* p )
{
+ // This is a recursive method, but thinking about it "at the current level"
+ // it is a pretty simple flat list:
+ //     <foo/>
+ //     <!-- comment -->
+ //
+ // With a special case:
+ //     <foo>
+ //     </foo>
+ //     <!-- comment -->
+ //
+ // Where the closing element (/foo) *must* be the next thing after the opening
+ // element, and the names must match. BUT the tricky bit is that the closing
+ // element will be read by the child.
+
while( p && *p ) {
XMLNode* node = 0;
- p = document->Identify( p, &node );
- if ( p && node ) {
- p = node->ParseDeep( p );
+ char* mark = p;
- if ( node->IsClosingElement() ) {
- if ( !XMLUtil::StringEqual( Value(), node->Value() )) {
- document->SetError( ERROR_MISMATCHED_ELEMENT, Value(), 0 );
- }
+ p = document->Identify( p, &node );
+ if ( p == 0 ) {
+ break;
+ }
+
+ // We read the end tag. Back up and return.
+ if ( node && node->ToElement() && node->ToElement()->ClosingType() == XMLElement::CLOSING ) {
+ DELETE_NODE( node );
+ return mark;
+ }
+
+ if ( node ) {
+ p = node->ParseDeep( p );
+ if ( !p ) {
DELETE_NODE( node );
- return p;
+ node = 0;
+ break;
+ }
+
+ XMLElement* ele = node->ToElement();
+ if ( ele && ele->ClosingType() == XMLElement::OPEN ) {
+ XMLNode* closingNode = 0;
+ p = document->Identify( p, &closingNode );
+ XMLElement* closingEle = closingNode ? closingNode->ToElement() : 0;
+
+ if ( closingEle == 0 ) {
+ document->SetError( ERROR_MISMATCHED_ELEMENT, node->Value(), 0 );
+ p = 0;
+ }
+ else if ( closingEle->ClosingType() != XMLElement::CLOSING ) {
+ document->SetError( ERROR_MISMATCHED_ELEMENT, node->Value(), 0 );
+ p = 0;
+ }
+ else
+ {
+ p = closingEle->ParseDeep( p );
+ if ( !XMLUtil::StringEqual( closingEle->Value(), node->Value() )) {
+ document->SetError( ERROR_MISMATCHED_ELEMENT, node->Value(), 0 );
+ p = 0;
+ }
+ }
+ // Else everything is fine, but we need to throw away the node.
+ DELETE_NODE( closingNode );
+ if ( p == 0 ) {
+ DELETE_NODE( node );
+ node = 0;
+ }
+ }
+ if ( node ) {
+ this->InsertEndChild( node );
}
- this->InsertEndChild( node );
}
}
return 0;
@@ -736,7 +808,7 @@ char* XMLAttribute::ParseDeep( char* p )
char endTag[2] = { *p, 0 };
++p;
p = value.ParseText( p, endTag, StrPair::ATTRIBUTE_VALUE );
- if ( value.Empty() ) return 0;
+ //if ( value.Empty() ) return 0;
return p;
}
@@ -842,9 +914,8 @@ void XMLAttribute::SetAttribute( float v )
// --------- XMLElement ---------- //
XMLElement::XMLElement( XMLDocument* doc ) : XMLNode( doc ),
- closing( false ),
+ closingType( 0 ),
rootAttribute( 0 )
- //lastAttribute( 0 )
{
}
@@ -937,10 +1008,9 @@ void XMLElement::DeleteAttribute( const char* name )
}
-char* XMLElement::ParseAttributes( char* p, bool* closedElement )
+char* XMLElement::ParseAttributes( char* p )
{
const char* start = p;
- *closedElement = false;
// Read the attributes.
while( p ) {
@@ -965,11 +1035,7 @@ char* XMLElement::ParseAttributes( char* p, bool* closedElement )
}
// end of the tag
else if ( *p == '/' && *(p+1) == '>' ) {
- if ( closing ) {
- document->SetError( ERROR_PARSING_ELEMENT, start, p );
- return 0;
- }
- *closedElement = true;
+ closingType = CLOSED;
return p+2; // done; sealed element.
}
// end of the tag
@@ -1001,7 +1067,7 @@ char* XMLElement::ParseDeep( char* p )
// parsed just like a regular element then deleted from
// the DOM.
if ( *p == '/' ) {
- closing = true;
+ closingType = CLOSING;
++p;
}
@@ -1009,8 +1075,8 @@ char* XMLElement::ParseDeep( char* p )
if ( value.Empty() ) return 0;
bool elementClosed=false;
- p = ParseAttributes( p, &elementClosed );
- if ( !p || !*p || elementClosed || closing )
+ p = ParseAttributes( p );
+ if ( !p || !*p || closingType )
return p;
p = XMLNode::ParseDeep( p );
diff --git a/tinyxml2.h b/tinyxml2.h
index 7185472..0e15231 100644
--- a/tinyxml2.h
+++ b/tinyxml2.h
@@ -463,7 +463,6 @@ public:
virtual bool Accept( XMLVisitor* visitor ) const = 0;
virtual char* ParseDeep( char* );
- virtual bool IsClosingElement() const { return false; }
protected:
XMLNode( XMLDocument* );
@@ -681,7 +680,12 @@ public:
const char* GetText() const;
// internal:
- virtual bool IsClosingElement() const { return closing; }
+ enum {
+ OPEN, // <foo>
+ CLOSED, // <foo/>
+ CLOSING // </foo>
+ };
+ int ClosingType() const { return closingType; }
char* ParseDeep( char* p );
private:
@@ -693,9 +697,9 @@ private:
XMLAttribute* FindAttribute( const char* name );
XMLAttribute* FindOrCreateAttribute( const char* name );
void LinkAttribute( XMLAttribute* attrib );
- char* ParseAttributes( char* p, bool *closedElement );
+ char* ParseAttributes( char* p );
- bool closing;
+ int closingType;
XMLAttribute* rootAttribute;
};
diff --git a/xmltest.cpp b/xmltest.cpp
index d244efa..40ac1a9 100644
--- a/xmltest.cpp
+++ b/xmltest.cpp
@@ -129,7 +129,7 @@ int main( int argc, const char* argv )
printf( "----------------------------------------------\n" );
}
}
-
+#if 1
{
static const char* test = "";
- XMLDocument doc;
- doc.Parse( doctype );
-
- XMLComment* comment = doc.FirstChild()->ToComment();
-
- XMLTest( "Comment formatting.", " Somewhat ", comment->Value() );
- }
- {
- // Double attributes
- const char* doctype = "";
-
- XMLDocument doc;
- doc.Parse( doctype );
-
- XMLTest( "Parsing repeated attributes.", ERROR_PARSING_ATTRIBUTE, doc.ErrorID() ); // is an error to tinyxml (didn't use to be, but caused issues)
- }
-
- {
- // Embedded null in stream.
- const char* doctype = "";
-
- XMLDocument doc;
- doc.Parse( doctype );
- XMLTest( "Embedded null throws error.", true, doc.Error() );
- }
-
- {
- // Empty documents should return TIXML_ERROR_PARSING_EMPTY, bug 1070717
- const char* str = " ";
- XMLDocument doc;
- doc.Parse( str );
- XMLTest( "Empty document error", ERROR_EMPTY_DOCUMENT, doc.ErrorID() );
- }
+ // Entities not being written correctly.
+ // From Lynn Allen
- {
- // Low entities
- XMLDocument doc;
- doc.Parse( "" );
- const char result[] = { 0x0e, 0 };
- XMLTest( "Low entities.", doc.FirstChildElement()->GetText(), result );
- doc.Print();
- }
+ const char* passages =
+ ""
+ ""
+ " "
+ "";
- {
- // Attribute values with trailing quotes not handled correctly
- XMLDocument doc;
- doc.Parse( "" );
- XMLTest( "Throw error with bad end quotes.", doc.Error(), true );
- }
+ XMLDocument doc;
+ doc.Parse( passages );
+ XMLElement* psg = doc.RootElement()->FirstChildElement();
+ const char* context = psg->Attribute( "context" );
+ const char* expected = "Line 5 has \"quotation marks\" and 'apostrophe marks'. It also has <, >, and &, as well as a fake copyright \xC2\xA9.";
- {
- // [ 1663758 ] Failure to report error on bad XML
- XMLDocument xml;
- xml.Parse("");
- XMLTest("Missing end tag at end of input", xml.Error(), true);
- xml.Parse(" ");
- XMLTest("Missing end tag with trailing whitespace", xml.Error(), true);
- xml.Parse("");
- XMLTest("Mismatched tags", xml.ErrorID(), ERROR_MISMATCHED_ELEMENT);
- }
-
-
- {
- // [ 1475201 ] TinyXML parses entities in comments
- XMLDocument xml;
- xml.Parse(""
- "" );
-
- XMLNode* e0 = xml.FirstChild();
- XMLNode* e1 = e0->NextSibling();
- XMLComment* c0 = e0->ToComment();
- XMLComment* c1 = e1->ToComment();
-
- XMLTest( "Comments ignore entities.", " declarations for & ", c0->Value(), true );
- XMLTest( "Comments ignore entities.", " far & away ", c1->Value(), true );
- }
-
- {
- XMLDocument xml;
- xml.Parse( ""
- ""
- ""
- ""
- "" );
- int count = 0;
-
- for( XMLNode* ele = xml.FirstChildElement( "Parent" )->FirstChild();
- ele;
- ele = ele->NextSibling() )
- {
- ++count;
- }
-
- XMLTest( "Comments iterate correctly.", 3, count );
- }
-
- {
- // trying to repro ]1874301]. If it doesn't go into an infinite loop, all is well.
- unsigned char buf[] = " " );
- XMLTest( "Handle end tag whitespace", false, xml.Error() );
- }
-
- {
- // This one must not result in an infinite loop
- XMLDocument xml;
- xml.Parse( "loop" );
- XMLTest( "Infinite loop test.", true, true );
- }
+ XMLTest( "Entity transformation: read. ", expected, context, true );
+ FILE* textfile = fopen( "textfile.txt", "w" );
+ if ( textfile )
+ {
+ XMLStreamer streamer( textfile );
+ psg->Accept( &streamer );
+ fclose( textfile );
+ }
+ textfile = fopen( "textfile.txt", "r" );
+ TIXMLASSERT( textfile );
+ if ( textfile )
+ {
+ char buf[ 1024 ];
+ fgets( buf, 1024, textfile );
+ XMLTest( "Entity transformation: write. ",
+ "\n",
+ buf, false );
+ }
+ fclose( textfile );
+ }
+
+ {
+ const char* test = "";
+
+ XMLDocument doc;
+ doc.Parse( test );
+ XMLTest( "dot in names", doc.Error(), 0);
+ XMLTest( "dot in names", doc.FirstChildElement()->Name(), "a.elem" );
+ XMLTest( "dot in names", doc.FirstChildElement()->Attribute( "xmi.version" ), "2.0" );
+ }
+
+ {
+ const char* test = "1.1 Start easy ignore fin thickness
";
+
+ XMLDocument doc;
+ doc.Parse( test );
+
+ XMLText* text = doc.FirstChildElement()->FirstChildElement()->FirstChild()->ToText();
+ XMLTest( "Entity with one digit.",
+ text->Value(), "1.1 Start easy ignore fin thickness\n",
+ false );
+ }
+
+ {
+ // DOCTYPE not preserved (950171)
+ //
+ const char* doctype =
+ ""
+ ""
+ ""
+ ""
+ "";
+
+ XMLDocument doc;
+ doc.Parse( doctype );
+ doc.SaveFile( "test7.xml" );
+ doc.DeleteChild( doc.RootElement() );
+ doc.LoadFile( "test7.xml" );
+ doc.Print();
+
+ const XMLUnknown* decl = doc.FirstChild()->NextSibling()->ToUnknown();
+ XMLTest( "Correct value of unknown.", "DOCTYPE PLAY SYSTEM 'play.dtd'", decl->Value() );
+
+ }
+
+ {
+ // Comments do not stream out correctly.
+ const char* doctype =
+ "";
+ XMLDocument doc;
+ doc.Parse( doctype );
+
+ XMLComment* comment = doc.FirstChild()->ToComment();
+
+ XMLTest( "Comment formatting.", " Somewhat ", comment->Value() );
+ }
+ {
+ // Double attributes
+ const char* doctype = "";
+
+ XMLDocument doc;
+ doc.Parse( doctype );
+
+ XMLTest( "Parsing repeated attributes.", ERROR_PARSING_ATTRIBUTE, doc.ErrorID() ); // is an error to tinyxml (didn't use to be, but caused issues)
+ }
+
+ {
+ // Embedded null in stream.
+ const char* doctype = "";
+
+ XMLDocument doc;
+ doc.Parse( doctype );
+ XMLTest( "Embedded null throws error.", true, doc.Error() );
+ }
+
+ {
+ // Empty documents should report ERROR_EMPTY_DOCUMENT (bug 1070717).
+ const char* str = " ";
+ XMLDocument doc;
+ doc.Parse( str );
+ XMLTest( "Empty document error", ERROR_EMPTY_DOCUMENT, doc.ErrorID() );
+ }
+
+ {
+ // Low entities
+ XMLDocument doc;
+ doc.Parse( "" );
+ const char result[] = { 0x0e, 0 };
+ XMLTest( "Low entities.", doc.FirstChildElement()->GetText(), result );
+ doc.Print();
+ }
+
+ {
+ // Attribute values with trailing quotes not handled correctly
+ XMLDocument doc;
+ doc.Parse( "" );
+ XMLTest( "Throw error with bad end quotes.", doc.Error(), true );
+ }
+
+ {
+ // [ 1663758 ] Failure to report error on bad XML
+ XMLDocument xml;
+ xml.Parse("");
+ XMLTest("Missing end tag at end of input", xml.Error(), true);
+ xml.Parse(" ");
+ XMLTest("Missing end tag with trailing whitespace", xml.Error(), true);
+ xml.Parse("");
+ XMLTest("Mismatched tags", xml.ErrorID(), ERROR_MISMATCHED_ELEMENT);
+ }
+
+
+ {
+ // [ 1475201 ] TinyXML parses entities in comments
+ XMLDocument xml;
+ xml.Parse(""
+ "" );
+
+ XMLNode* e0 = xml.FirstChild();
+ XMLNode* e1 = e0->NextSibling();
+ XMLComment* c0 = e0->ToComment();
+ XMLComment* c1 = e1->ToComment();
+
+ XMLTest( "Comments ignore entities.", " declarations for & ", c0->Value(), true );
+ XMLTest( "Comments ignore entities.", " far & away ", c1->Value(), true );
+ }
+
+ {
+ XMLDocument xml;
+ xml.Parse( ""
+ ""
+ ""
+ ""
+ "" );
+ xml.Print();
+
+ int count = 0;
+
+ for( XMLNode* ele = xml.FirstChildElement( "Parent" )->FirstChild();
+ ele;
+ ele = ele->NextSibling() )
+ {
+ ++count;
+ }
+
+ XMLTest( "Comments iterate correctly.", 3, count );
+ }
+
+ {
+ // Trying to repro [1874301]. If it doesn't go into an infinite loop, all is well.
+ unsigned char buf[] = " " );
+ XMLTest( "Handle end tag whitespace", false, xml.Error() );
+ }
+
+ {
+ // This one must not result in an infinite loop
+ XMLDocument xml;
+ xml.Parse( "loop" );
+ XMLTest( "Infinite loop test.", true, true );
+ }
+#endif
#if defined( WIN32 )
_CrtMemCheckpoint( &endMemState );
//_CrtMemDumpStatistics( &endMemState );