new element loop

2012-02-23 22:27:28 -08:00
parent d627776dd3
commit 46a14cfec7
3 changed files with 481 additions and 409 deletions
--- a/tinyxml2.cpp
+++ b/tinyxml2.cpp
@@ -18,17 +18,29 @@ static const char CR = CARRIAGE_RETURN;
 static const char SINGLE_QUOTE			= '\'';
 static const char DOUBLE_QUOTE			= '\"';

-// Bunch of unicode info at:
-//		http://www.unicode.org/faq/utf_bom.html
-//	ef bb bf (Microsoft "lead bytes") - designates UTF-8
-
-static const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
-static const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
-static const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
+// Bunch of unicode info at:
+//		http://www.unicode.org/faq/utf_bom.html
+//	ef bb bf (Microsoft "lead bytes") - designates UTF-8
+
+static const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
+static const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
+static const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;


-#define DELETE_NODE( node ) { MemPool* pool = node->memPool; node->~XMLNode(); pool->Free( node ); }
-#define DELETE_ATTRIBUTE( attrib ) { MemPool* pool = attrib->memPool; attrib->~XMLAttribute(); pool->Free( attrib ); }
+#define DELETE_NODE( node )	{			\
+	if ( node ) {						\
+		MemPool* pool = node->memPool;	\
+		node->~XMLNode();				\
+		pool->Free( node );				\
+	}									\
+}
+#define DELETE_ATTRIBUTE( attrib ) {		\
+	if ( attrib ) {							\
+		MemPool* pool = attrib->memPool;	\
+		attrib->~XMLAttribute();			\
+		pool->Free( attrib );				\
+	}										\
+}

 struct Entity {
 	const char* pattern;
@@ -229,114 +241,114 @@ const char* XMLUtil::ReadBOM( const char* p, bool* bom )
 }


-void XMLUtil::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
-{
-	const unsigned long BYTE_MASK = 0xBF;
-	const unsigned long BYTE_MARK = 0x80;
-	const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
-
-	if (input < 0x80) 
-		*length = 1;
-	else if ( input < 0x800 )
-		*length = 2;
-	else if ( input < 0x10000 )
-		*length = 3;
-	else if ( input < 0x200000 )
-		*length = 4;
-	else
-		{ *length = 0; return; }	// This code won't covert this correctly anyway.
-
-	output += *length;
-
-	// Scary scary fall throughs.
-	switch (*length) 
-	{
-		case 4:
-			--output; 
-			*output = (char)((input | BYTE_MARK) & BYTE_MASK); 
-			input >>= 6;
-		case 3:
-			--output; 
-			*output = (char)((input | BYTE_MARK) & BYTE_MASK); 
-			input >>= 6;
-		case 2:
-			--output; 
-			*output = (char)((input | BYTE_MARK) & BYTE_MASK); 
-			input >>= 6;
-		case 1:
-			--output; 
-			*output = (char)(input | FIRST_BYTE_MARK[*length]);
-	}
-}
-
-
-const char* XMLUtil::GetCharacterRef( const char* p, char* value, int* length )
-{
-	// Presume an entity, and pull it out.
-	*length = 0;
-
-	if ( *(p+1) == '#' && *(p+2) )
-	{
-		unsigned long ucs = 0;
-		ptrdiff_t delta = 0;
-		unsigned mult = 1;
-
-		if ( *(p+2) == 'x' )
-		{
-			// Hexadecimal.
-			if ( !*(p+3) ) return 0;
-
-			const char* q = p+3;
-			q = strchr( q, ';' );
-
-			if ( !q || !*q ) return 0;
-
-			delta = q-p;
-			--q;
-
-			while ( *q != 'x' )
-			{
-				if ( *q >= '0' && *q <= '9' )
-					ucs += mult * (*q - '0');
-				else if ( *q >= 'a' && *q <= 'f' )
-					ucs += mult * (*q - 'a' + 10);
-				else if ( *q >= 'A' && *q <= 'F' )
-					ucs += mult * (*q - 'A' + 10 );
-				else 
-					return 0;
-				mult *= 16;
-				--q;
-			}
-		}
-		else
-		{
-			// Decimal.
-			if ( !*(p+2) ) return 0;
-
-			const char* q = p+2;
-			q = strchr( q, ';' );
-
-			if ( !q || !*q ) return 0;
-
-			delta = q-p;
-			--q;
-
-			while ( *q != '#' )
-			{
-				if ( *q >= '0' && *q <= '9' )
-					ucs += mult * (*q - '0');
-				else 
-					return 0;
-				mult *= 10;
-				--q;
-			}
-		}
-		// convert the UCS to UTF-8
-		ConvertUTF32ToUTF8( ucs, value, length );
-		return p + delta + 1;
-	}
-	return p+1;
-}
+void XMLUtil::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
+{
+	const unsigned long BYTE_MASK = 0xBF;
+	const unsigned long BYTE_MARK = 0x80;
+	const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+	if (input < 0x80) 
+		*length = 1;
+	else if ( input < 0x800 )
+		*length = 2;
+	else if ( input < 0x10000 )
+		*length = 3;
+	else if ( input < 0x200000 )
+		*length = 4;
+	else
+		{ *length = 0; return; }	// This code won't convert this correctly anyway.
+
+	output += *length;
+
+	// Scary but deliberate fall-throughs: cases 4, 3 and 2 each write one trailing byte, then case 1 writes the lead byte.
+	switch (*length) 
+	{
+		case 4:
+			--output; 
+			*output = (char)((input | BYTE_MARK) & BYTE_MASK); 
+			input >>= 6;
+		case 3:
+			--output; 
+			*output = (char)((input | BYTE_MARK) & BYTE_MASK); 
+			input >>= 6;
+		case 2:
+			--output; 
+			*output = (char)((input | BYTE_MARK) & BYTE_MASK); 
+			input >>= 6;
+		case 1:
+			--output; 
+			*output = (char)(input | FIRST_BYTE_MARK[*length]);
+	}
+}
+
+
+const char* XMLUtil::GetCharacterRef( const char* p, char* value, int* length )
+{
+	// Presume an entity, and pull it out.
+	*length = 0;
+
+	if ( *(p+1) == '#' && *(p+2) )
+	{
+		unsigned long ucs = 0;
+		ptrdiff_t delta = 0;
+		unsigned mult = 1;
+
+		if ( *(p+2) == 'x' )
+		{
+			// Hexadecimal.
+			if ( !*(p+3) ) return 0;
+
+			const char* q = p+3;
+			q = strchr( q, ';' );
+
+			if ( !q || !*q ) return 0;
+
+			delta = q-p;
+			--q;
+
+			while ( *q != 'x' )
+			{
+				if ( *q >= '0' && *q <= '9' )
+					ucs += mult * (*q - '0');
+				else if ( *q >= 'a' && *q <= 'f' )
+					ucs += mult * (*q - 'a' + 10);
+				else if ( *q >= 'A' && *q <= 'F' )
+					ucs += mult * (*q - 'A' + 10 );
+				else 
+					return 0;
+				mult *= 16;
+				--q;
+			}
+		}
+		else
+		{
+			// Decimal.
+			if ( !*(p+2) ) return 0;
+
+			const char* q = p+2;
+			q = strchr( q, ';' );
+
+			if ( !q || !*q ) return 0;
+
+			delta = q-p;
+			--q;
+
+			while ( *q != '#' )
+			{
+				if ( *q >= '0' && *q <= '9' )
+					ucs += mult * (*q - '0');
+				else 
+					return 0;
+				mult *= 10;
+				--q;
+			}
+		}
+		// convert the UCS to UTF-8
+		ConvertUTF32ToUTF8( ucs, value, length );
+		return p + delta + 1;
+	}
+	return p+1;
+}


 char* XMLDocument::Identify( char* p, XMLNode** node ) 
@@ -397,6 +409,11 @@ char* XMLDocument::Identify( char* p, XMLNode** node )
 		returnNode = new (elementPool.Alloc()) XMLElement( this );
 		returnNode->memPool = &elementPool;
 		p += elementHeaderLen;
+
+		p = XMLUtil::SkipWhiteSpace( p );
+		if ( p && *p == '/' ) {
+			((XMLElement*)returnNode)->closingType = XMLElement::CLOSING;
+		}
 	}
 	else {
 		returnNode = new (textPool.Alloc()) XMLText( this );
@@ -587,20 +604,75 @@ const XMLElement* XMLNode::LastChildElement( const char* value ) const

 char* XMLNode::ParseDeep( char* p )
 {
+	// This is a recursive method, but thinking about it "at the current level"
+	// it is a pretty simple flat list:
+	//		<foo/>
+	//		<!-- comment -->
+	//
+	// With a special case:
+	//		<foo>
+	//		</foo>
+	//		<!-- comment -->
+	//		
+	// Where the closing element (/foo) *must* be the next thing after the opening
+	// element, and the names must match. BUT the tricky bit is that the closing
+	// element will be read by the child.
+
 	while( p && *p ) {
 		XMLNode* node = 0;
-		p = document->Identify( p, &node );
-		if ( p && node ) {
-			p = node->ParseDeep( p );
+		char* mark = p;

-			if ( node->IsClosingElement() ) {
-				if ( !XMLUtil::StringEqual( Value(), node->Value() )) {
-					document->SetError( ERROR_MISMATCHED_ELEMENT, Value(), 0 );
-				}
+		p = document->Identify( p, &node );
+		if ( p == 0 ) {
+			break;
+		}
+
+		// We read the end tag. Back up and return.
+		if ( node && node->ToElement() && node->ToElement()->ClosingType() == XMLElement::CLOSING ) {
+			DELETE_NODE( node );
+			return mark;
+		}
+
+		if ( node ) {
+			p = node->ParseDeep( p );
+			if ( !p ) {
 				DELETE_NODE( node );
-				return p;
+				node = 0;
+				break;
+			}
+	
+			XMLElement* ele = node->ToElement();
+			if ( ele && ele->ClosingType() == XMLElement::OPEN ) {
+				XMLNode* closingNode = 0;
+				p = document->Identify( p, &closingNode );
+				XMLElement* closingEle = closingNode ? closingNode->ToElement() : 0;
+
+				if ( closingEle == 0 ) {
+					document->SetError( ERROR_MISMATCHED_ELEMENT, node->Value(), 0 );
+					p = 0;
+				}
+				else if ( closingEle->ClosingType() != XMLElement::CLOSING ) {
+					document->SetError( ERROR_MISMATCHED_ELEMENT, node->Value(), 0 );
+					p = 0;
+				}
+				else 
+				{
+					p = closingEle->ParseDeep( p );
+					if ( !XMLUtil::StringEqual( closingEle->Value(), node->Value() )) { 
+						document->SetError( ERROR_MISMATCHED_ELEMENT, node->Value(), 0 );
+						p = 0;
+					}
+				}
+				// Matched or not, the closing tag never becomes part of the DOM, so throw the node away.
+				DELETE_NODE( closingNode );
+				if ( p == 0 ) {
+					DELETE_NODE( node );
+					node = 0;
+				}
+			}
+			if ( node ) {
+				this->InsertEndChild( node );
 			}
-			this->InsertEndChild( node );
 		}
 	}
 	return 0;
@@ -736,7 +808,7 @@ char* XMLAttribute::ParseDeep( char* p )
 	char endTag[2] = { *p, 0 };
 	++p;
 	p = value.ParseText( p, endTag, StrPair::ATTRIBUTE_VALUE );
-	if ( value.Empty() ) return 0;
+	//if ( value.Empty() ) return 0;
 	return p;
 }

@@ -842,9 +914,8 @@ void XMLAttribute::SetAttribute( float v )

 // --------- XMLElement ---------- //
 XMLElement::XMLElement( XMLDocument* doc ) : XMLNode( doc ),
-	closing( false ),
+	closingType( 0 ),
 	rootAttribute( 0 )
-	//lastAttribute( 0 )
 {
 }

@@ -937,10 +1008,9 @@ void XMLElement::DeleteAttribute( const char* name )
 }


-char* XMLElement::ParseAttributes( char* p, bool* closedElement )
+char* XMLElement::ParseAttributes( char* p )
 {
 	const char* start = p;
-	*closedElement = false;

 	// Read the attributes.
 	while( p ) {
@@ -965,11 +1035,7 @@ char* XMLElement::ParseAttributes( char* p, bool* closedElement )
 		}
 		// end of the tag
 		else if ( *p == '/' && *(p+1) == '>' ) {
-			if ( closing ) {
-				document->SetError( ERROR_PARSING_ELEMENT, start, p );
-				return 0;
-			}
-			*closedElement = true;
+			closingType = CLOSED;
 			return p+2;	// done; sealed element.
 		}
 		// end of the tag
@@ -1001,7 +1067,7 @@ char* XMLElement::ParseDeep( char* p )
 	// parsed just like a regular element then deleted from
 	// the DOM.
 	if ( *p == '/' ) {
-		closing = true;
+		closingType = CLOSING;
 		++p;
 	}

@@ -1009,8 +1075,8 @@ char* XMLElement::ParseDeep( char* p )
 	if ( value.Empty() ) return 0;

 	bool elementClosed=false;
-	p = ParseAttributes( p, &elementClosed );
-	if ( !p || !*p || elementClosed || closing ) 
+	p = ParseAttributes( p );
+	if ( !p || !*p || closingType ) 
 		return p;

 	p = XMLNode::ParseDeep( p );