From 23e7ae608548b03136c67981830af244b8a7073a Mon Sep 17 00:00:00 2001 From: Martinsh Shaiters Date: Sat, 26 Jan 2013 20:15:44 +0200 Subject: [PATCH 1/3] Add test for lead non-alpha characters in attribute names. --- xmltest.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/xmltest.cpp b/xmltest.cpp index 8a13382..3354471 100644 --- a/xmltest.cpp +++ b/xmltest.cpp @@ -1033,6 +1033,13 @@ int main( int /*argc*/, const char ** /*argv*/ ) doc.Parse( xml ); XMLTest( "Non-alpha element lead letter parses.", doc.Error(), false ); } + + { + const char* xml = "<element _attr1=\"foo\" :attr2=\"bar\"></element>"; + XMLDocument doc; + doc.Parse(xml); + XMLTest("Non-alpha attribute lead character parses.", doc.Error(), false); + } { const char* xml = "WOA THIS ISN'T GOING TO PARSE"; From c6d02f48d57480b89774ceb28806299e1ec360ff Mon Sep 17 00:00:00 2001 From: Martinsh Shaiters Date: Sat, 26 Jan 2013 21:22:57 +0200 Subject: [PATCH 2/3] Implement new utility functions for testing validity of name and token characters. --- tinyxml2.cpp | 9 ++------- tinyxml2.h | 20 ++++++++++++++------ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/tinyxml2.cpp b/tinyxml2.cpp index 647901b..2c06a46 100755 --- a/tinyxml2.cpp +++ b/tinyxml2.cpp @@ -136,12 +136,7 @@ char* StrPair::ParseName( char* p ) return 0; } - while( *p && ( - XMLUtil::IsAlphaNum( (unsigned char) *p ) - || *p == '_' - || *p == ':' - || (*p == '-' && p>start ) // can be in a name, but not lead it. - || (*p == '.' && p>start ) )) { // can be in a name, but not lead it. + while( *p && ( p == start ? XMLUtil::IsNameStartChar( *p ) : XMLUtil::IsNameChar( *p ) )) { ++p; } @@ -1357,7 +1352,7 @@ char* XMLElement::ParseAttributes( char* p ) } // attribute. 
- if ( XMLUtil::IsAlpha( *p ) ) { + if (XMLUtil::IsNameStartChar( *p ) ) { XMLAttribute* attrib = new (_document->_attributePool.Alloc() ) XMLAttribute(); attrib->_memPool = &_document->_attributePool; attrib->_memPool->SetTracked(); diff --git a/tinyxml2.h b/tinyxml2.h index 0f8e9ad..698412a 100755 --- a/tinyxml2.h +++ b/tinyxml2.h @@ -464,6 +464,19 @@ public: static bool IsWhiteSpace( char p ) { return !IsUTF8Continuation(p) && isspace( static_cast<unsigned char>(p) ); } + + inline static bool IsNameStartChar( unsigned char ch ) { + return ( ( ch < 128 ) ? isalpha( ch ) : 1 ) + || ch == ':' + || ch == '_'; + } + + inline static bool IsNameChar( unsigned char ch ) { + return IsNameStartChar( ch ) + || isdigit( ch ) + || ch == '.' + || ch == '-'; + } inline static bool StringEqual( const char* p, const char* q, int nChar=INT_MAX ) { int n = 0; @@ -480,15 +493,10 @@ public: } return false; } + inline static int IsUTF8Continuation( const char p ) { return p & 0x80; } - inline static int IsAlphaNum( unsigned char anyByte ) { - return ( anyByte < 128 ) ? isalnum( anyByte ) : 1; - } - inline static int IsAlpha( unsigned char anyByte ) { - return ( anyByte < 128 ) ? isalpha( anyByte ) : 1; - } static const char* ReadBOM( const char* p, bool* hasBOM ); // p is the starting location, From 95b3e6543b1ad576d6722c487d7347f76f79eeac Mon Sep 17 00:00:00 2001 From: Martinsh Shaiters Date: Sat, 26 Jan 2013 23:08:10 +0200 Subject: [PATCH 3/3] Add test case for element names with leading digit. 
--- xmltest.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/xmltest.cpp b/xmltest.cpp index 3354471..7d4ca9d 100644 --- a/xmltest.cpp +++ b/xmltest.cpp @@ -1037,9 +1037,16 @@ int main( int /*argc*/, const char ** /*argv*/ ) { const char* xml = "<element _attr1=\"foo\" :attr2=\"bar\"></element>"; XMLDocument doc; - doc.Parse(xml); + doc.Parse( xml ); XMLTest("Non-alpha attribute lead character parses.", doc.Error(), false); } + + { + const char* xml = "<3lement>"; + XMLDocument doc; + doc.Parse( xml ); + XMLTest("Element names with lead digit fail to parse.", doc.Error(), true); + } { const char* xml = "WOA THIS ISN'T GOING TO PARSE";