From e4422304e0bd27826e333be2dfe34953fbfd1f4a Mon Sep 17 00:00:00 2001 From: Lee Thomason Date: Fri, 20 Jan 2012 17:59:50 -0800 Subject: [PATCH] refactored logic to use the StrPair. Still cleaning up bugs. --- tinyxml2.cpp | 128 +++++++++++++++++++++++++++------------------------ tinyxml2.h | 16 ++++--- tinyxml2.suo | Bin 29184 -> 29184 bytes 3 files changed, 78 insertions(+), 66 deletions(-) diff --git a/tinyxml2.cpp b/tinyxml2.cpp index 091f328..fe1665b 100644 --- a/tinyxml2.cpp +++ b/tinyxml2.cpp @@ -7,10 +7,12 @@ using namespace tinyxml2; -static const char LINE_FEED = (char)0x0a; // all line endings are normalized to LF +static const char LINE_FEED = (char)0x0a; // all line endings are normalized to LF static const char LF = LINE_FEED; static const char CARRIAGE_RETURN = (char)0x0d; // CR gets filtered out static const char CR = CARRIAGE_RETURN; +static const char SINGLE_QUOTE = '\''; +static const char DOUBLE_QUOTE = '\"'; // --------- CharBuffer ----------- // @@ -31,60 +33,66 @@ static const char CR = CARRIAGE_RETURN; } +const char* StrPair::GetStr() +{ + if ( flags & NEEDS_FLUSH ) { + *end = 0; + + if ( flags & ( NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION ) ) { + char* p = start; + char* q = start; + + while( p < end ) { + if ( *p == CR ) { + // CR-LF pair becomes LF + // CR alone becomes LF + // LF-CR becomes LF + if ( *(p+1) == LF ) { + p += 2; + } + else { + ++p; + } + *q = LF; + } + else if ( *p == LF ) { + if ( *(p+1) == CR ) { + p += 2; + } + else { + ++p; + } + *q = LF; + } + else { + *q = *p; + ++p; + } + } + } + flags = 0; + } + return start; +} + + // --------- XMLBase ----------- // -const char* XMLBase::ParseText( char* p, const char* endTag, char** next ) +char* XMLBase::ParseText( char* p, StrPair* pair, const char* endTag ) { TIXMLASSERT( endTag && *endTag ); char* start = p; - char* q = p; // q (target) <= p (src) in same buffer. char endChar = *endTag; int length = strlen( endTag ); - char* nextTag = 0; - *next = 0; // Inner loop of text parsing. while ( *p ) { if ( *p == endChar && strncmp( p, endTag, length ) == 0 ) { - *q = 0; - nextTag = p + length; + pair->Set( start, p, StrPair::NEEDS_ENTITY_PROCESSING | StrPair::NEEDS_NEWLINE_NORMALIZATION ); break; } - else if ( *p == CR ) { - // CR-LF pair becomes LF - // CR alone becomes LF - // LF-CR becomes LF - if ( *(p+1) == LF ) { - p += 2; - } - else { - ++p; - } - *q = LF; - } - else if ( *p == LF ) { - if ( *(p+1) == CR ) { - p += 2; - } - else { - ++p; - } - *q = LF; - } - else { - *q = *p; - ++p; - } - ++q; } - - // Error? If we don't have a text tag, something went wrong. (Although - // what the nextTag points at may be null.) - if ( nextTag == 0 ) { - return 0; - } - *next = nextTag; - return start; + return p; } @@ -92,7 +100,6 @@ char* XMLBase::ParseName( char* p, StrPair* pair ) { char* start = p; char* nextTag = 0; - *next = 0; start = p; if ( !start || !(*start) ) { @@ -112,11 +119,10 @@ char* XMLBase::ParseName( char* p, StrPair* pair ) { ++p; } - *p = 0; if ( p > start ) { - *next = p+1; - return start; + pair->Set( start, p, 0 ); + return p; } return 0; } @@ -241,7 +247,7 @@ void XMLNode::PrintSpace( FILE* fp, int depth ) // --------- XMLComment ---------- // -XMLComment::XMLComment( XMLDocument* doc ) : XMLNode( doc ), value( 0 ) +XMLComment::XMLComment( XMLDocument* doc ) : XMLNode( doc ) { } @@ -261,8 +267,7 @@ void XMLComment::Print( FILE* fp, int depth ) char* XMLComment::ParseDeep( char* p ) { // Comment parses as text. - value = ParseText( p, "-->", &p ); - return p; + return ParseText( p, &value, "-->" ); } @@ -271,8 +276,8 @@ char* XMLAttribute::ParseDeep( char* p ) { char endTag[2] = { *p, 0 }; ++p; - value = ParseText( p, endTag, &p ); - if ( !value ) return 0; + p = ParseText( p, &value, endTag ); + if ( value.Empty() ) return 0; return p; } @@ -285,7 +290,6 @@ void XMLAttribute::Print( FILE* cfile ) // --------- XMLElement ---------- // XMLElement::XMLElement( XMLDocument* doc ) : XMLNode( doc ), - name( 0 ), closing( false ), rootAttribute( 0 ), lastAttribute( 0 ) @@ -326,25 +330,24 @@ char* XMLElement::ParseDeep( char* p ) ++p; } - name = ParseName( p, &p ); - if ( !name ) return 0; + p = ParseName( p, &name ); + if ( name.Empty() ) return 0; // Read the attributes. while( p ) { p = SkipWhiteSpace( p ); if ( !p || !(*p) ) { - document->SetError( XMLDocument::ERROR_PARSING_ELEMENT, start, name ); + document->SetError( XMLDocument::ERROR_PARSING_ELEMENT, start, name.GetStr() ); return 0; } - const char* saveP = p; // attribute. - if ( *p == '\'' || *p == '\"' ) { + if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) { XMLAttribute* attrib = new XMLAttribute( this ); p = attrib->ParseDeep( p ); if ( !p ) { delete attrib; - document->SetError( XMLDocument::ERROR_PARSING_ATTRIBUTE, start, saveP ); + document->SetError( XMLDocument::ERROR_PARSING_ATTRIBUTE, start, p ); return 0; } if ( rootAttribute ) { @@ -356,36 +359,41 @@ char* XMLElement::ParseDeep( char* p ) rootAttribute = lastAttribute = attrib; } } + // end of the tag else if ( *p == '/' && *(p+1) == '>' ) { - // end tag. if ( closing ) { document->SetError( XMLDocument::ERROR_PARSING_ELEMENT, start, p ); return 0; } return p+2; // done; sealed element. } + // end of the tag else if ( *p == '>' ) { ++p; break; } + else { + document->SetError( XMLDocument::ERROR_PARSING_ELEMENT, start, p ); + return 0; + } } while( p && *p ) { XMLNode* node = 0; p = Identify( document, p, &node ); if ( p && node ) { - node->ParseDeep( p ); + p = node->ParseDeep( p ); XMLElement* element = node->ToElement(); if ( element && element->Closing() ) { if ( StringEqual( element->Name(), this->Name() ) ) { // All good, this is closing tag. delete node; - p = 0; } else { document->SetError( XMLDocument::ERROR_PARSING_ELEMENT, start, p ); delete node; + p = 0; } return p; } diff --git a/tinyxml2.h b/tinyxml2.h index fe3979e..779fdde 100644 --- a/tinyxml2.h +++ b/tinyxml2.h @@ -50,21 +50,25 @@ class StrPair { public: enum { - NEEDS_FLUSH = 0x01, - NEEDS_ENTITY_PROCESSING = 0x02, - NEEDS_NEWLINE_NORMALIZATION = 0x04 + NEEDS_ENTITY_PROCESSING = 0x01, + NEEDS_NEWLINE_NORMALIZATION = 0x02 }; StrPair() : flags( 0 ), start( 0 ), end( 0 ) {} - void Init( const char* start, char* end, int flags ) { + void Set( char* start, char* end, int flags ) { this->start = start; this->end = end; this->flags = flags | NEEDS_FLUSH; } const char* GetStr(); + bool Empty() const { return start == end; } private: + enum { + NEEDS_FLUSH = 0x100 + }; + // After parsing, if *end != 0, it can be set to zero. int flags; - const char* start; + char* start; char* end; }; @@ -96,7 +100,7 @@ protected: inline static int IsAlphaNum( unsigned char anyByte ) { return ( anyByte <= 127 ) ? isalnum( anyByte ) : 1; } inline static int IsAlpha( unsigned char anyByte ) { return ( anyByte <= 127 ) ? isalpha( anyByte ) : 1; } - const char* ParseText( char* in, const char* endTag, char** next ); + char* ParseText( char* in, StrPair* pair, const char* endTag ); char* ParseName( char* in, StrPair* pair ); char* Identify( XMLDocument* document, char* p, XMLNode** node ); }; diff --git a/tinyxml2.suo b/tinyxml2.suo index 2a9ea15f0b30c79acbe82b24c9b6f69d1246574f..89663df14719c52a179764bee2d449e878777d7b 100644 GIT binary patch delta 2032 zcmYjSZA?>F7`_(^lv--3O>JsdYAvNe+q8(KqqVfOmT#m|s+&Oh8e%C}kxXLTL}T`E z2s~)EWjf5vVixBV%wm>B&BhOtEd~?g7PBq>AjY`Fp)pIgAF}tJ3+PQw?#DUr*YiB@ zd&d^}u|@uBN|M=`-dFQz?vUhG20s8-_7Q!G%7Ax)-OyNm`5)Uj|gbN+`p-@lv zI$SUDVJV`3r%=uZ@I3G~RMr_5`IXO%y+v|_UF6wn!AS90oU(h+Tx`Zn(I|S0d}JL* ztfZ59TnN(H5+&503Y6MaEb@ZQO4&FNh{$o$rdj9VdeGI7k3ajg{8>DAd9h%V;F#0G zPT%#b@GPvz=;!8m$1*Qj=D6)RVNW1z58!&lL7?5iOIJ5bW5H!cY@6^+hVfDd0ii@d zUxvDEGM3KMQa$bWQ943+?NnJ4X3P3$lDkCn3q=J9l&6rD??cfB)hlo2bA*shwP|EvpE#jg=vdD*O&8`_iS0()6*Pe!Pp~VZoS4&k; z;T3KM%0#Uk6J9Mpfi2|+;p{Q4LD;EMGO^y()o>l@TNt2%;%?eyF!)W3dja5^JM;{6K|#Yi)Xa9lse~@7ViBwq;V3Q?C2*W4*^my4Ka3{e`NJ~U};7Q0CD>;ZZda1t*gj6q~U^$u>eR}V1{LcL4X)O&UTNfj>E zq^Jt9C*5^s-pBBNT&rfD{nbi*U8@lJ3yIxrBaH=7S5JLO9rw7-BKAM&=7w8?%XM;+ zz6Ve1i-k(30$J<3xC5LhCk3Uvk*VJ}Amh&=ub~~cO$Im{%`AkF+>-_!0bhZtCL42n zHENw|A}$)^)Wr@wYqG8#Z89s?RH$RxoT$|^o;AetIO*&-0!c+S$9+ccsTGct_^#Q& zcnY&QH5S%kVz&m1(O#DM_t9c;PEa!Gs7lb&R6q+`RL=Lm;le_s#;J)u#8_C5Pg+(7 zkQ%-g6@$jvsKg>Uo{n`ehodn)8)L^>b!;3e%4NB$9;F8@t?)GR<4N4wQ-YayG@3DfM!mXS2T4bSAEh>F$6~t?%?WW>coH;{iE^hun!5_P7gjYq$_s!L?D$ zVfrB)A8N>Zzi;Q+-~U)LfAQ_rkGg-U4(6k)XCwX|`YrG9wsSxKB>iUk`r!SF+RuNT fRI{DG4(`q2wvfvLS^@|dPVbg%6V6z-J|ORe4IW2L!k>b16|l=gZ{3tUTq0(A&TEm#qWd?_%5iWPtOkr;PGU?9KA>@|X8k zXK3y8@(vkBhaGb-b=e%JoBMZ7vrd~ui)+&x7&Eq_zR-(*jIqqZx87rgtyZei)~uG+8`qAk&v|eQDE=JJzErh$0YO)C-ZqwZHA#t%WcGoqFu07 zNEj?Kv!}Vp%!M#tQU#;)BEnt?GgSp>E6$_4b>pEc$c-Vl*n?MHGH$s9Ojav+yvdA+ zON;*EPbr21-kFUzC3?yvVZrLi*-O}|=w(p08YCQfIcjak$rSwLT93s+6GEO$x}}8Y zK0RKzjqH|uwH_DTW=!=csB;%!j~rt8)b%FWL_XpiCoOZ_GL}kASoBykG@3HhdIoSW zq9D|ijV1XuJ5pL|BS2K3T!{Ik$yx!Qms(iGiqA|K@(7vzlRh2o4x)e1j9!n3(H<># z2+O{0xDrU{RME1sIe(A|Qy51174+4>@3=k^^|MNj*cv zkO;=T+SQUqg>w}1HZPVM^-wk$@tqQ;J4+BN%NeW-0V~7gRHZ=lss!E+nkb`$P`#1S z(M&N;)4fh0w3jCw&%^7WcupIW-5E?&_b@rgRvj!|1#H!WR-Y&By^?5%DhFt1!u^~B^5+0PmCIws_XH{UP*L`>_idH^iT`djj}kQrab0Tho1Tt22H4*$0q?D z^F4(5NUFQ0qk2R)>j=btb}nGe!@01YAPXQE&SPK(tU`X9rk;u%peZq+Nf9Bke9wI* z0&xO2NG^evJiK~ePiIupQk|X6I$NbV(lE>*NLbCN+ajjD7`1raV8EU&*IBkulR!6V zN20NgbyjKy*x1Yvckr05C_rbk9=DoegiSThHL=c_+*&~T4A4k%GPzG}b*oZG?}(23 zjLKE7C$OYRyO7q>2)lMPVqwAMumSN_0epmpP0Jwpw&7%vgo_a;EY1Y!*g~aVV6h_G z1TM}9Nwi5S&{<*9)lhn+ob{sG9qyVG4aJyfZDE}`)aoLzW$^o*D{2~IAgP5;N3BeK zNjndJ%*g&PY1h$8;AnUQzmKtqOVrEuP((XhumKvPnJB)Z4 zYv)d3g5NjUZoypp6fEs$=Nfii=Zs&_-BUl|?oIlWPZplSy7oR?>&U^Koh3-_GQJy2 zJE@OOvh`$Pic&V)=~{)8RdqZPmsotgB2XDpkh3Hnk1eTDu<10-!|DA%3IB19O*W)L8N{Q#JGV}Ynko6&t>Y# zBlXGZdz77}5!Kh>Vb|Qtp@$q!4GIuOqHDKsv`jzs`5q3BloYz?Evo6$jA;&$%)?{@ zG|-*+aiowl;OGYp=*=?W9L=*?nkbL$DvL5Jt=B8Bu(^-T0QGZ{h?@}Je;2*lP^K)) z$I+?{VtlkY_vyDe)~g@gzjXEd#_o*Zlbh?%w>KYuk3P-4_MrUO@4wtUiB9jYiu_x5 eKHK@@Kt~Rz!o7h^EevCcwoIG)!pce*xc>k{28%ub