From 67d6131d743bf89e7bcce7b60af6e323d209e6a8 Mon Sep 17 00:00:00 2001 From: Lee Thomason Date: Tue, 24 Jan 2012 16:01:51 -0800 Subject: [PATCH] new element parsing. reasonable set of test cases. --- tinyxml2.cpp | 146 +++++++++++++++++++++++++++++---------------------- tinyxml2.h | 22 +++++--- tinyxml2.suo | Bin 32256 -> 0 bytes xmltest.cpp | 2 + 4 files changed, 98 insertions(+), 72 deletions(-) delete mode 100644 tinyxml2.suo diff --git a/tinyxml2.cpp b/tinyxml2.cpp index 0a02c4b..9df038f 100644 --- a/tinyxml2.cpp +++ b/tinyxml2.cpp @@ -186,6 +186,7 @@ char* XMLBase::Identify( XMLDocument* document, char* p, XMLNode** node ) XMLNode::XMLNode( XMLDocument* doc ) : document( doc ), parent( 0 ), + isTextParent( false ), firstChild( 0 ), lastChild( 0 ), prev( 0 ), next( 0 ) { @@ -242,6 +243,9 @@ XMLNode* XMLNode::InsertEndChild( XMLNode* addThis ) addThis->prev = 0; addThis->next = 0; } + if ( addThis->ToText() ) { + SetTextParent(); + } return addThis; } @@ -254,6 +258,25 @@ void XMLNode::Print( FILE* fp, int depth ) } +char* XMLNode::ParseDeep( char* p ) +{ + while( p && *p ) { + XMLNode* node = 0; + p = Identify( document, p, &node ); + if ( p && node ) { + p = node->ParseDeep( p ); + // FIXME: is it the correct closing element? + if ( node->IsClosingElement() ) { + delete node; + return p; + } + this->InsertEndChild( node ); + } + } + return 0; +} + + void XMLNode::PrintSpace( FILE* fp, int depth ) { for( int i=0; i form. It is - // parsed just like a regular element then deleted from - // the DOM. - if ( *p == '/' ) { - closing = true; - ++p; - } - - p = ParseName( p, &name ); - if ( name.Empty() ) return 0; + *closedElement = false; // Read the attributes. while( p ) { @@ -400,6 +411,7 @@ char* XMLElement::ParseDeep( char* p ) document->SetError( XMLDocument::ERROR_PARSING_ELEMENT, start, p ); return 0; } + *closedElement = true; return p+2; // done; sealed element. } // end of the tag @@ -412,38 +424,47 @@ char* XMLElement::ParseDeep( char* p ) return 0; } } + return p; +} - while( p && *p ) { - XMLNode* node = 0; - p = Identify( document, p, &node ); - if ( p && node ) { - p = node->ParseDeep( p ); - XMLElement* element = node->ToElement(); - if ( element && element->Closing() ) { - if ( StringEqual( element->Name(), this->Name() ) ) { - // All good, this is closing tag. - delete node; - } - else { - document->SetError( XMLDocument::ERROR_PARSING_ELEMENT, start, p ); - delete node; - p = 0; - } - return p; - } - else { - this->InsertEndChild( node ); - } - } +// +// +// foobar +// +char* XMLElement::ParseDeep( char* p ) +{ + // Read the element name. + p = SkipWhiteSpace( p ); + if ( !p ) return 0; + const char* start = p; + + // The closing element is the form. It is + // parsed just like a regular element then deleted from + // the DOM. + if ( *p == '/' ) { + closing = true; + ++p; } - return 0; + + p = ParseName( p, &name ); + if ( name.Empty() ) return 0; + + bool elementClosed=false; + p = ParseAttributes( p, &elementClosed ); + if ( !p || !*p || elementClosed || closing ) + return p; + + p = XMLNode::ParseDeep( p ); + return p; } void XMLElement::Print( FILE* cfile, int depth ) { - PrintSpace( cfile, depth ); + if ( !parent || !parent->IsTextParent() ) { + PrintSpace( cfile, depth ); + } fprintf( cfile, "<%s", Name() ); for( XMLAttribute* attrib=rootAttribute; attrib; attrib=attrib->next ) { @@ -452,38 +473,39 @@ void XMLElement::Print( FILE* cfile, int depth ) } if ( firstChild ) { - // fixme: once text is on, it should stay on, and not use newlines. - bool useNewline = firstChild->ToText() == 0; - fprintf( cfile, ">", Name() ); - if ( useNewline ) fprintf( cfile, "\n" ); + if ( !IsTextParent() ) { + fprintf( cfile, "\n" ); + } for( XMLNode* node=firstChild; node; node=node->next ) { node->Print( cfile, depth+1 ); } - fprintf( cfile, "\n", Name() ); - // fixme: see note above - //if ( useNewline ) fprintf( cfile, "\n" ); + fprintf( cfile, "", Name() ); + if ( !IsTextParent() ) { + fprintf( cfile, "\n" ); + } } else { - fprintf( cfile, "/>\n" ); + fprintf( cfile, "/>" ); + if ( !IsTextParent() ) { + fprintf( cfile, "\n" ); + } } } // --------- XMLDocument ----------- // -XMLDocument::XMLDocument() : +XMLDocument::XMLDocument() : + XMLNode( this ), charBuffer( 0 ) { - root = new XMLNode( this ); } XMLDocument::~XMLDocument() { - delete root; - delete charBuffer; } @@ -493,25 +515,21 @@ bool XMLDocument::Parse( const char* p ) charBuffer = CharBuffer::Construct( p ); XMLNode* node = 0; - // fixme: clean up - char* q = Identify( this, charBuffer->mem, &node ); - while ( node ) { - root->InsertEndChild( node ); - q = node->ParseDeep( q ); - node = 0; - if ( q && *q ) { - q = Identify( this, q, &node ); - } - } - return false; + char* q = ParseDeep( charBuffer->mem ); + return true; } void XMLDocument::Print( FILE* fp, int depth ) { - for( XMLNode* node = root->firstChild; node; node=node->next ) { + for( XMLNode* node = firstChild; node; node=node->next ) { node->Print( fp, depth ); } } +void XMLDocument::SetError( int error, const char* str1, const char* str2 ) +{ + printf( "ERROR: id=%d '%s' '%s'\n", error, str1, str2 ); +} + diff --git a/tinyxml2.h b/tinyxml2.h index 227c7c7..4fbd33c 100644 --- a/tinyxml2.h +++ b/tinyxml2.h @@ -106,7 +106,6 @@ protected: char* Identify( XMLDocument* document, char* p, XMLNode** node ); }; - class XMLNode : public XMLBase { friend class XMLDocument; @@ -121,7 +120,12 @@ public: virtual XMLText* ToText() { return 0; } virtual XMLComment* ToComment() { return 0; } - virtual char* ParseDeep( char* ) { TIXMLASSERT( 0 ); } + // fixme: guarentee null terminator to avoid internal checks + virtual char* ParseDeep( char* ); + + void SetTextParent() { isTextParent = true; } + bool IsTextParent() const { return isTextParent; } + virtual bool IsClosingElement() const { return false; } protected: XMLNode( XMLDocument* ); @@ -129,6 +133,7 @@ protected: XMLDocument* document; XMLNode* parent; + bool isTextParent; XMLNode* firstChild; XMLNode* lastChild; @@ -208,13 +213,15 @@ public: virtual void Print( FILE* cfile, int depth ); virtual XMLElement* ToElement() { return this; } - bool Closing() const { return closing; } + virtual bool IsClosingElement() const { return closing; } char* ParseDeep( char* p ); protected: private: + char* ParseAttributes( char* p, bool *closedElement ); + StrPair name; bool closing; XMLAttribute* rootAttribute; @@ -222,7 +229,7 @@ private: }; -class XMLDocument : public XMLBase +class XMLDocument : public XMLNode { public: XMLDocument(); @@ -231,20 +238,19 @@ public: bool Parse( const char* ); void Print( FILE* cfile=stdout, int depth=0 ); + /* XMLNode* Root() { return root; } XMLNode* RootElement(); - + */ enum { ERROR_ELEMENT_MISMATCH, ERROR_PARSING_ELEMENT, ERROR_PARSING_ATTRIBUTE }; - void SetError( int error, const char* str1, const char* str2 ) {} + void SetError( int error, const char* str1, const char* str2 ); private: XMLDocument( const XMLDocument& ); // intentionally not implemented - - XMLNode* root; CharBuffer* charBuffer; }; diff --git a/tinyxml2.suo b/tinyxml2.suo deleted file mode 100644 index e3bbf90cd91fc6c593cad22ee0ff262db5e586ea..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 32256 zcmeHQdypK(c^~nRjD*d@NXUTkA_NkWcGm(hz;jM{3GbmPdPiB^ zXH@a8fw>qnDmd1RqZq01Z5^aWGRQe?*Iqv`{^l*;JG0_q+%(9^Mwo2Mhv+0KvH(e=V4s6^s~yDWlINQI&U>Vs z5>L}{|D!mkHXpOE9d~>uf1jBC@yfLm|F`GmRg+IQMzR~{C zo>1p0Q|<2--_rj7jDOJ5ptAt_w7GSh(*~QMOWQXJI1CL>TG}t}_x{}i`ivK7kHXTo z?6r-EG;&50{btW=#(s=J1Jn=VQ>VTYMGW*wmv%2Kea1($m26)6!{EUL4|5omD#n98 zxg8&yL7#S-wAn1A9saDx_k;F5RFc{3d|R=(sa0(Fv-#|tIbSU#a@huvxpZKI|4-^> zqfx5N)_wIRK=F1@O^@tLk0#2c+~$0#QfiosW}=!qbXyD2Tm6aVnx%4~W|qxt-SlPl z3j24nW;K&fEm{GHt2gT$H?Y^R1-^&^dLT^4@K8EE(=%Y3j+E~Z+~MDYKr6PeP==Xn zl&Td!bq|;~m_g3CF9RF>87O99Q8Ep)-r&f$419n=68$ci#cZ?O@Ff?(AtoY_LlSp3 zIsghjk8GFI8a+H+A2oB$*-5ZqRvOj_O~U$TXU%P~X|qlXSIO-e0N|sz>UH(QG4|E-lm(lch=}n=6~vhq1+mS*e$DrE;lpG%-m|>$s9P zw@z3mW{tm_YE)|``?*Ht5Lv-2xXFTp7$UL~yc8{R3|ZkKvb#QHh8bfwuIF)A72i~mKQND(w!BCa zzbxa^6s|Rm0Tq|6%?`-y1dpxE20~GQcP>tpE84?xoLS^U~)`S1?~wn3<*}jqP(F(kI>3 zz!jE0>k-2%NWC5XqnLeWWLAJDZua>L`fvN*u=E+HOH|(e@5b0uDw&7xx5uJt90`m8 zw%Pq>E*6%44ELrb12|ImylpAhytvwZwx^-pEp5MBZ44r1z1Vnm#psSC{ym!N8%mA# z^z_Ahds4meo}Qku_)!0FPkg+)cesCasDEs1VC2McL?K5JyLV-nJM4y}m7!B5%UWbD z{c2iKsi;jbYZT$?PRmw43Omw=%rOP&?ZLl3RH%A!OyO_by3zw%62~3=$Sr$tHjY|V zFV6e%3tk<=zX8~w6R<`F%r32zqATKWCsjtb?t5h2#Duoe8tj5bF%H({%`1QY@uyP~ zLB!E|Z24ndAgugtz`aucX>STD+WlwW*&IU?czQFw8Hd!eje@CACdoBwt+q^EPdzrODU%wEPxkUerkizcFKGE z#{8T0ch=X!(r4XX29E7)(@v{OrfX5!VN4kN&?8229COn+Zu_`y)bJCCwT3N8XZAge zyI2oXmY0>g^UI5}eL8$F(ipO=ZVAz%33(|(dW8k1Ci-LDiKuf3FoKF?5>}4?MywSK z{A3C3{+I#!`q4YJx_yM7uwr3KrSyXI)#sZQaIPLd_wu^6={=)g`Nn@ec;YMnaVlsy z{O%)9g0J%{9lHYN(aYOLNGexQ=HNl-!RH{?%t4a)m$GsVR;XFW&ExwrGK^Uq>21W> zfwPn+Hb!I*duFnUIVxIKA!m62`g%7e8OS6K=NA`h)kDYa1YTaxw45aJ3k%1OKRTKm zOAYmoJOB!`I$3C&36SD-7)U8Z&7EFxl`VuGsa2iTHUHb zq^wxGVddRopA4&^;pN%Ot|rj#7RS=Hj!0B&7O z`j=|;>kUllsrbav_fhmxuiCzg;m;w4DghIi9UyW(oUnVlvNQo zy^|{hv=153u7DydB-Hc_&Ks!AE?9Cxo_-G?wNtL1MDYh)p=h9&962>^PXqfN&2ek> zvZA^pJvXcMPuVypjaTq#)E?7t{HvLfXJU4kzvQ%dbxY1HMpeYVtU1wcQ9i|v1m~DM z20HqAlE}DE}rEd8!QX_6PpT7#cQN0-VCveS5d4?^( zcD`))5y%}S&@F#^K|#;9y6Xzwl|(fX+#7Yumb>NBWwCOmRym94GkqwzGK`ljSYGSm z<%rgXbHF+Q<&?gI{ySmGEq4Mv0nqaqQcG#egkA}Ky&9O(uH39Ezu(@wBcK!cd2Jk}m-P1NEY`_>RAmXqaue9R%F%{HPPGN4`AIie6;Gs ztn55-amQ8^Tg=#BR`GZ)`uquCyVJI3(vMC-Qzvk*H--?`XAs%4_C>2Ma6AJXdYOZ> zcg%8FFHko2)7m|B&oHe8qe8)51=ym;@gm0;-kfV}1w^pSRu#A0eiwNkN@`l)61 z-d-(YCNvKaGS7Z~4xncp++*pCt?mOZy^Mu7M?c)?=h{>W~NoZ zY;ngiYfwT8MUEuaM`o~^mBC6q>!Gsd#2H||FIOiL&_`N_L4cmrt1;uw1CHAfixaPp zcpRAZW`j0DMkhi)DgWx4lAcF^TUYXBpx1e4?VyfHNNPS*K;N|0rbX5 zNu_rtICjj=xZ0qiO4*YUP}1{5-l#W$xQowaFhbRhH~f8?WC1yz+#eCu7WmJX8l8K9dq>By z^=hg5{R{ZLo@G{}E4-e}GUaG)1xBy*cqf zpcgwR8ztsm?ORY?cpwlymjW@U`H`Su6BQBdgRqSHnj@YOg;FVQ1``0dEBwI6H&doM}UNyx%l6+!>GsJz5+$l_p zCFbZfFze})is@+sX!*pBLgh(z0H>Y>aC1xFOx(OGYjSFI$ACR*jG=g^rR5Cz#Q}Wx zik>%6njq#+?M3&gA1Ht#*E|6c?~J&!LUE6Jns#e5pY%Uk3XQ<{VPMpY$(&KiFrtz){>|X&$^lPQ!ly1|_3R|5=t->NTkMcy zRe^iHMD1AlEo$#AfT&ic)v`+K$LE~%usTPE>&dK!iYHptNL&G|QD;!dnKy6Yj{m3_ zTWu`Ia~(84vP9D*T=-HDT$KvKMIO^ zGV8Xo&QGmtq`wL5y4B0pNW)pbQ!$t90p~KRC2y~Fl0pU)^lYV?6*&vS4t*4ubR#cW zu@XLkm+&`r^agz1!nGV?C+(_Tj!{yt})vcY9aYNY_wT*NLYizEwd!bg1!E;z9LHVtbyR zdB=Ieiy^F69l0KqY|qpR^+b7TFzUM~B8-{_iU$WrD=*>*rrPGV?z#FfU^+ z&Nn@(`pIvAA-!0_>6e3c4ekV1Z-u;b55u9oWw0@lQxPXjRy zW?fJ2;4OfsXK}}c%wQ;j1@M){3IMA(B9D0)oayD;+*`@)sA{bx%V(4LB(UFg5lue` zDtdC`7l$#2WkpBs4ftJPjaqf}+t(+fu_@pk){4*fVdY~8G1hJzmA32#o0+9&p!=NHIbPf^&lZM0*9L94|<(tHNp=e)ibA1JA8Vdyin3TmV;X@~9q{Xq7R zjW7&-?ja0nq|Y5mtWAfdPr3hwjd0!9cK*$H?BvDv&vmjAtO^T~hO@`s=L!PoY${i*TuTS~8fpTq1N zRGvGB?9>J3-~Bx6FMfRP&|j?l_j8Azf9RnrZv5fDvBK%){xMur6(sBBJG@0kq`!pk zUtR9*U&^troWGaOI$kp?UpZ^Pkanb6E}M9jdbL85m+%#}mm+^ME~CA;l7HCNz@x}F z8u>k=+Rk?;lgSgrA;pL1vhkWl7f0@2bn1-^tjGN>wtsQ0*G4gP`TDVe`MEQh8wOu7 z)_vl&|Nb@?yS;*sUQ%1co_9W>N&o!{ZoH7sPcPMegz-Pjj)Gbg|Ht*IuiFU2(C2=z zpho&Jd?_a@=sxH5X?&3M)0oeo_s=_j6Lfwf=yQJ|_gs*Np!=NH8T(jjgkzZgbN^~k zljAQfbot7fjxM(FZ%k&DoO`VKt_<}IwR^o}-I=RCcgeD&cC zpde>P9xbwO|50xk>hvU=jM^W=Zkg zKYmc7|K$<_tH@#LFXCRAe|M(ehkdreElU4C3v{pAD3>ZfON}4Amgl&aZcjmrB2C)xXr9;g^2Y|Gm#YwzYopXV(r~w&&{a-Uz4GN?*kv z;r_P?|JdpM_s=Y4@1y^2LVDo40D72J+~D1rr1s|!Gb?v~UNrbgA17uvuC>4wWi_Pr zeHY}6ImIvI`$wKw=K7g8|8eT@_y7LPGf)5XwUtNS_{mS+_~~^C#?-C0!~Kr5r!P4F zd~PP_?b4#E<9u%kW$goiD3Z(fiAa4=>v*eyO;=l6cceMjjAZ`sBrq=}i*n>%L4Dxf^l#c", "", "Text inside element.", + "", + "Text inside and bolded in the element.", 0 }; for( int i=0; test[i]; ++i ) {