From fde6a756d1655091a1d77cec734329b061728b17 Mon Sep 17 00:00:00 2001 From: Lee Thomason Date: Sat, 14 Jan 2012 18:08:12 -0800 Subject: [PATCH] another rev of text parsing. --- tinyxml2.cpp | 67 +++++++++++++++++++++++++++++++++++++++------------ tinyxml2.h | 8 +++--- tinyxml2.suo | Bin 26624 -> 27648 bytes xmltest.cpp | 7 +++--- 4 files changed, 59 insertions(+), 23 deletions(-) diff --git a/tinyxml2.cpp b/tinyxml2.cpp index cfe2969..ae6a747 100644 --- a/tinyxml2.cpp +++ b/tinyxml2.cpp @@ -7,6 +7,12 @@ using namespace tinyxml2; +static const char LINE_FEED = (char)0x0a; // all line endings are normalized to LF +static const char LF = LINE_FEED; +static const char CARRIAGE_RETURN = (char)0x0d; // CR gets filtered out +static const char CR = CARRIAGE_RETURN; + + // --------- CharBuffer ----------- // /*static*/ CharBuffer* CharBuffer::Construct( const char* in ) { @@ -91,25 +97,54 @@ const char* XMLNode::ParseText( char* p, const char* endTag, char** next ) { TIXMLASSERT( endTag && *endTag ); - char* start = SkipWhiteSpace( p ); - if ( !start ) - return 0; - - char endChar = *endTag; - p = start; - int length = strlen( endTag ); + char* start = p; + char* q = p; // q (target) <= p (src) in same buffer. + char endChar = *endTag; + int length = strlen( endTag ); + char* nextTag = 0; + // Inner loop of text parsing. while ( *p ) { - if ( *p == endChar ) { - if ( strncmp( p, endTag, length ) == 0 ) { - *p = 0; - *next = p + length; - return start; - } + if ( *p == endChar && strncmp( p, endTag, length ) == 0 ) { + *q = 0; + nextTag = p + length; + break; } - ++p; + else if ( *p == CR ) { + // CR-LF pair becomes LF + // CR alone becomes LF + // LF-CR becomes LF + if ( *(p+1) == LF ) { + p += 2; + } + else { + ++p; + } + *q = LF; + } + else if ( *p == LF ) { + if ( *(p+1) == CR ) { + p += 2; + } + else { + ++p; + } + *q = LF; + } + else { + *q = *p; + ++p; + } + ++q; } - return 0; + + // Error? If we don't have a text tag, something went wrong. (Although + // what the nextTag points at may be null.) + if ( nextTag == 0 ) { + return 0; + } + *next = nextTag; + return start; } @@ -129,7 +164,7 @@ XMLComment::~XMLComment() void XMLComment::Print( FILE* fp, int depth ) { XMLNode::Print( fp, depth ); - fprintf( fp, "\n", value ); + fprintf( fp, "\n", value ); } diff --git a/tinyxml2.h b/tinyxml2.h index 2ff58c9..23f21a5 100644 --- a/tinyxml2.h +++ b/tinyxml2.h @@ -68,13 +68,13 @@ protected: } return false; } + inline static int IsUTF8Continuation( char p ) { return p & 0x80; } /* Parses text. (Not a text node.) - [ ] EOL normalization. - - [x] Trim leading whitespace - - [ ] Trim trailing whitespace. - - [ ] Leaves inner whitespace - - [ ] Inserts one space between lines. + - [X] Do not trim leading whitespace + - [X] Do not trim trailing whitespace. + - [X] Leaves inner whitespace */ const char* ParseText( char* in, const char* endTag, char** next ); diff --git a/tinyxml2.suo b/tinyxml2.suo index 4fe777ea33304723a23a51ad6c297c7597c14a77..032681299f6592cb8b79076996fc2c691789f742 100644 GIT binary patch delta 630 zcmY*UO-~b16n%I4rk&F1lv?_sC0ei>A<r&D`NNj{KitAD?dV|gf$}{(;AvqGaFDJt z@h_0!z{L3Arbl&5XHhqVcI2xMWX3$~6^5HRD{6!6EW_|`te}RDqi)=LnFODVtMMt7 zn)+j05AFC1SQIOX_Y%&}O^C&07VvRZ!3s1AwU;W0qNj9C|-0~E!|r8R)K_>&5R zy85|phU4}X4QnM$3{UY&ow%!nHeYZmd#lB?x{%G;ixv3H>yT|;bK+Cl9>yh%CIo*G$+Nt z%tx;hC*A#VVb|MW>3I%q{M5L;QA>l4BRq-unl8^#(897(z)s^D)|B#@JHvzc?OrJy g+Y4_FZKc-7KHS`SwzM@niuI=veB01Oexe2c0f9u*jQ{`u delta 1024 zcmb`F-%pZJ7{|}SmtO(`ihxN8Mw_OzmmdrBSE0a#rfcN@u@VC{8)eg0Db!f2i&krc z=caB7yy>FWoNt@8wZ@9A+FB#8y6D2T)?YvuwI)4B#r}bwv(Ni{&+|OzyzhD5llnqa zMY=p9QW*##8|8A@h`t#nM2&JYDhr~4TOzldnaPqRY3)WQxz_dBm8yN71xqyCxr zIk7I8647qcd5x_f%|PnOU-r(^LnHQVRg(l*I=U!LiN~diA;^5 z%6}C~6L+a^j_fC9ylQh~np{TJiWPWMc8qPXTCDAizbR^;_mQl1VD}nJvUtTf(8}5% zrBhN0RA+#Qti9@d+>r^k13PP1rH;t*!>C1wcc;nXv&m#43Te$W{28*=b(2|gkxY^c zq?f2jHCqagcnjo$``BjM6D+cA)9B@p9-Re6(898+N{0CwD1`kGkafZJ`WA3BcEB2E I&hSV60-nwb6951J diff --git a/xmltest.cpp b/xmltest.cpp index 8df6ce0..c3f1b3b 100644 --- a/xmltest.cpp +++ b/xmltest.cpp @@ -7,6 +7,7 @@ using namespace tinyxml2; int main( int argc, const char* argv ) { +#if 0 { static const char* test = ""; @@ -14,14 +15,14 @@ int main( int argc, const char* argv ) doc.Parse( test ); doc.Print( stdout ); } - /* +#endif { - static const char* test = ""; + static const char* test = ""; XMLDocument doc; doc.Parse( test ); doc.Print( stdout ); } - */ return 0; } \ No newline at end of file