From bf59a2d4c8b5e21a4e834fbfdd02854695fc02b0 Mon Sep 17 00:00:00 2001 From: John Senneker Date: Tue, 19 Nov 2019 11:00:52 -0500 Subject: [PATCH 1/3] Added support for files larger than ~2GB. This implements #786. TinyXML reads the entire file into a pre-allocated buffer in memory. To get the buffer size, it uses `fseek()` to seek to the end of the file, and `ftell()` to get the current byte offset. But because `ftell()` returns a 32-bit signed value, it can only represent byte offsets, hence file sizes, up to about 2GB. To get around this, `fseek` and `ftell` have been replaced by the `TIXML_FSEEK` and `TIXML_FTELL` macros, respectively, which will resolve to 64-bit versions of these functions on platforms that have them. --- tinyxml2.cpp | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/tinyxml2.cpp b/tinyxml2.cpp index 6b79917..a0b8735 100755 --- a/tinyxml2.cpp +++ b/tinyxml2.cpp @@ -100,6 +100,20 @@ distribution. #define TIXML_SSCANF sscanf #endif +#if defined(_WIN64) + #define TIXML_FSEEK _fseeki64 + #define TIXML_FTELL _ftelli64 +#elif defined(__APPLE__) + #define TIXML_FSEEK fseeko + #define TIXML_FTELL ftello +#elif defined(__x86_64__) + #define TIXML_FSEEK fseeko64 + #define TIXML_FTELL ftello64 +#else + #define TIXML_FSEEK fseek + #define TIXML_FTELL ftell +#endif + static const char LINE_FEED = static_cast(0x0a); // all line endings are normalized to LF static const char LF = LINE_FEED; @@ -2280,15 +2294,15 @@ XMLError XMLDocument::LoadFile( FILE* fp ) { Clear(); - fseek( fp, 0, SEEK_SET ); + TIXML_FSEEK( fp, 0, SEEK_SET ); if ( fgetc( fp ) == EOF && ferror( fp ) != 0 ) { SetError( XML_ERROR_FILE_READ_ERROR, 0, 0 ); return _errorID; } - fseek( fp, 0, SEEK_END ); - const long filelength = ftell( fp ); - fseek( fp, 0, SEEK_SET ); + TIXML_FSEEK( fp, 0, SEEK_END ); + const long long filelength = TIXML_FTELL( fp ); + TIXML_FSEEK( fp, 0, SEEK_SET ); if ( filelength == -1L ) { SetError( XML_ERROR_FILE_READ_ERROR, 0, 0 ); return _errorID; From d58436c4bd27e6a1207f71da659f4fb5d1776e7b Mon Sep 17 00:00:00 2001 From: John Senneker Date: Tue, 19 Nov 2019 11:34:59 -0500 Subject: [PATCH 2/3] Fixed bug caused by type checking code in `LoadFile`. Since the file offset is now represented in all cases as a `long long`, the previous check comparing the sizes of `unsigned long` and `size_t` no longer makes sense. --- tinyxml2.cpp | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/tinyxml2.cpp b/tinyxml2.cpp index a0b8735..2a122bb 100755 --- a/tinyxml2.cpp +++ b/tinyxml2.cpp @@ -2267,29 +2267,6 @@ XMLError XMLDocument::LoadFile( const char* filename ) return _errorID; } -// This is likely overengineered template art to have a check that unsigned long value incremented -// by one still fits into size_t. If size_t type is larger than unsigned long type -// (x86_64-w64-mingw32 target) then the check is redundant and gcc and clang emit -// -Wtype-limits warning. This piece makes the compiler select code with a check when a check -// is useful and code with no check when a check is redundant depending on how size_t and unsigned long -// types sizes relate to each other. -template -= sizeof(size_t))> -struct LongFitsIntoSizeTMinusOne { - static bool Fits( unsigned long value ) - { - return value < static_cast(-1); - } -}; - -template <> -struct LongFitsIntoSizeTMinusOne { - static bool Fits( unsigned long ) - { - return true; - } -}; - XMLError XMLDocument::LoadFile( FILE* fp ) { Clear(); @@ -2309,7 +2286,7 @@ XMLError XMLDocument::LoadFile( FILE* fp ) } TIXMLASSERT( filelength >= 0 ); - if ( !LongFitsIntoSizeTMinusOne<>::Fits( filelength ) ) { + if ( filelength >= static_cast(-1) ) { // Cannot handle files which won't fit in buffer together with null terminator SetError( XML_ERROR_FILE_READ_ERROR, 0, 0 ); return _errorID; From a9f29b74d93504631b6dda9a74842eb3a388b27e Mon Sep 17 00:00:00 2001 From: John Senneker Date: Mon, 2 Mar 2020 11:04:57 -0500 Subject: [PATCH 3/3] Fixed warning caused by sloppy conversion between signed/unsigned types. Also, the comparison between size_t max and the actual file size is done as an unsigned long long, since that type is guaranteed to be at least 64 bits, even on a 32-bit architecture. --- tinyxml2.cpp | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/tinyxml2.cpp b/tinyxml2.cpp index 2a122bb..faade3b 100755 --- a/tinyxml2.cpp +++ b/tinyxml2.cpp @@ -2278,15 +2278,23 @@ XMLError XMLDocument::LoadFile( FILE* fp ) } TIXML_FSEEK( fp, 0, SEEK_END ); - const long long filelength = TIXML_FTELL( fp ); - TIXML_FSEEK( fp, 0, SEEK_SET ); - if ( filelength == -1L ) { - SetError( XML_ERROR_FILE_READ_ERROR, 0, 0 ); - return _errorID; - } - TIXMLASSERT( filelength >= 0 ); - if ( filelength >= static_cast(-1) ) { + unsigned long long filelength; + { + const long long fileLengthSigned = TIXML_FTELL( fp ); + TIXML_FSEEK( fp, 0, SEEK_SET ); + if ( fileLengthSigned == -1L ) { + SetError( XML_ERROR_FILE_READ_ERROR, 0, 0 ); + return _errorID; + } + TIXMLASSERT( fileLengthSigned >= 0 ); + filelength = static_cast(fileLengthSigned); + } + + const size_t maxSizeT = static_cast(-1); + // We'll do the comparison as an unsigned long long, because that's guaranteed to be at + // least 8 bytes, even on a 32-bit platform. + if ( filelength >= static_cast(maxSizeT) ) { // Cannot handle files which won't fit in buffer together with null terminator SetError( XML_ERROR_FILE_READ_ERROR, 0, 0 ); return _errorID; @@ -2297,7 +2305,7 @@ XMLError XMLDocument::LoadFile( FILE* fp ) return _errorID; } - const size_t size = filelength; + const size_t size = static_cast(filelength); TIXMLASSERT( _charBuffer == 0 ); _charBuffer = new char[size+1]; const size_t read = fread( _charBuffer, 1, size, fp );