diff --git a/tinyxml2.cpp b/tinyxml2.cpp
index ad5bc870..c436fb24 100644
--- a/tinyxml2.cpp
+++ b/tinyxml2.cpp
@@ -552,12 +552,15 @@ const char* XMLUtil::GetCharacterRef(const char* p, char* value, int* length)
TIXMLASSERT(digit < radix);
const unsigned int digitScaled = mult * digit;
+ // Reject before adding: if digitScaled alone exceeds MAX_CODE_POINT,
+ // or if adding it to ucs would exceed it (checked without overflow by
+ // testing ucs > MAX_CODE_POINT - digitScaled, safe since digitScaled
+ // <= MAX_CODE_POINT at this point).
+ if (digitScaled > MAX_CODE_POINT || ucs > MAX_CODE_POINT - digitScaled) {
+ return 0;
+ }
ucs += digitScaled;
- mult *= radix;
-
- // Security check: could a value exist that is out of range?
- // Easily; limit to the MAX_CODE_POINT, which also allows for a
- // bunch of leading zeroes.
+ mult *= radix;
if (mult > MAX_CODE_POINT) {
mult = MAX_CODE_POINT;
}
@@ -569,11 +572,11 @@ const char* XMLUtil::GetCharacterRef(const char* p, char* value, int* length)
}
// convert the UCS to UTF-8
ConvertUTF32ToUTF8(ucs, value, length);
- if (length == 0) {
- // If length is 0, there was an error. (Security? Bad input?)
+ if (*length == 0) {
+ // If *length is 0, ConvertUTF32ToUTF8 rejected the code point.
// Fail safely.
- return 0;
- }
+ return 0;
+ }
return p + delta + 1;
}
return p + 1;
diff --git a/xmltest.cpp b/xmltest.cpp
index d058707b..f8e71962 100644
--- a/xmltest.cpp
+++ b/xmltest.cpp
@@ -2701,6 +2701,56 @@ int main( int argc, const char ** argv )
XMLTest("Test attribute encode with a Hex value", value5, "!"); // hex value in unicode value
}
+ // ---------- Security: numeric character reference bounds ----------
+ {
+ // Regression: U+10FFFF is the last valid Unicode code point and must
+ // parse correctly. The in-loop overflow guard must not reject it.
+ XMLDocument doc;
+ doc.Parse( "" );
+ XMLTest( "Numeric ref U+10FFFF: no error", false, doc.Error() );
+ const char* v = doc.FirstChildElement()->Attribute( "v" );
+ // U+10FFFF encodes to the 4-byte UTF-8 sequence F4 8F BF BF.
+ const char expected[] = {
+ static_cast(0xF4), static_cast(0x8F),
+ static_cast(0xBF), static_cast(0xBF), 0
+ };
+ XMLTest( "Numeric ref U+10FFFF: correct UTF-8 output", expected, v );
+ }
+ {
+ // Boundary check: U+110000 is one above the maximum code point.
+ // The in-loop overflow guard must catch this before ucs is written,
+ // leaving the entity as a literal (starting with '&').
+ XMLDocument doc;
+ doc.Parse( "" );
+ XMLTest( "Numeric ref U+110000: no parse error", false, doc.Error() );
+ const char* v = doc.FirstChildElement()->Attribute( "v" );
+ XMLTest( "Numeric ref U+110000: not resolved (left as literal)", true,
+ v != nullptr && v[0] == '&' );
+ }
+ {
+ // A hex entity with enough digits to overflow uint32_t must
+ // be rejected by the in-loop guard before the accumulator wraps.
+ // Before the fix, ucs could wrap around and pass the post-loop range
+ // check, producing an attacker-chosen character in the parsed output.
+ // Build "" + 300 'F' digits + ";" -- far beyond what fits in uint32_t.
+ const char prefix[] = "";
+ static const int NDIGITS = 300;
+ char xml[sizeof(prefix) + NDIGITS + sizeof(suffix)];
+ strcpy( xml, prefix );
+ memset( xml + strlen(prefix), 'F', NDIGITS );
+ strcpy( xml + strlen(prefix) + NDIGITS, suffix );
+
+ XMLDocument doc;
+ doc.Parse( xml );
+ XMLTest( "Overflow hex entity: no parse error", false, doc.Error() );
+ const char* v = doc.FirstChildElement()->Attribute( "v" );
+ // GetCharacterRef returns 0 for rejected refs; the caller then copies
+ // the literal '&', so the attribute must start with '&', not a char.
+ XMLTest( "Overflow hex entity: not resolved to a character", true,
+ v != nullptr && v[0] == '&' );
+ }
+
// ---------- XMLPrinter Apos Escaping ------
{
const char* testText = "text containing a ' character";