From 10941052c85397c52d6bc15a9a8f6fee2cec3e0b Mon Sep 17 00:00:00 2001 From: Amos Jeffries Date: Fri, 3 Apr 2026 21:16:39 +1300 Subject: [PATCH 1/4] HTTP/1.1: improve handling of malformed 304 responses --- src/HttpHeader.cc | 71 ++++++++++++++++++++++-------------- src/HttpHeader.h | 1 - src/tests/stub_HttpHeader.cc | 1 - 3 files changed, 43 insertions(+), 30 deletions(-) diff --git a/src/HttpHeader.cc b/src/HttpHeader.cc index 3f5db81ad55..7f724b7cbad 100644 --- a/src/HttpHeader.cc +++ b/src/HttpHeader.cc @@ -376,24 +376,47 @@ HttpHeader::append(const HttpHeader * src) bool HttpHeader::needUpdate(HttpHeader const *fresh) const { + bool result = false; // assume optimal result; no further work necessary + String value; for (const auto e: fresh->entries) { - if (!e || skipUpdateHeader(e->id)) + if (!e) // Paranoid check; should never occur continue; - String value; - if (!hasNamed(e->name, &value) || - (value != fresh->getByName(e->name))) - return true; - } - return false; -} -bool -HttpHeader::skipUpdateHeader(const Http::HdrType id) const -{ - return - // TODO: Consider updating Vary headers after comparing the magnitude of - // the required changes (and/or cache losses) with compliance gains. - (id == Http::HdrType::VARY); + switch (e->id) { + case Http::HdrType::OTHER: + if (!result) + result = (!hasNamed(e->name, &value) || value != fresh->getByName(e->name)); + break; + + case Http::HdrType::CONTENT_ENCODING: + case Http::HdrType::CONTENT_LENGTH: + case Http::HdrType::CONTENT_TYPE: + case Http::HdrType::ETAG: + case Http::HdrType::VARY: + // Any change to these headers indicates a broken server. + // The 304 received DOES NOT apply to our cached representation. + // Squid should fallback to either MISS or using the cached + // object without the 304 changes. + if (!getByIdIfPresent(e->id, &value) || value != fresh->getByName(e->name)) + return false; // 304 problem, MUST NOT do an update + break; + + case Http::HdrType::CONTENT_LANGUAGE: + case Http::HdrType::CONTENT_LOCATION: + // these headers are weak identifiers, changes only + // cause representation issues if they are used by + // the Vary field-value + if (hasListMember(Http::HdrType::VARY, e->value.rawBuf(), ',')) + return false; + [[fallthrough]]; + + default: + if (!result) + result = (!getByIdIfPresent(e->id, &value) || value != fresh->getByName(e->name)); + break; + } + } + return result; } void @@ -402,29 +425,21 @@ HttpHeader::update(HttpHeader const *fresh) assert(fresh); assert(this != fresh); - const HttpHeaderEntry *e; HttpHeaderPos pos = HttpHeaderInitPos; + String value; - while ((e = fresh->getEntry(&pos))) { + while (const auto e = fresh->getEntry(&pos)) { /* deny bad guys (ok to check for Http::HdrType::OTHER) here */ - if (skipUpdateHeader(e->id)) + if (hasNamed(e->name, &value) && value == fresh->getByName(e->name)) continue; + debugs(55, 7, "Updating header '" << Http::HeaderLookupTable.lookup(e->id).name << "' in cached entry"); + if (e->id != Http::HdrType::OTHER) delById(e->id); else delByName(e->name); - } - - pos = HttpHeaderInitPos; - while ((e = fresh->getEntry(&pos))) { - /* deny bad guys (ok to check for Http::HdrType::OTHER) here */ - - if (skipUpdateHeader(e->id)) - continue; - - debugs(55, 7, "Updating header '" << Http::HeaderLookupTable.lookup(e->id).name << "' in cached entry"); addEntry(e->clone()); } diff --git a/src/HttpHeader.h b/src/HttpHeader.h index b0e5d6ba5ab..d8ea5d4c0a1 100644 --- a/src/HttpHeader.h +++ b/src/HttpHeader.h @@ -187,7 +187,6 @@ class HttpHeader /// *blk_end points to the first header delimiter character (CR or LF in CR?LF). /// If block starts where it ends, then there are no fields in the header. static bool Isolate(const char **parse_start, size_t l, const char **blk_start, const char **blk_end); - bool skipUpdateHeader(const Http::HdrType id) const; private: HttpHeaderEntry *findLastEntry(Http::HdrType id) const; diff --git a/src/tests/stub_HttpHeader.cc b/src/tests/stub_HttpHeader.cc index d796e3e79a9..93eedda7dee 100644 --- a/src/tests/stub_HttpHeader.cc +++ b/src/tests/stub_HttpHeader.cc @@ -81,7 +81,6 @@ void HttpHeader::removeHopByHopEntries() STUB void HttpHeader::removeConnectionHeaderEntries() STUB bool HttpHeader::Isolate(const char **, size_t, const char **, const char **) STUB_RETVAL(false) bool HttpHeader::needUpdate(const HttpHeader *) const STUB_RETVAL(false) -bool HttpHeader::skipUpdateHeader(const Http::HdrType) const STUB_RETVAL(false) int httpHeaderParseQuotedString(const char *, const int, String *) STUB_RETVAL(-1) SBuf Http::SlowlyParseQuotedString(const char *, const char *, size_t) STUB_RETVAL(SBuf()) SBuf httpHeaderQuoteString(const char *) STUB_RETVAL(SBuf()) From a5a6bad9673de523f88d186bee8f9b08f1f0f4bf Mon Sep 17 00:00:00 2001 From: Amos Jeffries Date: Sun, 5 Apr 2026 20:19:23 +1200 Subject: [PATCH 2/4] Update code documentation with RFC references --- src/HttpHeader.cc | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/src/HttpHeader.cc b/src/HttpHeader.cc index 7f724b7cbad..2c0d9007aa2 100644 --- a/src/HttpHeader.cc +++ b/src/HttpHeader.cc @@ -373,6 +373,18 @@ HttpHeader::append(const HttpHeader * src) } } +/** + * Whether this is the same resource representation + * the 304 ('fresh') header was generated from, and + * that there are changes to apply. + * + * Governed by RFC 9111 section 4.3.4: + * " + * When a cache receives a 304 (Not Modified) response, + * it needs to identify stored responses that are + * suitable for updating with the new information provided + * " + */ bool HttpHeader::needUpdate(HttpHeader const *fresh) const { @@ -384,6 +396,7 @@ HttpHeader::needUpdate(HttpHeader const *fresh) const switch (e->id) { case Http::HdrType::OTHER: + // check if this header is actually being changed. if (!result) result = (!hasNamed(e->name, &value) || value != fresh->getByName(e->name)); break; @@ -393,24 +406,24 @@ HttpHeader::needUpdate(HttpHeader const *fresh) const case Http::HdrType::CONTENT_TYPE: case Http::HdrType::ETAG: case Http::HdrType::VARY: - // Any change to these headers indicates a broken server. - // The 304 received DOES NOT apply to our cached representation. - // Squid should fallback to either MISS or using the cached - // object without the 304 changes. + // Strong Identifiers; + // Any difference in these headers indicates the 304 + // received DOES NOT apply to this representation. if (!getByIdIfPresent(e->id, &value) || value != fresh->getByName(e->name)) - return false; // 304 problem, MUST NOT do an update + return false; break; case Http::HdrType::CONTENT_LANGUAGE: case Http::HdrType::CONTENT_LOCATION: - // these headers are weak identifiers, changes only - // cause representation issues if they are used by - // the Vary field-value + // Weak identifiers; + // Changes only cause representation issues if they are + // used by the Vary field-value if (hasListMember(Http::HdrType::VARY, e->value.rawBuf(), ',')) return false; [[fallthrough]]; default: + // check if this header is actually being changed. if (!result) result = (!getByIdIfPresent(e->id, &value) || value != fresh->getByName(e->name)); break; From c0cc5a968ef9526f2ceaa2c668b358d011deff25 Mon Sep 17 00:00:00 2001 From: Amos Jeffries Date: Mon, 6 Apr 2026 09:05:29 +1200 Subject: [PATCH 3/4] Restore HttpHeader::skipUpdateHeader with better RFC compliance RFC 9110 section 3.1 and 3.2 define a lot more than just Vary header to be skipped on 304 updates. --- src/HttpHeader.cc | 72 ++++++++++++++++++++++++++++++++++++ src/HttpHeader.h | 1 + src/tests/stub_HttpHeader.cc | 1 + 3 files changed, 74 insertions(+) diff --git a/src/HttpHeader.cc b/src/HttpHeader.cc index 2c0d9007aa2..7e84f32e4a5 100644 --- a/src/HttpHeader.cc +++ b/src/HttpHeader.cc @@ -432,6 +432,75 @@ HttpHeader::needUpdate(HttpHeader const *fresh) const return result; } +/** + * Whether this HTTP header is to be ignored when updating + * an existing cache object after revalidation, etc. + * + * Governed by RFC 9111 section 3.1 and 3.2: + * " + * Caches are required to update a stored response's + * header fields from another (typically newer) + * response in several situations + * " + */ +bool +HttpHeader::skipUpdateHeader(const Http::HdrType id) const +{ + /** + * RFC 9111 section 3.2: + * the cache MUST add each header field in the provided + * response to the stored response, replacing field + * values that are already present, with the following + * exceptions: + */ + + // s3.2: Header fields excepted from storage in Section 3.1 + switch (id) { + // s3.1: Header fields that are specific to the proxy that a cache uses + case Http::HdrType::PROXY_AUTHENTICATE: + case Http::HdrType::PROXY_AUTHENTICATION_INFO: + case Http::HdrType::PROXY_AUTHORIZATION: + return true; + + // s3.1: The Connection header field and fields whose names are listed in it + // MAY update. so we dont bother figuring out if we can skip any listed. + case Http::HdrType::CONNECTION: + return true; + + // s3.1: The no-cache and private cache directives can have arguments that prevent storage of header fields + // MAY update. so we dont bother figuring out if we can skip. + + default: + // s3.1: some fields' semantics require them to be removed before forwarding the message + if (Http::HeaderLookupTable.lookup(id).hopbyhop) + return true; + break; + } + + switch (id) { + // s3.2: Header fields that the cache's stored response depends upon + case Http::HdrType::ETAG: + case Http::HdrType::KEY: + case Http::HdrType::VARY: + return true; + + // s3.2: Header fields that are automatically processed and removed by the recipient + case Http::HdrType::CONTENT_ENCODING: + case Http::HdrType::CONTENT_MD5: + case Http::HdrType::CONTENT_RANGE: + case Http::HdrType::CONTENT_TYPE: + return true; + + // s3.2: The Content-Length header field + case Http::HdrType::CONTENT_LENGTH: + return true; + + case Http::HdrType::OTHER: + default: + return false; + } +} + void HttpHeader::update(HttpHeader const *fresh) { @@ -444,6 +513,9 @@ HttpHeader::update(HttpHeader const *fresh) while (const auto e = fresh->getEntry(&pos)) { /* deny bad guys (ok to check for Http::HdrType::OTHER) here */ + if (skipUpdateHeader(e->id)) + continue; + if (hasNamed(e->name, &value) && value == fresh->getByName(e->name)) continue; diff --git a/src/HttpHeader.h b/src/HttpHeader.h index d8ea5d4c0a1..b0e5d6ba5ab 100644 --- a/src/HttpHeader.h +++ b/src/HttpHeader.h @@ -187,6 +187,7 @@ class HttpHeader /// *blk_end points to the first header delimiter character (CR or LF in CR?LF). /// If block starts where it ends, then there are no fields in the header. static bool Isolate(const char **parse_start, size_t l, const char **blk_start, const char **blk_end); + bool skipUpdateHeader(const Http::HdrType id) const; private: HttpHeaderEntry *findLastEntry(Http::HdrType id) const; diff --git a/src/tests/stub_HttpHeader.cc b/src/tests/stub_HttpHeader.cc index 93eedda7dee..d796e3e79a9 100644 --- a/src/tests/stub_HttpHeader.cc +++ b/src/tests/stub_HttpHeader.cc @@ -81,6 +81,7 @@ void HttpHeader::removeHopByHopEntries() STUB void HttpHeader::removeConnectionHeaderEntries() STUB bool HttpHeader::Isolate(const char **, size_t, const char **, const char **) STUB_RETVAL(false) bool HttpHeader::needUpdate(const HttpHeader *) const STUB_RETVAL(false) +bool HttpHeader::skipUpdateHeader(const Http::HdrType) const STUB_RETVAL(false) int httpHeaderParseQuotedString(const char *, const int, String *) STUB_RETVAL(-1) SBuf Http::SlowlyParseQuotedString(const char *, const char *, size_t) STUB_RETVAL(SBuf()) SBuf httpHeaderQuoteString(const char *) STUB_RETVAL(SBuf()) From aa9218bf00c1d394b0c57f66afafbbd4e674653f Mon Sep 17 00:00:00 2001 From: Amos Jeffries Date: Mon, 6 Apr 2026 09:17:58 +1200 Subject: [PATCH 4/4] make source formatter happier --- src/HttpHeader.cc | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/HttpHeader.cc b/src/HttpHeader.cc index 7e84f32e4a5..e60ce122ee1 100644 --- a/src/HttpHeader.cc +++ b/src/HttpHeader.cc @@ -456,19 +456,19 @@ HttpHeader::skipUpdateHeader(const Http::HdrType id) const // s3.2: Header fields excepted from storage in Section 3.1 switch (id) { - // s3.1: Header fields that are specific to the proxy that a cache uses + // s3.1: Header fields that are specific to the proxy that a cache uses case Http::HdrType::PROXY_AUTHENTICATE: case Http::HdrType::PROXY_AUTHENTICATION_INFO: case Http::HdrType::PROXY_AUTHORIZATION: return true; - // s3.1: The Connection header field and fields whose names are listed in it - // MAY update. so we dont bother figuring out if we can skip any listed. + // s3.1: The Connection header field and fields whose names are listed in it + // MAY update. so we dont bother figuring out if we can skip any listed. case Http::HdrType::CONNECTION: return true; - // s3.1: The no-cache and private cache directives can have arguments that prevent storage of header fields - // MAY update. so we dont bother figuring out if we can skip. + // s3.1: The no-cache and private cache directives can have arguments that prevent storage of header fields + // MAY update. so we dont bother figuring out if we can skip. default: // s3.1: some fields' semantics require them to be removed before forwarding the message @@ -478,23 +478,24 @@ HttpHeader::skipUpdateHeader(const Http::HdrType id) const } switch (id) { - // s3.2: Header fields that the cache's stored response depends upon + // s3.2: Header fields that the cache's stored response depends upon case Http::HdrType::ETAG: case Http::HdrType::KEY: case Http::HdrType::VARY: return true; - // s3.2: Header fields that are automatically processed and removed by the recipient + // s3.2: Header fields that are automatically processed and removed by the recipient case Http::HdrType::CONTENT_ENCODING: case Http::HdrType::CONTENT_MD5: case Http::HdrType::CONTENT_RANGE: case Http::HdrType::CONTENT_TYPE: return true; - // s3.2: The Content-Length header field + // s3.2: The Content-Length header field case Http::HdrType::CONTENT_LENGTH: return true; + // more complex tests using field-value are in update(fresh) case Http::HdrType::OTHER: default: return false; @@ -516,6 +517,7 @@ HttpHeader::update(HttpHeader const *fresh) if (skipUpdateHeader(e->id)) continue; + // when header value not actually changed, don't touch it. if (hasNamed(e->name, &value) && value == fresh->getByName(e->name)) continue;