diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 4509413cee3..1a03b3a88e8 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -119,6 +119,7 @@ Thank you! Daniel Beschorner Daniel Nylander Daniel O'Callaghan + Daniel Oltmanns Daniel Walter Daris A Nevil Dave Dykstra diff --git a/src/cf.data.pre b/src/cf.data.pre index 51c90a376e8..34eaf51f6ee 100644 --- a/src/cf.data.pre +++ b/src/cf.data.pre @@ -4672,7 +4672,7 @@ NAME: logformat TYPE: logformat LOC: Log::TheConfig DEFAULT: none -DEFAULT_DOC: The format definitions squid, common, combined, referrer, useragent are built in. +DEFAULT_DOC: The format definitions squid, common, combined, referrer, useragent, cef are built in. DOC_START Usage: @@ -5038,6 +5038,14 @@ DOC_START sent by Squid as a part of a master transaction do not increment the counter logged for the received request. + Squid runtime format codes: + + squid::hostname The visible hostname of this Squid instance. + Returns visible_hostname when configured, otherwise the + hostname determined at startup. + + squid::version The Squid software version string (e.g., 8.0.0-VCS). + SSL-related format codes: ssl::bump_mode SslBump decision for the transaction: @@ -5239,6 +5247,64 @@ logformat combined %>a - %[un [%tl] "%rm %ru HTTP/%rv" %>Hs %h logformat referrer %ts.%03tu %>a %{Referer}>h %ru logformat useragent %>a [%tl] "%{User-Agent}>h" + The "cef" format produces Common Event Format (CEF) lines + for SIEM ingestion. It is emitted by Squid directly so that + CEF-reserved bytes ('|' and '\\' in the header, '=' / CR / LF in + extensions) are escaped per the spec, and so that values not + otherwise exposed to logformat (notably the derived severity) can + be included. 
+ + Reference: https://docs.microfocus.com/doc/2097/26.1/siemcefimplementationstandard + + Header: CEF:0|Squid|Squid Cache|<version>|<cache code>| + Proxy Request|<severity>| + Severity reflects what Squid did with the transaction + (LogTags and error category) rather than the upstream + HTTP status: + 1 routine activity: cache hit (TCP_HIT and similar), + forwarded traffic / cache miss (TCP_MISS, TCP_TUNNEL, + refresh, ICP query, ...), TCP_REDIRECT, or TCP_DENIED + with a 401/407 auth challenge + 3 TCP_DENIED policy block (e.g. 403), UDP_DENIED, or a + Squid access/invalid-request error + 4 operational issue: connection/DNS/timeout failure, + ICAP failure, swap failure, UDP_INVALID, or a 5xx with + no other classification + Falls back to HTTP code only when no Squid signal is set + (>=500 -> 4, >=400 -> 3, otherwise 0). + + Extension fields (omitted when empty): + rt receipt time (ms since epoch) + start, end activity start/end time (ms since epoch) + src, spt client IP and port + dvc, dvchost Squid local IP and visible_hostname + dst, dpt next-hop server IP and port + dhost request URL host + proto transport (TCP for HTTP/HTTPS, UDP for ICP/HTCP) + app URL scheme with HTTP version when known + (e.g., "https/1.1") + suser authenticated or external-ACL user + requestMethod, request, requestClientApplication + method, URL, User-Agent + in, out client-request and client-reply byte totals + act Squid cache code (TCP_HIT, TCP_DENIED, ...) + outcome "success" (HTTP <400) or "failure" + cn1/cn1Label response time, ms ("ResponseTime") + cn2/cn2Label HTTP response status code ("HttpStatus") + cs1/cs1Label Referer header ("Referer") + cs2/cs2Label hierarchy code ("Hierarchy") + fileType response Content-Type + reason Squid err_type (when the transaction errored) + + If the built-in "cef" format does not fit your SIEM schema, + you can build a CEF-shaped line yourself with logformat. The + example below is a minimal starting point; extend it as + needed. 
Note that pipe ('|') and backslash ('\\') in header + fields, and '=' in extension values, must be escaped per the + CEF spec - the built-in "cef" format does this for you. + + logformat cef-min CEF:0|Squid|Squid Cache|%squid::version|%Ss|Proxy Request|1|rt=%ts%03tu src=%>a spt=%>p dst=%rd app=%>rs/%>rv suser=%[un requestMethod=%rm request=%ru in=%>st out=%<st cn2=%>Hs cn2Label=HttpStatus dvchost=%squid::hostname + NOTE: When the log_mime_hdrs directive is set to ON. The squid, common and combined formats have a safely encoded copy of the mime headers appended to each line within a pair of brackets. diff --git a/src/format/ByteCode.h b/src/format/ByteCode.h index 442f77d29e6..13dad31a0ff 100644 --- a/src/format/ByteCode.h +++ b/src/format/ByteCode.h @@ -175,6 +175,8 @@ typedef enum { LFT_SQUID_ERROR_DETAIL, LFT_SQUID_HIERARCHY, LFT_SQUID_REQUEST_ATTEMPTS, + LFT_SQUID_HOSTNAME, + LFT_SQUID_VERSION, LFT_MIME_TYPE, LFT_TAG, diff --git a/src/format/Format.cc b/src/format/Format.cc index 445a20712f3..8d3198f763a 100644 --- a/src/format/Format.cc +++ b/src/format/Format.cc @@ -1029,6 +1029,14 @@ Format::Format::assemble(MemBuf &mb, const AccessLogEntry::Pointer &al, int logS doint = 1; break; + case LFT_SQUID_HOSTNAME: + out = getMyHostname(); + break; + + case LFT_SQUID_VERSION: + out = VERSION; + break; + case LFT_MIME_TYPE: out = al->http.content_type; break; diff --git a/src/format/Token.cc b/src/format/Token.cc index 36b6a351562..665711f3895 100644 --- a/src/format/Token.cc +++ b/src/format/Token.cc @@ -191,6 +191,13 @@ static TokenTableEntry TokenTableTransport[] = { TokenTableEntry(nullptr, LFT_NONE) /* this must be last */ }; +/// Squid runtime identification (squid::) tokens +static TokenTableEntry TokenTableSquid[] = { + TokenTableEntry("hostname", LFT_SQUID_HOSTNAME), + TokenTableEntry("version", LFT_SQUID_VERSION), + TokenTableEntry(nullptr, LFT_NONE) /* this must be last */ +}; + #if USE_ADAPTATION static TokenTableEntry TokenTableAdapt[] = { 
TokenTableEntry("all_trs", LFT_ADAPTATION_ALL_XACT_TIMES), @@ -268,6 +275,7 @@ Format::Token::Init() #endif TheConfig.registerTokens(SBuf("proxy_protocol"), ::Format::TokenTableProxyProtocol); TheConfig.registerTokens(SBuf("transport"), ::Format::TokenTableTransport); + TheConfig.registerTokens(SBuf("squid"), ::Format::TokenTableSquid); } /// Scans a token table to see if the next token exists there diff --git a/src/log/Config.cc b/src/log/Config.cc index 37261373263..cd5ec1242d4 100644 --- a/src/log/Config.cc +++ b/src/log/Config.cc @@ -42,6 +42,9 @@ Log::LogConfig::BuiltInFormatName(const Format::log_type logformatType) case Log::Format::CLF_REFERER: return "referrer"; + + case Format::CLF_CEF: + return "cef"; } // forgotten (by developers) type, invalid type, or unreachable code @@ -78,6 +81,9 @@ Log::LogConfig::FindBuiltInFormat(const char *logformatName) if (strcmp(logformatName, "referrer") == 0) return Format::CLF_REFERER; + if (strcmp(logformatName, "cef") == 0) + return Format::CLF_CEF; + // CLF_NONE, CLF_UNKNOWN, CLF_CUSTOM types cannot be specified explicitly. // TODO: Ban "none" and "unknown" custom logformat names to avoid confusion. return Format::CLF_UNKNOWN; diff --git a/src/log/FormatSiemCef.cc b/src/log/FormatSiemCef.cc new file mode 100644 index 00000000000..9ff374cc184 --- /dev/null +++ b/src/log/FormatSiemCef.cc @@ -0,0 +1,364 @@ +/* + * Copyright (C) 1996-2026 The Squid Software Foundation and contributors + * + * Squid software is distributed under GPLv2+ license and includes + * contributions from numerous individuals and organizations. + * Please see the COPYING and CONTRIBUTORS files for details. 
+ */
+
+/* DEBUG: section 46 Access Log - SIEM CEF format */
+
+#include "squid.h"
+#include "AccessLogEntry.h"
+#include "comm/Connection.h"
+#include "error/Error.h"
+#include "globals.h"
+#include "hier_code.h"
+#include "HttpRequest.h"
+#include "log/File.h"
+#include "log/Formats.h"
+#include "sbuf/Stream.h"
+#include "time/gadgets.h"
+#include "tools.h"
+
+#if USE_AUTH
+#include "auth/UserRequest.h"
+#endif
+
+namespace {
+
+/// Names the transport protocol to report for a transaction, judging only by
+/// its legacy log tag: "UDP" for the UDP_* tags and ICP_QUERY listed below,
+/// "TCP" for every other tag.
+static const char *
+CefTransport(const LogTags_ot tag)
+{
+    const bool overUdp =
+        tag == LOG_UDP_HIT ||
+        tag == LOG_UDP_MISS ||
+        tag == LOG_UDP_DENIED ||
+        tag == LOG_UDP_INVALID ||
+        tag == LOG_UDP_MISS_NOFETCH ||
+        tag == LOG_ICP_QUERY;
+    return overUdp ? "UDP" : "TCP";
+}
+
+/// CEF severity (0..10) describing what Squid did with the transaction.
+/// We prefer Squid's own signals (LogTags, error category) over the upstream
+/// HTTP status, since they reflect proxy behavior rather than origin replies.
+static int
+CefSeverity(const AccessLogEntry &al)
+{
+    // the severity levels this function hands out (besides the 0 fallback)
+    constexpr int routine = 1;
+    constexpr int policyBlock = 3;
+    constexpr int operationalIssue = 4;
+
+    // First choice: the error category recorded for the transaction, when it
+    // is one of the categories classified here.
+    const auto failure = al.error();
+    if (failure) {
+        switch (failure->category) {
+        case ERR_CONNECT_FAIL:
+        case ERR_SECURE_CONNECT_FAIL:
+        case ERR_SOCKET_FAILURE:
+        case ERR_DNS_FAIL:
+        case ERR_READ_TIMEOUT:
+        case ERR_LIFETIME_EXP:
+        case ERR_READ_ERROR:
+        case ERR_WRITE_ERROR:
+        case ERR_GATEWAY_FAILURE:
+        case ERR_CANNOT_FORWARD:
+        case ERR_NO_RELAY:
+        case ERR_FORWARDING_DENIED:
+        case ERR_ICAP_FAILURE:
+        case ERR_INVALID_RESP:
+        case ERR_TOO_BIG:
+            return operationalIssue;
+
+        case ERR_ACCESS_DENIED:
+        case ERR_CACHE_ACCESS_DENIED:
+        case ERR_CACHE_MGR_ACCESS_DENIED:
+        case ERR_INVALID_REQ:
+        case ERR_INVALID_URL:
+        case ERR_UNSUP_REQ:
+        case ERR_UNSUP_HTTPVERSION:
+            return policyBlock;
+
+        default:
+            break; // unclassified category; consult the log tag instead
+        }
+    }
+
+    const auto status = al.http.code;
+
+    // Second choice: the legacy log tag.
+    switch (al.cache.code.oldType) {
+    // hits
+    case LOG_TCP_HIT:
+    case LOG_TCP_IMS_HIT:
+    case LOG_TCP_INM_HIT:
+    case LOG_TCP_MEM_HIT:
+    case LOG_TCP_NEGATIVE_HIT:
+    case LOG_TCP_OFFLINE_HIT:
+    case LOG_TCP_REFRESH_UNMODIFIED:
+    case LOG_TCP_REFRESH_FAIL_OLD:
+    case LOG_UDP_HIT:
+    // misses and forwarded traffic
+    case LOG_TCP_MISS:
+    case LOG_TCP_REFRESH:
+    case LOG_TCP_REFRESH_MODIFIED:
+    case LOG_TCP_CLIENT_REFRESH_MISS:
+    case LOG_TCP_TUNNEL:
+    case LOG_UDP_MISS:
+    case LOG_UDP_MISS_NOFETCH:
+    case LOG_ICP_QUERY:
+    // redirects
+    case LOG_TCP_REDIRECT:
+        return routine;
+
+    case LOG_TCP_DENIED:
+    case LOG_TCP_DENIED_REPLY:
+        // 401/407 are routine auth handshakes; 403 et al. are policy blocks
+        if (status == 401 || status == 407)
+            return routine;
+        return policyBlock;
+
+    case LOG_UDP_DENIED:
+        return policyBlock;
+
+    case LOG_TCP_SWAPFAIL_MISS:
+    case LOG_TCP_REFRESH_FAIL_ERR:
+    case LOG_UDP_INVALID:
+        return operationalIssue;
+
+    case LOG_TAG_NONE:
+    case LOG_TYPE_MAX:
+        break; // no tag-based verdict; fall back to the HTTP status
+    }
+
+    // Last resort: the HTTP status code.
+    if (status >= 500)
+        return operationalIssue;
+    return (status >= 400) ? policyBlock : 0;
+}
+
+/// Stream `[data, data+len)` to `os`, escaping the CEF header-reserved bytes
+/// '\\' and '|' with a leading backslash.
+/// Reference: https://docs.microfocus.com/doc/2097/26.1/siemcefimplementationstandard#Character_encoding
+void
+appendHeader(std::ostream &os, const char *data, const size_t len)
+{
+    if (!data) return;
+    for (size_t i = 0; i < len; ++i) {
+        const char c = data[i];
+        if (c == '\\' || c == '|')
+            os.put('\\');
+        os.put(c);
+    }
+}
+
+/// Same as the three-argument appendHeader(), for a NUL-terminated string.
+/// A null pointer writes nothing.
+inline void
+appendHeader(std::ostream &os, const char *cstr)
+{
+    if (cstr) appendHeader(os, cstr, strlen(cstr));
+}
+
+/// Appends CEF extension fields, each as ` key=value`, to a stream owned by
+/// the caller.
+class FieldWriter
+{
+public:
+    explicit FieldWriter(std::ostream &o): out(o) {}
+
+    /// Writes ` key=value` for any value type that std::ostream knows how to
+    /// format (integers, const char* literals, etc.). Skips escaping; only
+    /// safe for caller-controlled values free of CEF-reserved bytes.
+    template <typename T>
+    void put(const char *key, const T &value) {
+        out << ' ' << key << '=' << value;
+    }
+
+    /// Writes ` key=value` with the value escaped per CEF extension rules.
+    /// Writes nothing (not even the key) when the value is null or empty.
+    void putStr(const char *key, const char *value) {
+        if (!value || !*value) return;
+        out << ' ' << key << '=';
+        appendExt(out, value, strlen(value));
+    }
+
+    /// SBuf flavor of putStr(); an empty value writes nothing.
+    void putStr(const char *key, const SBuf &value) {
+        if (value.isEmpty()) return;
+        out << ' ' << key << '=';
+        appendExt(out, value.rawContent(), value.length());
+    }
+
+private:
+    std::ostream &out;
+
+    /// Stream `[data, data+len)` to `os`, escaping the CEF extension-reserved
+    /// bytes '\\', '=', CR, LF.
+    /// Reference: https://docs.microfocus.com/doc/2097/26.1/siemcefimplementationstandard#Character_encoding
+    static void
+    appendExt(std::ostream &os, const char *data, const size_t len)
+    {
+        if (!data) return;
+        for (size_t i = 0; i < len; ++i) {
+            switch (data[i]) {
+            case '\\': os << "\\\\"; break;
+            case '=': os << "\\="; break;
+            case '\r': os << "\\r"; break;
+            case '\n': os << "\\n"; break;
+            default: os.put(data[i]); break;
+            }
+        }
+    }
+};
+
+} // namespace
+
+/// Formats one access log entry as a single CEF line (header, then
+/// space-separated extension fields, then a newline) and passes the result to
+/// logfileWrite().
+void
+Log::Format::SiemCef(const AccessLogEntry::Pointer &al, Logfile *logfile)
+{
+    char clientIp[MAX_IPSTRLEN];
+    al->getLogClientIp(clientIp, MAX_IPSTRLEN);
+
+    int clientPort = 0;
+    char dvcAddr[MAX_IPSTRLEN] = "";
+    if (al->tcpClient) {
+        clientPort = al->tcpClient->remote.port();
+        al->tcpClient->local.toStr(dvcAddr, sizeof(dvcAddr));
+    }
+
+    char serverIp[MAX_IPSTRLEN] = "";
+    int serverPort = 0;
+    if (al->hier.tcpServer != nullptr) {
+        al->hier.tcpServer->remote.toStr(serverIp, sizeof(serverIp));
+        serverPort = al->hier.tcpServer->remote.port();
+    }
+
+    const SBuf method(al->getLogMethod());
+
+    // prefer the authenticated user name; otherwise use getExtUser()
+    const char *user = nullptr;
+#if USE_AUTH
+    if (al->request && al->request->auth_user_request != nullptr)
+        user = al->request->auth_user_request->username();
+#endif
+    if (!user)
+        user = al->getExtUser();
+
+    const char *referer = nullptr;
+    const char *agent = nullptr;
+    SBuf urlScheme;
+    const char *urlHost = nullptr;
+    if (al->request) {
+        referer = al->request->header.getStr(Http::HdrType::REFERER);
+        agent = al->request->header.getStr(Http::HdrType::USER_AGENT);
+        urlScheme = al->request->url.getScheme().image();
+        urlHost = al->request->url.host();
+    }
+
+    // CEF "app" is the application-level protocol; combine URL scheme with the
+    // HTTP version when known (e.g., "https/1.1") so SIEMs can filter by both.
+    SBuf appProto(urlScheme);
+    if (al->http.version.protocol == AnyP::PROTO_HTTP && al->http.version.major) {
+        appProto.appendf("/%u.%u", al->http.version.major, al->http.version.minor);
+    }
+
+    const auto cacheCode = al->cache.code.c_str();
+    const auto hierCode = hier_code_str[al->hier.code];
+
+    // Milliseconds since epoch. Widen the seconds before multiplying so that
+    // the product is computed in (at least) 64 bits.
+    const auto startMs = static_cast<long long>(al->cache.start_time.tv_sec) * 1000LL +
+                         (al->cache.start_time.tv_usec / 1000);
+    const auto trtMs = tvToMsec(al->cache.trTime);
+    // -1 marks "no end time": used when the response time is negative
+    const auto endMs = (trtMs >= 0) ? (startMs + trtMs) : -1;
+
+    SBufStream out;
+
+    // CEF header field order per CEF Implementation Standard:
+    // https://docs.microfocus.com/doc/2097/26.1/siemcefimplementationstandard#Header_information
+    /* Header: CEF:Version|Vendor|Product|DeviceVersion|SignatureID|Name|Severity| */
+    out << "CEF:0|Squid|Squid Cache|";
+    appendHeader(out, VERSION);
+    out << '|' << cacheCode << "|Proxy Request|" << CefSeverity(*al) << '|';
+
+    // CEF extensions are space-separated key=value pairs; FieldWriter::put()
+    // emits the leading space for us. Key names are drawn from the CEF
+    // Extensions dictionary:
+    // https://docs.microfocus.com/doc/2097/26.1/ab6eeee4916c_arcsight_extensions
+    /* Extensions: key1=value1 key2=value2 ... */
+    FieldWriter w(out);
+
+    /* Time (rt = receipt time; start/end mark activity boundaries) */
+    if (al->cache.start_time.tv_sec > 0) {
+        w.put("rt", startMs);
+        w.put("start", startMs);
+        if (endMs >= 0)
+            w.put("end", endMs);
+    }
+
+    /* Client side */
+    // skip an empty address and the lone "-" placeholder
+    if (clientIp[0] && !(clientIp[0] == '-' && clientIp[1] == '\0'))
+        w.putStr("src", clientIp);
+    if (clientPort > 0)
+        w.put("spt", clientPort);
+
+    /* Squid (device) end of the client TCP connection */
+    if (dvcAddr[0])
+        w.putStr("dvc", dvcAddr);
+    w.putStr("dvchost", getMyHostname());
+
+    /* Server side */
+    if (serverIp[0])
+        w.putStr("dst", serverIp);
+    if (serverPort > 0)
+        w.put("dpt", serverPort);
+    w.putStr("dhost", urlHost);
+
+    /* Protocol */
+    w.put("proto", CefTransport(al->cache.code.oldType));
+    w.putStr("app", appProto);
+
+    /* User */
+    w.putStr("suser", user);
+
+    /* Request line */
+    w.putStr("requestMethod", method);
+    w.putStr("request", al->url);
+    w.putStr("requestClientApplication", agent);
+
+    /* Bytes */
+    w.put("in", al->http.clientRequestSz.messageTotal());
+    w.put("out", al->http.clientReplySz.messageTotal());
+
+    /* Action / outcome */
+    w.putStr("act", cacheCode);
+    w.put("outcome", al->http.code >= 400 ? "failure" : "success");
+
+    /* Response time (ms). cn1 is a numeric custom field; cn1Label names it. */
+    if (trtMs >= 0) {
+        w.put("cn1", trtMs);
+        w.put("cn1Label", "ResponseTime");
+    }
+
+    /* HTTP status code (cn2) */
+    if (al->http.code > 0) {
+        w.put("cn2", al->http.code);
+        w.put("cn2Label", "HttpStatus");
+    }
+
+    /* Referer (cs1) */
+    if (referer && *referer) {
+        w.putStr("cs1", referer);
+        w.put("cs1Label", "Referer");
+    }
+
+    /* Hierarchy code (cs2) */
+    if (hierCode && *hierCode) {
+        w.putStr("cs2", hierCode);
+        w.put("cs2Label", "Hierarchy");
+    }
+
+    /* Response Content-Type */
+    w.putStr("fileType", al->http.content_type);
+
+    /* Reason for failure */
+    if (const auto err = al->error()) {
+        if (err->category != ERR_NONE)
+            w.putStr("reason", errorTypeName(err->category));
+    }
+
+    out << '\n';
+    const auto buf = out.buf();
+    logfileWrite(logfile, buf.rawContent(), buf.length());
+}
+
diff --git a/src/log/Formats.h b/src/log/Formats.h index 43e010d628c..dcb60034388 100644 --- a/src/log/Formats.h +++ b/src/log/Formats.h @@ -25,6 +25,7 @@ namespace Format typedef enum { CLF_UNKNOWN, + CLF_CEF, CLF_COMBINED, CLF_COMMON, CLF_CUSTOM, @@ -58,6 +59,9 @@ void HttpdCommon(const AccessLogEntryPointer &al, Logfile * logfile); /// Log with Apache httpd combined format void HttpdCombined(const AccessLogEntryPointer &al, Logfile * logfile); +/// Display log details in SIEM Common Event Format.
+void SiemCef(const AccessLogEntryPointer &al, Logfile * logfile); + }; // namespace Format }; // namespace Log diff --git a/src/log/Makefile.am b/src/log/Makefile.am index 51aa345e32b..6c79f131254 100644 --- a/src/log/Makefile.am +++ b/src/log/Makefile.am @@ -23,6 +23,7 @@ liblog_la_SOURCES = \ File.h \ FormatHttpdCombined.cc \ FormatHttpdCommon.cc \ + FormatSiemCef.cc \ FormatSquidCustom.cc \ FormatSquidIcap.cc \ FormatSquidNative.cc \ diff --git a/src/log/access_log.cc b/src/log/access_log.cc index 1f0f12c4723..b1f3b7866c7 100644 --- a/src/log/access_log.cc +++ b/src/log/access_log.cc @@ -108,6 +108,10 @@ accessLogLogTo(CustomLog *log, const AccessLogEntryPointer &al, ACLChecklist *ch Log::Format::SquidUserAgent(al, log->logfile); break; + case Log::Format::CLF_CEF: + Log::Format::SiemCef(al, log->logfile); + break; + case Log::Format::CLF_CUSTOM: Log::Format::SquidCustom(al, log); break; diff --git a/src/tests/stub_liblog.cc b/src/tests/stub_liblog.cc index cd971198b20..30b7faa3343 100644 --- a/src/tests/stub_liblog.cc +++ b/src/tests/stub_liblog.cc @@ -73,6 +73,7 @@ void SquidReferer(const AccessLogEntryPointer &, Logfile *) STUB void SquidCustom(const AccessLogEntryPointer &, CustomLog *) STUB void HttpdCommon(const AccessLogEntryPointer &, Logfile *) STUB void HttpdCombined(const AccessLogEntryPointer &, Logfile *) STUB +void SiemCef(const AccessLogEntryPointer &, Logfile *) STUB } }