diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index 4798a2414d0f..719c36e698c1 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -685,6 +685,12 @@ void AccessControl::restoreFromBackup(RestorerFromBackup & restorer, const Strin void AccessControl::setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config) { + /// Re-read `enable_token_auth` on every config reload. `setupFromMainConfig` + /// runs only once at startup, so without this re-sync flipping the flag in + /// the config and triggering a reload would silently leave the previous + /// value in place -- operators who toggle token auth off in response to an + /// IdP outage or a credential leak would see no effect until restart. + setTokenAuthEnabled(config.getBool("enable_token_auth", true)); external_authenticators->setConfiguration(config, getLogger(), isTokenAuthEnabled()); } diff --git a/src/Access/AuthenticationData.cpp b/src/Access/AuthenticationData.cpp index dd8ea85f767a..5c6e0716ad74 100644 --- a/src/Access/AuthenticationData.cpp +++ b/src/Access/AuthenticationData.cpp @@ -150,6 +150,13 @@ bool AuthenticationData::Util::checkPasswordBcrypt(std::string_view password [[m bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs) { + /// `MemoryAccessStorage::updateNoLock` short-circuits when the existing + /// entity equals the new one, so any field omitted from this comparator + /// becomes invisible to ALTER USER -- same-type ALTER would silently + /// no-op. JWT users carry two extra fields (`token_processor_name` and + /// `jwt_claims`) and they MUST take part in equality, otherwise re-pinning + /// a JWT user via ALTER USER is a no-op (CREATE USER OR REPLACE works + /// only by accident, via storage->insertOrReplace). return (lhs.type == rhs.type) && (lhs.password_hash == rhs.password_hash) && (lhs.ldap_server_name == rhs.ldap_server_name) && (lhs.kerberos_realm == rhs.kerberos_realm) #if USE_SSL @@ -160,6 +167,8 @@ bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs) #endif && (lhs.http_auth_scheme == rhs.http_auth_scheme) && (lhs.http_auth_server_name == rhs.http_auth_server_name) + && (lhs.token_processor_name == rhs.token_processor_name) + && (lhs.jwt_claims == rhs.jwt_claims) && (lhs.valid_until == rhs.valid_until); } @@ -405,9 +414,22 @@ boost::intrusive_ptr AuthenticationData::toAST() const } case AuthenticationType::JWT: { + /// Round-trip into the same shape the parser produces: PROCESSOR + /// child first (when set), CLAIMS child after (when set), with the + /// AST flags telling the formatter which slot is which. + const auto & processor_name = getTokenProcessorName(); + if (!processor_name.empty()) + { + node->has_jwt_processor = true; + node->children.push_back(make_intrusive(processor_name)); + } + const auto & claims = getJWTClaims(); if (!claims.empty()) + { + node->has_jwt_claims = true; node->children.push_back(make_intrusive(claims)); + } break; } case AuthenticationType::KERBEROS: @@ -689,9 +711,22 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que #if USE_JWT_CPP else if (query.type == AuthenticationType::JWT) { - if (!args.empty()) + /// `query.has_jwt_processor` and `query.has_jwt_claims` describe which + /// of the two optional clauses the parser saw. Children are pushed in + /// PROCESSOR-then-CLAIMS order, so we walk them in that order. + size_t arg_idx = 0; + + if (query.has_jwt_processor) + { + String processor_name = checkAndGetLiteralArgument(args[arg_idx++], "processor"); + if (processor_name.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "JWT 'PROCESSOR' name must not be empty"); + auth_data.setTokenProcessorName(processor_name); + } + + if (query.has_jwt_claims) { - String value = checkAndGetLiteralArgument(args[0], "claims"); + String value = checkAndGetLiteralArgument(args[arg_idx++], "claims"); picojson::value json_obj; auto error = picojson::parse(json_obj, value); if (!error.empty()) diff --git a/src/Access/Common/JWKSProvider.cpp b/src/Access/Common/JWKSProvider.cpp index 16c0dbba2423..8809f35b1e86 100644 --- a/src/Access/Common/JWKSProvider.cpp +++ b/src/Access/Common/JWKSProvider.cpp @@ -2,7 +2,11 @@ #if USE_JWT_CPP #include +#include +#include #include +#include +#include #include #include #include @@ -21,28 +25,66 @@ namespace ErrorCodes JWKSType JWKSClient::getJWKS() { + /// `last_request_send` semantics: timestamp of the most recent fetch + /// *attempt*, success or failure. Updated unconditionally before the + /// HTTP call so a failed fetch doesn't leave the timestamp stale and + /// invite every concurrent thread to re-hammer a failing endpoint + /// (L-02). Within `refresh_timeout` of an attempt: + /// - if a previously-successful JWKS is cached, serve it. + /// - otherwise, throw a "fetch in cooldown" exception so callers + /// don't queue up new attempts during the back-off window. + { std::shared_lock lock(mutex); - auto now = std::chrono::high_resolution_clock::now(); + auto now = std::chrono::steady_clock::now(); auto diff = std::chrono::duration(now - last_request_send).count(); - if (diff < static_cast(refresh_timeout) && cached_jwks.has_value()) - return cached_jwks.value(); + if (diff < static_cast(refresh_timeout)) + { + if (cached_jwks.has_value()) + return cached_jwks.value(); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "JWKS endpoint at '{}' is in cooldown after a recent failed fetch; will retry after the cache lifetime elapses", + jwks_uri.toString()); + } } std::unique_lock lock(mutex); auto now = std::chrono::high_resolution_clock::now(); auto diff = std::chrono::duration(now - last_request_send).count(); - if (diff < static_cast(refresh_timeout) && cached_jwks.has_value()) - return cached_jwks.value(); + if (diff < static_cast(refresh_timeout)) + { + if (cached_jwks.has_value()) + return cached_jwks.value(); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "JWKS endpoint at '{}' is in cooldown after a recent failed fetch; will retry after the cache lifetime elapses", + jwks_uri.toString()); + } + + /// Mark the attempt before issuing the network call so that even if the + /// fetch throws, subsequent waiters on this mutex see an updated + /// `last_request_send` and short-circuit via the cooldown branches above + /// instead of repeating the failing fetch back-to-back. + last_request_send = now; Poco::Net::HTTPResponse response; std::string response_string; Poco::Net::HTTPRequest request{Poco::Net::HTTPRequest::HTTP_GET, jwks_uri.getPathAndQuery()}; + /// Bound every JWKS fetch to a known limit. Without this, Poco's default + /// `HTTPSession` timeout of 60 seconds applies, and because the JWKS fetch + /// runs while `ExternalAuthenticators::mutex` is held by the outer + /// `checkTokenCredentials` call, a single slow or hung JWKS endpoint would + /// stall the whole auth subsystem (LDAP, Kerberos, HTTP basic, all other + /// token auth paths) for up to a full minute per request. 10 seconds is a + /// conservative cap: well above any healthy provider latency, well below + /// the default. + const Poco::Timespan jwks_http_timeout(/*seconds=*/10, 0); + if (jwks_uri.getScheme() == "https") { Poco::Net::HTTPSClientSession session = Poco::Net::HTTPSClientSession(jwks_uri.getHost(), jwks_uri.getPort()); + session.setTimeout(jwks_http_timeout, jwks_http_timeout, jwks_http_timeout); session.sendRequest(request); std::istream & response_stream = session.receiveResponse(response); if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK || !response_stream) @@ -53,6 +95,7 @@ JWKSType JWKSClient::getJWKS() else { Poco::Net::HTTPClientSession session = Poco::Net::HTTPClientSession(jwks_uri.getHost(), jwks_uri.getPort()); + session.setTimeout(jwks_http_timeout, jwks_http_timeout, jwks_http_timeout); session.sendRequest(request); std::istream & response_stream = session.receiveResponse(response); if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK || !response_stream) @@ -60,8 +103,6 @@ JWKSType JWKSClient::getJWKS() Poco::StreamCopier::copyToString(response_stream, response_string); } - last_request_send = std::chrono::high_resolution_clock::now(); - JWKSType parsed_jwks; try @@ -92,11 +133,18 @@ StaticJWKSParams::StaticJWKSParams(const std::string & static_jwks_, const std:: StaticJWKS::StaticJWKS(const StaticJWKSParams & params) { + static_jwks_file = params.static_jwks_file; + String content = String(params.static_jwks); - if (!params.static_jwks_file.empty()) + if (!static_jwks_file.empty()) { - std::ifstream ifs(params.static_jwks_file); + std::ifstream ifs(static_jwks_file); Poco::StreamCopier::copyToString(ifs, content); + /// Record the mtime so subsequent `getJWKS()` calls can notice rotation. + std::error_code ec; + const auto write_time = std::filesystem::last_write_time(static_jwks_file, ec); + if (!ec) + last_loaded_mtime = write_time; } try { @@ -109,5 +157,70 @@ StaticJWKS::StaticJWKS(const StaticJWKSParams & params) } } +void StaticJWKS::reloadFromFileIfChangedNoLock() +{ + /// Inline `static_jwks` source: nothing to refresh from disk. + if (static_jwks_file.empty()) + return; + + std::error_code ec; + const auto mtime = std::filesystem::last_write_time(static_jwks_file, ec); + if (ec) + { + /// File disappeared or became unreadable. Keep the previously-loaded + /// keys -- failing closed here would lock everyone out on a transient + /// filesystem hiccup. The operator gets a log signal. + LOG_WARNING(getLogger("TokenAuthentication"), + "StaticJWKS: failed to stat '{}' for refresh ({}); keeping previously-loaded keys.", + static_jwks_file, ec.message()); + return; + } + if (mtime <= last_loaded_mtime) + return; + + /// File has been rotated. Read + parse + swap. + String content; + try + { + std::ifstream ifs(static_jwks_file); + Poco::StreamCopier::copyToString(ifs, content); + auto new_keys = jwt::parse_jwks(content); + jwks = std::move(new_keys); + last_loaded_mtime = mtime; + LOG_INFO(getLogger("TokenAuthentication"), + "StaticJWKS: reloaded keys from '{}' after detecting mtime change.", static_jwks_file); + } + catch (const std::exception & e) + { + /// Malformed new JWKS: keep the old one. Loud signal so the operator + /// knows the rotation didn't take. + LOG_ERROR(getLogger("TokenAuthentication"), + "StaticJWKS: failed to parse '{}' on refresh: {}; keeping previously-loaded keys.", + static_jwks_file, e.what()); + } +} + +JWKSType StaticJWKS::getJWKS() +{ + /// Fast path: shared lock + mtime check. Refresh under exclusive lock only + /// when the file actually changed. + { + std::shared_lock lock(mutex); + if (static_jwks_file.empty()) + return jwks; + + std::error_code ec; + const auto mtime = std::filesystem::last_write_time(static_jwks_file, ec); + if (ec) + return jwks; + if (mtime <= last_loaded_mtime) + return jwks; + } + + std::unique_lock lock(mutex); + reloadFromFileIfChangedNoLock(); + return jwks; +} + } #endif diff --git a/src/Access/Common/JWKSProvider.h b/src/Access/Common/JWKSProvider.h index 566effd6e21e..21d52e42917d 100644 --- a/src/Access/Common/JWKSProvider.h +++ b/src/Access/Common/JWKSProvider.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -44,7 +45,9 @@ class JWKSClient : public IJWKSProvider std::shared_mutex mutex; std::optional cached_jwks; - std::chrono::time_point last_request_send; + /// `steady_clock` (not `system_clock`): refresh-cooldown is an elapsed-time + /// measurement; a wall-clock jump must not skip or freeze it. + std::chrono::time_point last_request_send; }; struct StaticJWKSParams @@ -60,12 +63,25 @@ class StaticJWKS : public IJWKSProvider public: explicit StaticJWKS(const StaticJWKSParams ¶ms); + /// Reload the JWKS from disk if `static_jwks_file` was specified and the + /// file's mtime has advanced since the last load. Inline `static_jwks` + /// (no file path) is returned from the in-memory copy without I/O. + /// Without this, rotating the underlying file did NOT refresh the + /// in-memory keys -- admins had to trigger a full + /// `setExternalAuthenticatorsConfig` reload to pick up the new file. + JWKSType getJWKS() override; + private: - JWKSType getJWKS() override - { - return jwks; - } + void reloadFromFileIfChangedNoLock(); + + /// Source path -- empty when JWKS came from inline `` config. + String static_jwks_file; + /// `mtime` of the file at the most recent successful load. Used to detect + /// rotation. `file_time_type::min()` means "not loaded from a file" or + /// "never seen the file yet". + std::filesystem::file_time_type last_loaded_mtime = std::filesystem::file_time_type::min(); + mutable std::shared_mutex mutex; JWKSType jwks; }; diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index 18f36cc4cec3..625acb996f38 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -296,7 +296,11 @@ void ExternalAuthenticators::reset() resetImpl(); } -void parseTokenProcessors(std::unordered_map> & token_processors, +/// Parse all token processors as an all-or-nothing operation. +/// +/// Throws if ANY processor fails to parse. The caller is expected to react by +/// disabling token authentication for this configuration cycle (fail-closed). +void parseTokenProcessors(std::map> & token_processors, const Poco::Util::AbstractConfiguration & config, const String & token_processors_config, LoggerPtr log) @@ -304,20 +308,26 @@ void parseTokenProcessors(std::unordered_map> parsed; for (const auto & processor : token_processors_keys) { String prefix = fmt::format("{}.{}", token_processors_config, processor); try { - token_processors[processor] = ITokenProcessor::parseTokenProcessor(config, prefix, processor); + parsed[processor] = ITokenProcessor::parseTokenProcessor(config, prefix, processor); } catch (...) { - tryLogCurrentException(log, "Could not parse token processor" + backQuote(processor)); + tryLogCurrentException(log, "Could not parse token processor " + backQuote(processor)); + /// Re-throw so the caller fails. + throw; } } + + token_processors = std::move(parsed); } bool ExternalAuthenticators::isTokenAuthEnabled() const @@ -326,6 +336,16 @@ bool ExternalAuthenticators::isTokenAuthEnabled() const return token_auth_enabled; } +bool ExternalAuthenticators::hasTokenProcessor(const String & name) const +{ + std::lock_guard lock(mutex); + if (!token_auth_enabled) + return false; + if (name.empty()) + return true; + return token_processors.contains(name); +} + void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfiguration & config, LoggerPtr log, bool token_auth_enabled_) { std::lock_guard lock(mutex); @@ -431,7 +451,22 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur } if (token_auth_enabled) - parseTokenProcessors(token_processors, config, token_processors_config, log); + { + try + { + parseTokenProcessors(token_processors, config, token_processors_config, log); + } + catch (...) + { + /// Fail closed: if any token processor failed to parse, refuse to + /// activate token auth at all for this config cycle. + tryLogCurrentException(log, + "One or more token processors failed to parse; " + "disabling token authentication entirely until the configuration is fixed"); + token_processors.clear(); + token_auth_enabled = false; + } + } else LOG_INFO(log, "Token authentication is disabled, skipping token processors configuration"); } @@ -615,113 +650,233 @@ HTTPAuthClientParams ExternalAuthenticators::getHTTPAuthenticationParams(const S bool ExternalAuthenticators::checkCredentialsAgainstProcessor(const ITokenProcessor & processor, TokenCredentials & credentials) const { - if (processor.resolveAndValidate(credentials)) + if (!processor.resolveAndValidate(credentials)) { - TokenCacheEntry cache_entry; - cache_entry.user_name = credentials.getUserName(); - cache_entry.external_roles = credentials.getGroups(); - - auto default_expiration_ts = std::chrono::system_clock::now() - + std::chrono::seconds(processor.getTokenCacheLifetime()); - - if (credentials.getExpiresAt().has_value()) - { - if (credentials.getExpiresAt().value() < default_expiration_ts) - cache_entry.expires_at = credentials.getExpiresAt().value(); - else - { - LOG_TRACE(getLogger("AccessTokenAuthentication"), "Token for user {} expires after default cache lifetime; using default TTL by {}", credentials.getUserName(), processor.getProcessorName()); - cache_entry.expires_at = default_expiration_ts; - } - } - else - { - cache_entry.expires_at = default_expiration_ts; - } - - LOG_DEBUG(getLogger("AccessTokenAuthentication"), "Authenticated user {} with access token by {}", credentials.getUserName(), processor.getProcessorName()); + LOG_TRACE(getLogger("AccessTokenAuthentication"), "Failed authentication with access token by {}", processor.getProcessorName()); + return false; + } - // CHeck if a cache entry for the same user but with another token exists -- old cache entry is considered outdated and removed - auto old_token_iter = username_to_access_token_cache.find(cache_entry.user_name); - if (old_token_iter != username_to_access_token_cache.end()) + /// Clamp the credentials' expires_at to the processor's cache lifetime so + /// upper layers (notably `Session`) bind their lifetime to whichever is + /// shorter -- the token's own expiry or the operator-configured TTL. This + /// is a *post-validation finalization* of the credentials, not a cache + /// write; the actual token-cache entry is written by `primeTokenCache`, + /// and only after any per-user `jwt_claims` policy has also accepted the + /// token (see `checkTokenCredentials`). + auto default_expiration_ts = std::chrono::system_clock::now() + + std::chrono::seconds(processor.getTokenCacheLifetime()); + + if (credentials.getExpiresAt().has_value()) + { + if (credentials.getExpiresAt().value() >= default_expiration_ts) { - access_token_to_username_cache.erase(old_token_iter->second); - username_to_access_token_cache.erase(old_token_iter); + LOG_TRACE(getLogger("AccessTokenAuthentication"), "Token for user {} expires after default cache lifetime; using default TTL by {}", credentials.getUserName(), processor.getProcessorName()); + credentials.setExpiresAt(default_expiration_ts); } - - access_token_to_username_cache[credentials.getToken()] = cache_entry; - username_to_access_token_cache[cache_entry.user_name] = credentials.getToken(); - LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} added", cache_entry.user_name); - - return true; } - LOG_TRACE(getLogger("AccessTokenAuthentication"), "Failed authentication with access token by {}", processor.getProcessorName()); + else + { + credentials.setExpiresAt(default_expiration_ts); + } - return false; + LOG_DEBUG(getLogger("AccessTokenAuthentication"), "Authenticated user {} with access token by {}", quoteString(credentials.getUserName()), processor.getProcessorName()); + return true; } -bool ExternalAuthenticators::checkTokenCredentials(const TokenCredentials & credentials, const String & processor_name, const String & jwt_claims) const +void ExternalAuthenticators::primeTokenCache(const ITokenProcessor & processor, + const TokenCredentials & credentials) const { - std::lock_guard lock{mutex}; + /// Build a cache entry from the credentials state that + /// `checkCredentialsAgainstProcessor` finalized. The caller is responsible + /// for invoking this only after both processor validation AND the per-user + /// `jwt_claims` policy have accepted the token -- caching before claims + /// have been evaluated would let later unconstrained lookups (e.g. the + /// HTTP/TCP pre-user-lookup call which passes empty `jwt_claims`) hit a + /// cache entry that never actually satisfied the user's policy. + TokenCacheEntry cache_entry; + cache_entry.user_name = credentials.getUserName(); + cache_entry.external_roles = credentials.getGroups(); + cache_entry.processor_name = processor.getProcessorName(); + cache_entry.expires_at = credentials.getExpiresAt().value_or( + std::chrono::system_clock::now() + std::chrono::seconds(processor.getTokenCacheLifetime())); + + /// If the same token already has a forward entry that maps to a DIFFERENT + /// user_name, clean up the stale reverse entry for that other user before + /// we overwrite the forward entry. This happens when two processors extract + /// different `username_claim` values from the same token (e.g. processor X + /// uses `sub`, processor Y uses `email`): without this, the rotation step + /// below would not see the old user's entry in the reverse map and the + /// bi-map would diverge -- forward saying token -> new_user while a stale + /// reverse says old_user -> token, surfacing later as a dangling reverse + /// pointer that breaks the single-token-per-user invariant. + auto existing_forward = access_token_to_username_cache.find(credentials.getToken()); + if (existing_forward != access_token_to_username_cache.end() + && existing_forward->second.user_name != cache_entry.user_name) + { + auto stale_reverse = username_to_access_token_cache.find(existing_forward->second.user_name); + if (stale_reverse != username_to_access_token_cache.end() + && stale_reverse->second == credentials.getToken()) + username_to_access_token_cache.erase(stale_reverse); + } - if (!token_auth_enabled) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Token authentication is disabled"); + /// If a previous entry exists for the same user under a different token, + /// drop it -- the user has rotated tokens and the old one is now stale. + auto old_token_iter = username_to_access_token_cache.find(cache_entry.user_name); + if (old_token_iter != username_to_access_token_cache.end()) + { + access_token_to_username_cache.erase(old_token_iter->second); + username_to_access_token_cache.erase(old_token_iter); + } - if (token_processors.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Token authentication is not configured"); + access_token_to_username_cache[credentials.getToken()] = cache_entry; + username_to_access_token_cache[cache_entry.user_name] = credentials.getToken(); + LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} added", quoteString(cache_entry.user_name)); +} - /// Per-user claims restriction applies only to JWT processors; opaque/access token processors ignore it. +bool ExternalAuthenticators::checkTokenCredentials(const TokenCredentials & credentials, + const String & processor_name, + const String & jwt_claims, + bool prime_cache_on_success) const +{ + /// Per-user claims restriction is binding: when a user is configured with `jwt_claims`, + /// authentication is only allowed via processors that can actually evaluate those claims + /// (i.e. JWT processors). If the resolving processor cannot enforce the restriction we + /// must deny -- silently treating it as "no restriction" would let an opaque/access-token + /// processor authenticate a token that fails the user's per-user policy. auto check_claims_if_required = [&](const ITokenProcessor & processor) -> bool { if (jwt_claims.empty()) return true; if (!processor.supportsJwtClaimsRestriction()) - return true; + { + LOG_TRACE(getLogger("AccessTokenAuthentication"), + "Processor {} does not support per-user JWT claims restriction; " + "denying authentication that requires claims to be checked", + processor.getProcessorName()); + return false; + } return processor.checkClaims(credentials, jwt_claims); }; - /// lookup token in local cache if not expired. - auto cached_entry_iter = access_token_to_username_cache.find(credentials.getToken()); - if (cached_entry_iter != access_token_to_username_cache.end()) + /// Snapshot the processor set under the mutex, then run the expensive + /// crypto verify WITHOUT the mutex (M-20). `shared_ptr` keeps each + /// processor alive even if a config reload swaps `token_processors` in + /// the middle of validation. Cache lookup stays under the mutex. + std::map> processors_snapshot; + { - if (cached_entry_iter->second.expires_at <= std::chrono::system_clock::now()) // Token found in cache, but already outdated -- need to remove it. - { - const auto expired_user_name = cached_entry_iter->second.user_name; - LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} expired, removing", expired_user_name); - access_token_to_username_cache.erase(cached_entry_iter); - username_to_access_token_cache.erase(expired_user_name); - } - else + std::lock_guard lock{mutex}; + + if (!token_auth_enabled) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Token authentication is disabled"); + + if (token_processors.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Token authentication is not configured"); + + /// lookup token in local cache if not expired. + auto cached_entry_iter = access_token_to_username_cache.find(credentials.getToken()); + if (cached_entry_iter != access_token_to_username_cache.end()) { - const auto & user_data = cached_entry_iter->second; - const_cast(credentials).setUserName(user_data.user_name); - const_cast(credentials).setGroups(user_data.external_roles); - LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} found, using it to authenticate", cached_entry_iter->second.user_name); - if (!jwt_claims.empty()) + if (cached_entry_iter->second.expires_at <= std::chrono::system_clock::now()) // Token found in cache, but already outdated -- need to remove it. { - if (processor_name.empty()) - return false; - const auto it = token_processors.find(processor_name); - if (it == token_processors.end() || !check_claims_if_required(*it->second)) - return false; + const auto expired_user_name = cached_entry_iter->second.user_name; + const auto expired_token = cached_entry_iter->first; + LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} expired, removing", quoteString(expired_user_name)); + access_token_to_username_cache.erase(cached_entry_iter); + + /// Only unlink the reverse mapping if it currently points at the token + /// we just evicted. The bi-map invariant is maintained by + /// `primeTokenCache`, but if a reverse entry is somehow stale (or if a + /// concurrent rotation under the same mutex hold has already pointed + /// the user's reverse mapping at a fresh, still-valid token), erasing + /// blindly here would unlink that fresh token's reverse entry -- + /// silently breaking the single-token-per-user invariant and extending + /// the stale token's effective retention. + auto reverse_it = username_to_access_token_cache.find(expired_user_name); + if (reverse_it != username_to_access_token_cache.end() && reverse_it->second == expired_token) + username_to_access_token_cache.erase(reverse_it); + } + /// Enforce the per-user processor pin even on cache hit. A cache entry produced by + /// processor A must NOT be used to satisfy an authentication request that is pinned + /// to a different processor B.When the caller did not pin a processor (processor_name is + /// empty) any cached entry is acceptable. + else if (processor_name.empty() || processor_name == cached_entry_iter->second.processor_name) + { + /// Evaluate per-user claims FIRST, before mutating the outer + /// `TokenCredentials`. The `const_cast`-ed `setUserName`/`setGroups`/ + /// `setExpiresAt` writes below would otherwise leak the cached + /// identity into the caller's credentials object even on rejection. + if (!jwt_claims.empty()) + { + const auto it = token_processors.find(cached_entry_iter->second.processor_name); + if (it == token_processors.end() || !check_claims_if_required(*it->second)) + return false; + } + + const auto & user_data = cached_entry_iter->second; + const_cast(credentials).setUserName(user_data.user_name); + const_cast(credentials).setGroups(user_data.external_roles); + const_cast(credentials).setExpiresAt(user_data.expires_at); + LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} found, using it to authenticate", quoteString(user_data.user_name)); + return true; + } + else + { + LOG_TRACE(getLogger("AccessTokenAuthentication"), + "Cached token entry was produced by processor {}, but authentication is pinned to {}; " + "ignoring cache and re-authenticating via the pinned processor", + cached_entry_iter->second.processor_name, processor_name); } - return true; } + + processors_snapshot = token_processors; } + /// Validation path runs WITHOUT the mutex. RSA/ECDSA verifies and any + /// expensive claim matching no longer serialize the auth subsystem. + auto try_processor = [&](const std::shared_ptr & proc) -> std::optional + { + if (!checkCredentialsAgainstProcessor(*proc, const_cast(credentials))) + return std::nullopt; + if (!check_claims_if_required(*proc)) + return false; + if (prime_cache_on_success) + { + std::lock_guard lock{mutex}; + primeTokenCache(*proc, credentials); + } + return true; + }; + if (processor_name.empty()) { - for (const auto & it : token_processors) + for (const auto & [name, proc] : processors_snapshot) { - if (checkCredentialsAgainstProcessor(*it.second, const_cast(credentials))) - return check_claims_if_required(*it.second); + if (!jwt_claims.empty() && !proc->supportsJwtClaimsRestriction()) + { + LOG_TRACE(getLogger("AccessTokenAuthentication"), + "Skipping processor {} during auto-discovery: it cannot enforce per-user JWT claims", + proc->getProcessorName()); + continue; + } + if (auto result = try_processor(proc); result.has_value()) + return *result; } } else { - const auto it = token_processors.find(processor_name); - if (it != token_processors.end() && checkCredentialsAgainstProcessor(*it->second, const_cast(credentials))) - return check_claims_if_required(*it->second); + const auto it = processors_snapshot.find(processor_name); + if (it == processors_snapshot.end()) + return false; + if (!jwt_claims.empty() && !it->second->supportsJwtClaimsRestriction()) + { + LOG_TRACE(getLogger("AccessTokenAuthentication"), + "Pinned processor {} cannot enforce per-user JWT claims; denying authentication", + it->second->getProcessorName()); + return false; + } + if (auto result = try_processor(it->second); result.has_value()) + return *result; } return false; diff --git a/src/Access/ExternalAuthenticators.h b/src/Access/ExternalAuthenticators.h index 1c539fa1917c..1486226f5bd0 100644 --- a/src/Access/ExternalAuthenticators.h +++ b/src/Access/ExternalAuthenticators.h @@ -43,13 +43,34 @@ class ExternalAuthenticators bool isTokenAuthEnabled() const; + /// Returns true if a token processor with the given name is currently + /// configured. Used by `Session::checkIfUserIsStillValid` to terminate + /// active sessions whose authenticating processor was removed by config + /// reload (M-28). Empty `name` is treated as "no specific pin" and + /// returns true (token auth must still be enabled, of course). + bool hasTokenProcessor(const String & name) const; + // The name and readiness of the credentials must be verified before calling these. bool checkLDAPCredentials(const String & server, const BasicCredentials & credentials, const LDAPClient::RoleSearchParamsList * role_search_params = nullptr, LDAPClient::SearchResultsList * role_search_results = nullptr) const; bool checkKerberosCredentials(const String & realm, const GSSAcceptorContext & credentials) const; bool checkHTTPBasicCredentials(const String & server, const BasicCredentials & credentials, const ClientInfo & client_info, SettingsChanges & settings) const; - bool checkTokenCredentials(const TokenCredentials & credentials, const String & processor_name = "", const String & jwt_claims = "") const; + /// `prime_cache_on_success` controls whether a successful validation populates the + /// token cache. Per-user authentication paths (the chain reached from + /// `Session::authenticate`) leave this at the default `true` -- their result is + /// gated by the user's pinned processor and per-user JWT claims, so the cache + /// entry it produces is safe to consult on subsequent requests. The HTTP and TCP + /// bearer entry points authenticate the token *before* the user is known + /// (they need the username from the token to drive user lookup) and so call + /// this with `false`: their decision is made under no processor pin and no + /// claims constraint, and a cache entry written from that context would be + /// trusted by a later per-user call whose `processor_name` is empty -- bypassing + /// the per-user processor and claim selection that would otherwise occur. + bool checkTokenCredentials(const TokenCredentials & credentials, + const String & processor_name = "", + const String & jwt_claims = "", + bool prime_cache_on_success = true) const; GSSAcceptorContext::Params getKerberosParams() const; @@ -72,13 +93,31 @@ class ExternalAuthenticators mutable LDAPCaches ldap_caches TSA_GUARDED_BY(mutex) ; std::optional kerberos_params TSA_GUARDED_BY(mutex) ; std::unordered_map http_auth_servers TSA_GUARDED_BY(mutex) ; - mutable std::unordered_map> token_processors TSA_GUARDED_BY(mutex) ; + /// Ordered (std::map, not unordered_map) so that the auto-discovery + /// dispatch order in `checkTokenCredentials` is deterministic across + /// process runs. Without an ordering, the iteration order of + /// `unordered_map` is implementation-defined and may differ run-to-run + /// or after rehashing -- which means the same unpinned token can be + /// validated by processor A in one run and processor B in another, + /// producing different cached identities, different role mappings (each + /// processor has its own `groups_claim`), and surprising debugging + /// outcomes. Alphabetical-by-name order makes "first to succeed wins" + /// stable and predictable from configuration alone. + /// + /// `shared_ptr` so callers can snapshot the relevant processor pointer + /// (or the whole map) under the mutex, RELEASE the mutex, and run the + /// expensive crypto verify without serializing the entire auth + /// subsystem behind a single attacker-driven RSA verify (M-20). Cheap: + /// processor count is tiny, snapshot is shared_ptr copies. + mutable std::map> token_processors TSA_GUARDED_BY(mutex) ; struct TokenCacheEntry { std::chrono::system_clock::time_point expires_at; String user_name; std::set external_roles; + /// Name of the token processor that produced this cache entry. + String processor_name; }; /// Home-made simple bi-mapping, needed to effectively clean up cache from old tokens. @@ -90,8 +129,21 @@ class ExternalAuthenticators bool token_auth_enabled TSA_GUARDED_BY(mutex) = true; + /// Validates the credentials with the given processor. On success, mutates + /// `credentials` (user name, groups, effective expires_at) and returns true. + /// Does NOT write the token cache -- caching is the responsibility of the + /// caller, after the per-user `jwt_claims` policy has been evaluated. + /// + /// MUST be called WITHOUT holding `mutex`: this is the expensive crypto + /// path (M-20). The processor must be passed by `shared_ptr` so it + /// outlives a concurrent config reload that resets `token_processors`. bool checkCredentialsAgainstProcessor(const ITokenProcessor & processor, - TokenCredentials & credentials) const TSA_REQUIRES(mutex); + TokenCredentials & credentials) const; + + /// Writes the per-token cache entry. Must be called only after both processor + /// validation AND any per-user `jwt_claims` policy have accepted the token. + void primeTokenCache(const ITokenProcessor & processor, + const TokenCredentials & credentials) const TSA_REQUIRES(mutex); void resetImpl() TSA_REQUIRES(mutex); }; diff --git a/src/Access/TokenAccessStorage.cpp b/src/Access/TokenAccessStorage.cpp index 53a6313aefc7..089f371b583e 100644 --- a/src/Access/TokenAccessStorage.cpp +++ b/src/Access/TokenAccessStorage.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -106,24 +107,17 @@ namespace return result; } - String applyTransform(const String & input, const String & pattern, const String & replacement, bool global) + String applyTransform(const String & input, const re2::RE2 & re, const String & replacement, bool global) { - if (pattern.empty()) - return input; - - re2::RE2 re(pattern); - if (!re.ok()) - return input; - + /// `re` is precompiled at storage construction (the constructor refuses + /// to load with an invalid pattern, so by the time we get here the + /// regex is guaranteed to be `ok()`). No per-call recompilation; no + /// silent no-op on a bad pattern. String result = input; if (global) - { RE2::GlobalReplace(&result, re, replacement); - } else - { RE2::Replace(&result, re, replacement); - } return result; } } @@ -137,13 +131,51 @@ TokenAccessStorage::TokenAccessStorage(const String & storage_name_, AccessContr const String prefix_str = (prefix.empty() ? "" : prefix + "."); if (config.has(prefix_str + "roles_filter")) - roles_filter.emplace(config.getString(prefix_str + "roles_filter")); + { + const String filter_pattern = config.getString(prefix_str + "roles_filter"); + roles_filter.emplace(filter_pattern); + + /// Fail closed on invalid regex. RE2 does not throw on bad patterns -- it + /// constructs an object with ok()==false and silently fails every match. + /// Reject the configuration up front so the + /// storage cannot be instantiated in a permissive state. + if (!roles_filter->ok()) + { + const String error = roles_filter->error(); + roles_filter.reset(); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Invalid 'roles_filter' regex for Token user directory '{}': {}. " + "Refusing to start with a misconfigured filter to avoid granting " + "all token groups as roles.", + storage_name_, error); + } + } if (config.has(prefix_str + "roles_transform")) { String transform = config.getString(prefix_str + "roles_transform"); ParsedTransform parsed = parseSedTransform(transform); - roles_transform_pattern = parsed.pattern; + + /// Compile and validate the regex up front. If we deferred compilation + /// to runtime (the previous behavior), an invalid regex would silently + /// return the input unchanged on every call -- meaning every role name + /// from the IdP would flow into role-mapping ungroomed, defeating the + /// purpose of `roles_transform`. Fail loudly at construction so the + /// misconfiguration is visible at startup. + if (!parsed.pattern.empty()) + { + roles_transform_pattern.emplace(parsed.pattern); + if (!roles_transform_pattern->ok()) + { + const String error = roles_transform_pattern->error(); + roles_transform_pattern.reset(); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Invalid 'roles_transform' regex for Token user directory '{}': {}. " + "Refusing to start with a misconfigured transform to avoid admitting " + "ungroomed role names from the IdP.", + storage_name_, error); + } + } roles_transform_replacement = parsed.replacement; roles_transform_global = parsed.global; } @@ -165,6 +197,35 @@ TokenAccessStorage::TokenAccessStorage(const String & storage_name_, AccessContr if (config.has(prefix_str + "default_profile")) default_profile_name = config.getString(prefix_str + "default_profile"); + /// Optional IP allowlist for auto-provisioned users. Mirrors the + /// `users.xml` `` shape: `SUBNET` / + /// `NAME` / `REGEX` children. + /// Without this, every auto-created token user defaults to `AnyHost` and + /// admins have no way to restrict token-auth by network through standard + /// access-control config. + const auto networks_config_path = prefix_str + "networks"; + if (config.has(networks_config_path)) + { + AllowedClientHosts hosts; + Poco::Util::AbstractConfiguration::Keys network_keys; + config.keys(networks_config_path, network_keys); + for (const String & key : network_keys) + { + const String value = config.getString(networks_config_path + "." + key); + if (key.starts_with("ip")) + hosts.addSubnet(value); + else if (key.starts_with("host_regexp")) + hosts.addNameRegexp(value); + else if (key.starts_with("host")) + hosts.addName(value); + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Token user directory '{}': unknown entry '{}'; expected 'ip', 'host', or 'host_regexp'.", + storage_name_, key); + } + auto_user_allowed_hosts = std::move(hosts); + } + user_external_roles.clear(); users_per_roles.clear(); roles_per_users.clear(); @@ -466,28 +527,6 @@ void TokenAccessStorage::assignProfileNoLock(User & user) const } } -void TokenAccessStorage::updateAssignedRolesNoLock(const UUID & id, const String & user_name, const std::set & external_roles) const -{ - // Map and grant the roles from scratch only if the list of external role has changed. - const auto it = user_external_roles.find(user_name); - if (it != user_external_roles.end() && it->second == external_roles) - return; - - auto update_func = [this, &external_roles] (const AccessEntityPtr & entity_, const UUID &) -> AccessEntityPtr - { - if (auto user = typeid_cast>(entity_)) - { - auto changed_user = typeid_cast>(user->clone()); - assignRolesNoLock(*changed_user, external_roles); - return changed_user; - } - return entity_; - }; - - memory_storage.update(id, update_func); -} - - std::optional TokenAccessStorage::authenticateImpl( const Credentials & credentials, const Poco::Net::IPAddress & address, @@ -498,10 +537,28 @@ std::optional TokenAccessStorage::authenticateImpl( bool /* allow_plaintext_password */) const { std::lock_guard lock(mutex); + + /// Reject mismatched credential types BEFORE the typeid_cast that would + /// throw a `LOGICAL_ERROR`. The reference-form `typeid_cast` is fatal on + /// mismatch, and `MultipleAccessStorage::authenticateImpl` does not catch + /// per-storage exceptions -- so a single Basic / SSL-cert / Kerberos / SSH + /// login attempt would propagate that exception out of the chain and abort + /// authentication for every later storage in `user_directories`. Concretely, + /// listing `` ahead of `` would lock out every Basic-auth + /// user. Return nullopt cleanly, matching the LDAP-side idiom in + /// `LDAPAccessStorage::areLDAPCredentialsValidNoLock`. + const auto * token_credentials_ptr = dynamic_cast(&credentials); + if (!token_credentials_ptr) + { + if (throw_if_user_not_exists) + throwNotFound(AccessEntityType::USER, credentials.getUserName(), getStorageName()); + return {}; + } + auto id = memory_storage.find(credentials.getUserName()); UserPtr user = id ? memory_storage.read(*id) : nullptr; - const auto & token_credentials = typeid_cast(credentials); + const auto & token_credentials = *token_credentials_ptr; if (!external_authenticators.checkTokenCredentials(token_credentials, provider_name)) { @@ -519,6 +576,17 @@ std::optional TokenAccessStorage::authenticateImpl( new_user = std::make_shared(); new_user->setName(credentials.getUserName()); new_user->authentication_methods.emplace_back(AuthenticationType::JWT); + /// Stamp the storage's pinned processor onto the auth method so the + /// per-request validity check (`Session::checkIfUserIsStillValid`) + /// can detect when an admin removes that processor and terminate + /// active sessions whose tokens were issued through it (M-28). + new_user->authentication_methods.back().setTokenProcessorName(provider_name); + /// If the operator configured a network allowlist for this storage, + /// stamp it onto the auto-created user so `isAddressAllowed` checks it + /// below. Without this, every auto-provisioned token user inherits + /// `AnyHostTag` and there is no way to restrict token auth by network. + if (auto_user_allowed_hosts.has_value()) + new_user->allowed_client_hosts = *auto_user_allowed_hosts; user = new_user; } @@ -526,20 +594,35 @@ std::optional TokenAccessStorage::authenticateImpl( throwAddressNotAllowed(address); std::set external_roles; - if (roles_filter.has_value() && roles_filter.value().ok()) + if (roles_filter.has_value()) { - LOG_TRACE(getLogger(), "{}: External role filter found, applying only matching groups", getStorageName()); - for (const auto & group: token_credentials.getGroups()) { - if (RE2::FullMatch(group, roles_filter.value())) - { - String transformed_group = group; - if (roles_transform_pattern.has_value() && roles_transform_replacement.has_value()) + /// Defensive: a broken regex must NEVER cause a fall-through to the + /// permissive "grant all groups" branch. Parse-time validation in the + /// constructor already rejects invalid patterns; this guard ensures the + /// invariant still holds if any future code path constructs the filter + /// without the parse-time check (e.g. config reload). + if (!roles_filter->ok()) + { + LOG_ERROR(getLogger(), + "{}: Configured 'roles_filter' is invalid ('{}'); refusing to map any " + "external roles for user '{}' to avoid granting all token groups.", + getStorageName(), roles_filter->error(), credentials.getUserName()); + } + else + { + LOG_TRACE(getLogger(), "{}: External role filter found, applying only matching groups", getStorageName()); + for (const auto & group: token_credentials.getGroups()) { + if (RE2::FullMatch(group, roles_filter.value())) { - transformed_group = applyTransform(group, roles_transform_pattern.value(), roles_transform_replacement.value(), roles_transform_global); - LOG_TRACE(getLogger(), "{}: Transformed group '{}' to '{}'", getStorageName(), group, transformed_group); + String transformed_group = group; + if (roles_transform_pattern.has_value() && roles_transform_replacement.has_value()) + { + transformed_group = applyTransform(group, roles_transform_pattern.value(), roles_transform_replacement.value(), roles_transform_global); + LOG_TRACE(getLogger(), "{}: Transformed group '{}' to '{}'", getStorageName(), group, transformed_group); + } + external_roles.insert(transformed_group); + LOG_TRACE(getLogger(), "{}: Granted role (group) {} to user", getStorageName(), transformed_group); } - external_roles.insert(transformed_group); - LOG_TRACE(getLogger(), "{}: Granted role (group) {} to user", getStorageName(), transformed_group); } } } @@ -566,22 +649,75 @@ std::optional TokenAccessStorage::authenticateImpl( } else { - // Just in case external_roles are changed. - updateAssignedRolesNoLock(*id, user->getName(), external_roles); + /// Apply role-set and profile changes atomically under a single + /// `memory_storage.update`. Splitting them into two separate updates + /// (the prior shape) opened a reader-observable window between + /// "new roles, old profile" and "new roles, new profile" -- a query + /// from another thread that read the user via `AccessControl::read` + /// would observe a mid-state, since `MemoryAccessStorage`'s lock is + /// independent of `TokenAccessStorage::mutex` (M-31). + /// + /// Preserve the existing early-return optimization: skip the update + /// when external_roles haven't changed AND the profile is already + /// assigned. The `assignRolesNoLock` cleanup still has to run if + /// the role set changes, so it lives inside the update lambda. + const bool roles_changed = [&] + { + const auto it = user_external_roles.find(user->getName()); + return it == user_external_roles.end() || it->second != external_roles; + }(); - // Also update profile if needed - memory_storage.update(*id, [this] (const AccessEntityPtr & entity_, const UUID &) -> AccessEntityPtr + if (roles_changed) { - if (auto user_entity = typeid_cast>(entity_)) + memory_storage.update(*id, [this, &external_roles] (const AccessEntityPtr & entity_, const UUID &) -> AccessEntityPtr { - auto changed_user = typeid_cast>(user_entity->clone()); - assignProfileNoLock(*changed_user); - return changed_user; - } - return entity_; - }); + if (auto user_entity = typeid_cast>(entity_)) + { + auto changed_user = typeid_cast>(user_entity->clone()); + assignRolesNoLock(*changed_user, external_roles); + assignProfileNoLock(*changed_user); + return changed_user; + } + return entity_; + }); + } + else + { + /// Roles are stable; just refresh the profile in case it was + /// added/changed in config since the last auth. + memory_storage.update(*id, [this] (const AccessEntityPtr & entity_, const UUID &) -> AccessEntityPtr + { + if (auto user_entity = typeid_cast>(entity_)) + { + auto changed_user = typeid_cast>(user_entity->clone()); + assignProfileNoLock(*changed_user); + return changed_user; + } + return entity_; + }); + } } + /// Flush queued user-entity events from this storage's `memory_storage` so + /// subscribers observe the freshly-resolved roles and profile right away. + /// + /// `memory_storage.insert` / `update` only enqueue `onEntityAdded` / + /// `onEntityUpdated` on the shared `AccessChangesNotifier`; without an + /// explicit `sendNotifications` they sit on the queue until some unrelated + /// access mutation (a SQL DDL on access entities, a config reload, a + /// replicated-storage sync) happens to trigger a drain. During that window + /// any existing `ContextAccess` bound to this user UUID keeps serving its + /// previously-cached authorization state -- a freshly-revoked role appears + /// "still granted" until the next unrelated trigger. + /// + /// Note: `applyRoleChangeNoLock` (the storage's other mutation site) does + /// NOT need an explicit flush -- it only runs inside `processRoleChange`, + /// which is itself dispatched from a `sendNotifications` drain; the events + /// it queues are picked up by the very loop that called it. Only + /// `authenticateImpl` runs outside of any drain and so is the one site + /// that has to flush explicitly. + access_control.getChangesNotifier().sendNotifications(); + if (id) return AuthResult{ .user_id = *id, .authentication_data = AuthenticationData(AuthenticationType::JWT), .user_name = credentials.getUserName() }; return std::nullopt; diff --git a/src/Access/TokenAccessStorage.h b/src/Access/TokenAccessStorage.h index aedf8843f2b9..9f15319a0d82 100644 --- a/src/Access/TokenAccessStorage.h +++ b/src/Access/TokenAccessStorage.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -49,12 +50,25 @@ class TokenAccessStorage : public IAccessStorage String provider_name; std::optional roles_filter = std::nullopt; - std::optional roles_transform_pattern = std::nullopt; + /// `roles_transform` regex compiled once at construction. Storing the + /// compiled `re2::RE2` (instead of the pattern string) avoids per-call + /// recompilation and -- more importantly -- makes parse-time validation + /// possible: an invalid regex now fails the storage construction loudly + /// rather than silently no-op'ing every transform at runtime (which would + /// admit ungroomed role names; symmetric with the `roles_filter` fail- + /// closed handling). + std::optional roles_transform_pattern = std::nullopt; std::optional roles_transform_replacement = std::nullopt; bool roles_transform_global = false; std::set common_role_names; // role name that should be granted to all users at all times String default_profile_name; // settings profile name that should be assigned to all users + /// Optional IP allowlist applied to auto-provisioned users at creation + /// time. When unset, auto-created users inherit the default `AnyHostTag` + /// (current behavior, no breakage). When set, only clients whose source + /// address matches this allowlist can authenticate as a token-auto-created + /// user, regardless of the IdP's verdict on the token. + std::optional auto_user_allowed_hosts; mutable std::map> user_external_roles; mutable std::map> users_per_roles; // role name -> user names (...it should be granted to; may but don't have to exist for common roles) mutable std::map> roles_per_users; // user name -> role names (...that should be granted to it; may but don't have to include common roles) @@ -70,7 +84,6 @@ class TokenAccessStorage : public IAccessStorage void applyRoleChangeNoLock(bool grant, const UUID & role_id, const String & role_name); void assignRolesNoLock(User & user, const std::set & external_roles) const; void assignProfileNoLock(User & user) const; - void updateAssignedRolesNoLock(const UUID & id, const String & user_name, const std::set & external_roles) const; protected: std::optional findImpl(AccessEntityType type, const String & name) const override; diff --git a/src/Access/TokenProcessors.h b/src/Access/TokenProcessors.h index f33f0300662d..7b9b4cdc2492 100644 --- a/src/Access/TokenProcessors.h +++ b/src/Access/TokenProcessors.h @@ -12,6 +12,8 @@ namespace DB { +class RemoteHostFilter; + namespace ErrorCodes { extern const int NOT_IMPLEMENTED; @@ -81,6 +83,12 @@ struct StaticKeyJwtParams /// JWT claims to validate (optional) String claims; + + /// Clock-drift tolerance for `exp`/`nbf`/`iat` checks, in seconds. + /// jwt-cpp's default is 0, which rejects tokens on any client/server skew. + /// 60 seconds matches the OpenID processor's default and standard + /// industry practice (RFC 7519 §4.1.4 hints at "small leeway"). + UInt64 verifier_leeway = 60; }; class StaticKeyJwtProcessor : public ITokenProcessor @@ -92,6 +100,7 @@ class StaticKeyJwtProcessor : public ITokenProcessor const String & groups_claim_, const String & expected_issuer_, const String & expected_audience_, + const String & expected_typ_, bool allow_no_expiration_, const StaticKeyJwtParams & params); @@ -103,6 +112,8 @@ class StaticKeyJwtProcessor : public ITokenProcessor const String claims; const String expected_issuer; const String expected_audience; + /// Required JWT `typ` header (RFC 8725 §3.11). Empty = no enforcement. + const String expected_typ; const bool allow_no_expiration; jwt::verifier verifier = jwt::verify(); }; @@ -117,13 +128,11 @@ class JwksJwtProcessor : public ITokenProcessor const String & groups_claim_, const String & expected_issuer_, const String & expected_audience_, + const String & expected_typ_, bool allow_no_expiration_, const String & claims_, size_t verifier_leeway_, - std::shared_ptr provider_) - : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), - claims(claims_), expected_issuer(expected_issuer_), expected_audience(expected_audience_), - allow_no_expiration(allow_no_expiration_), provider(provider_), verifier_leeway(verifier_leeway_) {} + std::shared_ptr provider_); explicit JwksJwtProcessor(const String & processor_name_, UInt64 token_cache_lifetime_, @@ -131,6 +140,7 @@ class JwksJwtProcessor : public ITokenProcessor const String & groups_claim_, const String & expected_issuer_, const String & expected_audience_, + const String & expected_typ_, bool allow_no_expiration_, const String & claims_, size_t verifier_leeway_, @@ -142,6 +152,7 @@ class JwksJwtProcessor : public ITokenProcessor groups_claim_, expected_issuer_, expected_audience_, + expected_typ_, allow_no_expiration_, claims_, verifier_leeway_, @@ -155,8 +166,13 @@ class JwksJwtProcessor : public ITokenProcessor const String claims; const String expected_issuer; const String expected_audience; + /// Required JWT `typ` header (RFC 8725 §3.11). Empty = no enforcement. + const String expected_typ; const bool allow_no_expiration; - mutable jwt::verifier verifier = jwt::verify(); + /// Verifier is built fresh per call inside `resolveAndValidate` (it depends + /// on the current JWT's `kid` -> JWKS-resolved key, which can rotate). A + /// local-per-call verifier also makes the function thread-safe so callers + /// can invoke it without holding the global `ExternalAuthenticators::mutex`. std::shared_ptr provider; const size_t verifier_leeway; }; @@ -169,10 +185,13 @@ class GoogleTokenProcessor : public ITokenProcessor GoogleTokenProcessor(const String & processor_name_, UInt64 token_cache_lifetime_, const String & username_claim_, - const String & groups_claim_) - : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_) {} + const String & groups_claim_, + const String & expected_audience_); bool resolveAndValidate(TokenCredentials & credentials) const override; + +private: + const String expected_audience; }; class AzureTokenProcessor : public ITokenProcessor @@ -181,10 +200,13 @@ class AzureTokenProcessor : public ITokenProcessor AzureTokenProcessor(const String & processor_name_, UInt64 token_cache_lifetime_, const String & username_claim_, - const String & groups_claim_) - : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_) {} + const String & groups_claim_, + const String & expected_audience_); bool resolveAndValidate(TokenCredentials & credentials) const override; + +private: + const String expected_audience; }; class OpenIdTokenProcessor : public ITokenProcessor @@ -214,7 +236,8 @@ class OpenIdTokenProcessor : public ITokenProcessor bool allow_no_expiration_, const String & openid_config_endpoint_, UInt64 verifier_leeway_, - UInt64 jwks_cache_lifetime_); + UInt64 jwks_cache_lifetime_, + const RemoteHostFilter & remote_host_filter_); bool resolveAndValidate(TokenCredentials & credentials) const override; private: diff --git a/src/Access/TokenProcessorsJWT.cpp b/src/Access/TokenProcessorsJWT.cpp index 182556f24b6a..21c3a641aeb6 100644 --- a/src/Access/TokenProcessorsJWT.cpp +++ b/src/Access/TokenProcessorsJWT.cpp @@ -3,6 +3,7 @@ #if USE_JWT_CPP #include #include +#include #include #include #include @@ -11,6 +12,11 @@ #include #include +/// Ensure picojson's parse-depth ceiling stays in line with H-28's claims-recursion bound. +/// If a future picojson bump removes or raises this, we'd silently re-expose stack-exhaustion. +static_assert(picojson::DEFAULT_MAX_DEPTHS <= 100, + "picojson::DEFAULT_MAX_DEPTHS bumped above 100; revisit JWT parse-depth safety"); + namespace DB { namespace ErrorCodes @@ -22,8 +28,18 @@ namespace ErrorCodes namespace { -bool check_claims(const picojson::value & claims, const picojson::value & payload, const String & path); -bool check_claims(const picojson::value::object & claims, const picojson::value::object & payload, const String & path) +/// Depth budget for `check_claims` recursion. +/// +/// `picojson::DEFAULT_MAX_DEPTHS = 100` rejects deeply-nested JSON at parse +/// time, which already prevents the stack-exhaustion variant. We carry a +/// smaller budget here as defense in depth: if a future contrib bump or +/// PICOJSON_USE_RVALUE change widens the parse-time limit, this bound still +/// caps recursion. 32 is well above any realistic operator-configured claim +/// shape but keeps the worst-case stack frame count modest. +constexpr int kMaxClaimsRecursionDepth = 32; + +bool check_claims(const picojson::value & claims, const picojson::value & payload, const String & path, int depth_remaining); +bool check_claims(const picojson::value::object & claims, const picojson::value::object & payload, const String & path, int depth_remaining) { for (const auto & it : claims) { @@ -33,7 +49,7 @@ bool check_claims(const picojson::value::object & claims, const picojson::value: LOG_TRACE(getLogger("TokenAuthentication"), "Key '{}.{}' not found in JWT payload", path, it.first); return false; } - if (!check_claims(it.second, payload_it->second, path + "." + it.first)) + if (!check_claims(it.second, payload_it->second, path + "." + it.first, depth_remaining)) { return false; } @@ -41,7 +57,7 @@ bool check_claims(const picojson::value::object & claims, const picojson::value: return true; } -bool check_claims(const picojson::value::array & claims, const picojson::value::array & payload, const String & path) +bool check_claims(const picojson::value::array & claims, const picojson::value::array & payload, const String & path, int depth_remaining) { if (claims.size() > payload.size()) { @@ -54,9 +70,18 @@ bool check_claims(const picojson::value::array & claims, const picojson::value:: const auto & claims_val = claims.at(claims_i); for (const auto & payload_val : payload) { - if (!check_claims(claims_val, payload_val, path + "[" + std::to_string(claims_i) + "]")) - continue; - found = true; + /// Break on the first match. Without this, the inner loop kept + /// scanning the rest of the payload even after finding a match, + /// turning the worst case into O(|claims_array| * |payload_array|) + /// even when matches are easy. Combined with `kMaxClaimsRecursionDepth`, + /// this caps CPU per `check_claims` call so a crafted token cannot + /// stall the global `ExternalAuthenticators::mutex` (H-19) for an + /// unbounded time. + if (check_claims(claims_val, payload_val, path + "[" + std::to_string(claims_i) + "]", depth_remaining)) + { + found = true; + break; + } } if (!found) { @@ -67,8 +92,18 @@ bool check_claims(const picojson::value::array & claims, const picojson::value:: return true; } -bool check_claims(const picojson::value & claims, const picojson::value & payload, const String & path) +bool check_claims(const picojson::value & claims, const picojson::value & payload, const String & path, int depth_remaining) { + if (depth_remaining <= 0) + { + LOG_ERROR(getLogger("TokenAuthentication"), + "JWT claims comparison exceeded the maximum recursion depth ({}) at '{}'; " + "rejecting to bound CPU under the auth mutex.", + kMaxClaimsRecursionDepth, path); + return false; + } + --depth_remaining; + if (claims.is()) { if (!payload.is()) @@ -76,7 +111,7 @@ bool check_claims(const picojson::value & claims, const picojson::value & payloa LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match key type 'array' in claims '{}'", path); return false; } - return check_claims(claims.get(), payload.get(), path); + return check_claims(claims.get(), payload.get(), path, depth_remaining); } if (claims.is()) { @@ -85,7 +120,7 @@ bool check_claims(const picojson::value & claims, const picojson::value & payloa LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match key type 'object' in claims '{}'", path); return false; } - return check_claims(claims.get(), payload.get(), path); + return check_claims(claims.get(), payload.get(), path, depth_remaining); } if (claims.is()) { @@ -159,7 +194,7 @@ bool check_claims(const String & claims, const picojson::value::object & payload throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Bad JWT claims: {}", errors); if (!json.is()) throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Bad JWT claims: is not an object"); - return check_claims(json.get(), payload, ""); + return check_claims(json.get(), payload, "", kMaxClaimsRecursionDepth); } std::string create_public_key_from_ec_components(const std::string & x, const std::string & y, int curve_nid) @@ -251,18 +286,90 @@ std::set parseGroupsFromJsonArray(picojson::array groups_array) } } +namespace +{ +/// Warn at construction time when a JWT processor is left without an +/// `expected_audience` (and/or `expected_issuer`) pin. Without `aud`, +/// the same token is replayable on any other deployment that trusts +/// the same IdP -- a token minted for cluster X authenticates on +/// cluster Y as well, because nothing ties a JWT to "this specific +/// relying party". Same shape as the Google/Azure warnings (H-10); +/// the warning is the only signal operators get since the verifier +/// just silently skips the check when the pin is empty. +void warnIfBindingsNotPinned(const String & processor_name, + const String & expected_issuer, + const String & expected_audience, + const String & expected_typ) +{ + if (expected_audience.empty()) + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: 'expected_audience' is not configured. Tokens issued by the same IdP for " + "any other relying party will be accepted here, including tokens minted for a " + "different ClickHouse deployment. Set 'expected_audience' to this deployment's " + "audience to prevent cross-cluster replay.", + processor_name); + if (expected_issuer.empty()) + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: 'expected_issuer' is not configured. The JWT 'iss' claim will not be enforced; " + "any token signed by a key in this processor's JWKS will be accepted regardless of " + "issuer. Set 'expected_issuer' to bind tokens to a specific IdP.", + processor_name); + if (expected_typ.empty()) + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: 'expected_typ' is not configured. The JWT 'typ' header will not be enforced; " + "ID tokens / refresh JWTs / internal-profile JWTs from the same IdP can be presented " + "as access tokens. RFC 8725 §3.11 / RFC 9068 recommend setting 'expected_typ' " + "(commonly 'at+jwt' for OAuth 2.0 access tokens) to prevent cross-token-class substitution.", + processor_name); +} + +/// Verify the JWT header `typ` matches the operator-configured pin. +/// Returns false (with a TRACE log) on mismatch; true if no pin or match. +/// Comparison is case-insensitive per RFC 7519 §5.1 ("JWT" and "jwt" both valid). +bool checkJwtTyp(const String & processor_name, + const String & expected_typ, + const jwt::decoded_jwt & decoded) +{ + if (expected_typ.empty()) + return true; + + if (!decoded.has_type()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Token has no 'typ' header but 'expected_typ' is configured to '{}'; rejecting.", + processor_name, expected_typ); + return false; + } + + const String actual_typ = decoded.get_type(); + if (Poco::toLower(actual_typ) != Poco::toLower(expected_typ)) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Token 'typ' header '{}' does not match 'expected_typ' '{}'; rejecting.", + processor_name, actual_typ, expected_typ); + return false; + } + + return true; +} +} + StaticKeyJwtProcessor::StaticKeyJwtProcessor(const String & processor_name_, UInt64 token_cache_lifetime_, const String & username_claim_, const String & groups_claim_, const String & expected_issuer_, const String & expected_audience_, + const String & expected_typ_, bool allow_no_expiration_, const StaticKeyJwtParams & params) : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), claims(params.claims), expected_issuer(expected_issuer_), expected_audience(expected_audience_), + expected_typ(expected_typ_), allow_no_expiration(allow_no_expiration_) { + warnIfBindingsNotPinned(processor_name, expected_issuer, expected_audience, expected_typ); + const String & algo = params.algo; const String & static_key = params.static_key; bool static_key_in_base64 = params.static_key_in_base64; @@ -340,6 +447,12 @@ StaticKeyJwtProcessor::StaticKeyJwtProcessor(const String & processor_name_, else throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "{}: Invalid token processor definition, unknown algorithm {}", processor_name, algo); + /// Apply clock-drift tolerance. jwt-cpp's default is 0, which rejects + /// tokens whose `exp`/`nbf` straddles even sub-second client/server skew. + /// Operators who set `verifier_leeway` in config get that value; + /// otherwise the parser-side default (60s) kicks in. + verifier = verifier.leeway(params.verifier_leeway); + if (!expected_issuer.empty()) verifier = verifier.with_issuer(expected_issuer); @@ -377,8 +490,16 @@ bool StaticKeyJwtProcessor::resolveAndValidate(TokenCredentials & credentials) c try { auto decoded_jwt = jwt::decode(credentials.getToken()); + + /// RFC 7515 §4.1.11: an unrecognized `crit` extension MUST cause rejection. + if (decoded_jwt.has_header_claim("crit")) + return false; + verifier.verify(decoded_jwt); + if (!checkJwtTyp(processor_name, expected_typ, decoded_jwt)) + return false; + if (!allow_no_expiration && !decoded_jwt.has_expires_at()) { LOG_TRACE(getLogger("TokenAuthentication"), "{}: Token missing 'exp' claim, rejecting", processor_name); @@ -394,7 +515,18 @@ bool StaticKeyJwtProcessor::resolveAndValidate(TokenCredentials & credentials) c return false; } - credentials.setUserName(decoded_jwt.get_payload_claim(username_claim).as_string()); + /// Reject empty `username_claim` value (M-27): a present-but-empty + /// claim would set user_name="" with `is_ready=false`, which the + /// cache would happily accept and collapse every empty-username + /// token into one dynamic user. + const auto user_name = decoded_jwt.get_payload_claim(username_claim).as_string(); + if (user_name.empty()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Resolved username from claim '{}' is empty; rejecting", processor_name, username_claim); + return false; + } + credentials.setUserName(user_name); if (decoded_jwt.has_payload_claim(groups_claim)) credentials.setGroups(parseGroupsFromJsonArray(decoded_jwt.get_payload_claim(groups_claim).as_array())); @@ -410,156 +542,246 @@ bool StaticKeyJwtProcessor::resolveAndValidate(TokenCredentials & credentials) c } } -bool JwksJwtProcessor::resolveAndValidate(TokenCredentials & credentials) const +JwksJwtProcessor::JwksJwtProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_issuer_, + const String & expected_audience_, + const String & expected_typ_, + bool allow_no_expiration_, + const String & claims_, + size_t verifier_leeway_, + std::shared_ptr provider_) + : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), + claims(claims_), expected_issuer(expected_issuer_), expected_audience(expected_audience_), + expected_typ(expected_typ_), + allow_no_expiration(allow_no_expiration_), provider(provider_), verifier_leeway(verifier_leeway_) { - auto decoded_jwt = jwt::decode(credentials.getToken()); - - if (!allow_no_expiration && !decoded_jwt.has_expires_at()) - { - LOG_TRACE(getLogger("TokenAuthentication"), "{}: Token missing 'exp' claim, rejecting", processor_name); - return false; - } + warnIfBindingsNotPinned(processor_name, expected_issuer, expected_audience, expected_typ); +} - if (!decoded_jwt.has_payload_claim(username_claim)) +bool JwksJwtProcessor::resolveAndValidate(TokenCredentials & credentials) const +{ + /// Whole-body try/catch mirrors `StaticKeyJwtProcessor::resolveAndValidate`. + /// + /// In the auto-discovery path inside `ExternalAuthenticators::checkTokenCredentials`, + /// processors are tried in turn and any exception out of one aborts the entire + /// loop -- later processors are never consulted. That is fine for "the token is + /// definitively bad", but the failures in this body are also raised when the + /// token simply belongs to a different processor (e.g. its `kid` is not in + /// THIS processor's JWKS, or its `alg` is one this JWKS does not know about, + /// or the JWK lacks the components this code path needs). In a multi-processor + /// deployment, raising in those cases denies a perfectly good token just + /// because a sibling processor happened to be iterated first. Convert every + /// such failure into a `false` return so the iterator can move on -- consistent + /// with how `StaticKeyJwtProcessor` already handles its own validation errors. + try { - LOG_ERROR(getLogger("TokenAuthentication"), "{}: Specified username_claim not found in token", processor_name); - return false; - } + auto decoded_jwt = jwt::decode(credentials.getToken()); - if (!decoded_jwt.has_key_id()) - { - LOG_ERROR(getLogger("TokenAuthentication"), "{}: 'kid' (key ID) claim not found in token", processor_name); - return false; - } + /// RFC 7515 §4.1.11: an unrecognized `crit` extension MUST cause rejection. + if (decoded_jwt.has_header_claim("crit")) + return false; - if (!provider->getJWKS().has_jwk(decoded_jwt.get_key_id())) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWKS error: no JWK found for JWT"); + if (!checkJwtTyp(processor_name, expected_typ, decoded_jwt)) + return false; - auto jwk = provider->getJWKS().get_jwk(decoded_jwt.get_key_id()); - auto username = decoded_jwt.get_payload_claim(username_claim).as_string(); + if (!allow_no_expiration && !decoded_jwt.has_expires_at()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Token missing 'exp' claim, rejecting", processor_name); + return false; + } - if (!decoded_jwt.has_algorithm()) - { - LOG_ERROR(getLogger("TokenAuthentication"), "{}: Algorithm not specified in token", processor_name); - return false; - } - auto algo = Poco::toLower(decoded_jwt.get_algorithm()); + if (!decoded_jwt.has_payload_claim(username_claim)) + { + LOG_ERROR(getLogger("TokenAuthentication"), "{}: Specified username_claim not found in token", processor_name); + return false; + } + if (!decoded_jwt.has_key_id()) + { + LOG_ERROR(getLogger("TokenAuthentication"), "{}: 'kid' (key ID) claim not found in token", processor_name); + return false; + } - String public_key; + if (!provider->getJWKS().has_jwk(decoded_jwt.get_key_id())) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: No JWK matching token 'kid' {} in this processor's JWKS; rejecting (a sibling processor may still accept it).", + processor_name, quoteString(decoded_jwt.get_key_id())); + return false; + } - try - { - auto x5c = jwk.get_x5c_key_value(); + auto jwk = provider->getJWKS().get_jwk(decoded_jwt.get_key_id()); + auto username = decoded_jwt.get_payload_claim(username_claim).as_string(); - if (!x5c.empty()) + if (!decoded_jwt.has_algorithm()) { - LOG_TRACE(getLogger("TokenAuthentication"), "{}: Verifying {} with 'x5c' key", processor_name, username); - public_key = jwt::helper::convert_base64_der_to_pem(x5c); + LOG_ERROR(getLogger("TokenAuthentication"), "{}: Algorithm not specified in token", processor_name); + return false; } - } - catch (const jwt::error::claim_not_present_exception &) - { - LOG_TRACE(getLogger("TokenAuthentication"), "{}: x5c was not specified in JWK, will try RSA components", processor_name); - } - catch (const std::bad_cast &) - { - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: invalid claim value type found, claims must be strings"); - } + auto algo = Poco::toLower(decoded_jwt.get_algorithm()); - if (public_key.empty()) - { - const auto key_type = jwk.get_key_type(); - if (key_type == "EC") + + String public_key; + + try { - if (!(jwk.has_jwk_claim("x") && jwk.has_jwk_claim("y"))) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "{}: invalid JWK: missing 'x'/'y' claims for EC key type", processor_name); + auto x5c = jwk.get_x5c_key_value(); - int curve_nid = NID_undef; - std::optional expected_crv; - if (algo == "es256") + if (!x5c.empty()) { - curve_nid = NID_X9_62_prime256v1; - expected_crv = "P-256"; + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Verifying {} with 'x5c' key", processor_name, quoteString(username)); + public_key = jwt::helper::convert_base64_der_to_pem(x5c); } - else if (algo == "es384") + } + catch (const jwt::error::claim_not_present_exception &) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: x5c was not specified in JWK, will try RSA components", processor_name); + } + + if (public_key.empty()) + { + const auto key_type = jwk.get_key_type(); + if (key_type == "EC") { - curve_nid = NID_secp384r1; - expected_crv = "P-384"; + if (!(jwk.has_jwk_claim("x") && jwk.has_jwk_claim("y"))) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: JWK for token 'kid' is missing 'x'/'y' for EC key type; rejecting.", processor_name); + return false; + } + + int curve_nid = NID_undef; + std::optional expected_crv; + if (algo == "es256") + { + curve_nid = NID_X9_62_prime256v1; + expected_crv = "P-256"; + } + else if (algo == "es384") + { + curve_nid = NID_secp384r1; + expected_crv = "P-384"; + } + else if (algo == "es512") + { + curve_nid = NID_secp521r1; + expected_crv = "P-521"; + } + + if (curve_nid == NID_undef) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Unknown algorithm {} for EC key; rejecting.", processor_name, quoteString(algo)); + return false; + } + + if (jwk.has_jwk_claim("crv")) + { + const auto crv = jwk.get_jwk_claim("crv").as_string(); + if (expected_crv.has_value() && crv != expected_crv.value()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: JWK 'crv' {} does not match JWT algorithm {}; rejecting.", + processor_name, quoteString(crv), quoteString(algo)); + return false; + } + } + + LOG_TRACE(getLogger("TokenAuthentication"), "{}: `x5c` not present, verifying {} with EC components", processor_name, quoteString(username)); + const auto x = jwk.get_jwk_claim("x").as_string(); + const auto y = jwk.get_jwk_claim("y").as_string(); + public_key = create_public_key_from_ec_components(x, y, curve_nid); } - else if (algo == "es512") + else if (key_type == "RSA") { - curve_nid = NID_secp521r1; - expected_crv = "P-521"; + if (!(jwk.has_jwk_claim("n") && jwk.has_jwk_claim("e"))) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: JWK is missing 'n'/'e' for RSA key type; rejecting.", processor_name); + return false; + } + LOG_TRACE(getLogger("TokenAuthentication"), "{}: `issuer` or `x5c` not present, verifying {} with RSA components", processor_name, quoteString(username)); + const auto modulus = jwk.get_jwk_claim("n").as_string(); + const auto exponent = jwk.get_jwk_claim("e").as_string(); + public_key = jwt::helper::create_public_key_from_rsa_components(modulus, exponent); } - - if (curve_nid == NID_undef) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: unknown algorithm {}", algo); - - if (jwk.has_jwk_claim("crv")) + else { - const auto crv = jwk.get_jwk_claim("crv").as_string(); - if (expected_crv.has_value() && crv != expected_crv.value()) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: `crv` in JWK does not match JWT algorithm"); + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Unsupported JWK key type {}; rejecting.", processor_name, quoteString(key_type)); + return false; } - - LOG_TRACE(getLogger("TokenAuthentication"), "{}: `x5c` not present, verifying {} with EC components", processor_name, username); - const auto x = jwk.get_jwk_claim("x").as_string(); - const auto y = jwk.get_jwk_claim("y").as_string(); - public_key = create_public_key_from_ec_components(x, y, curve_nid); } - else if (key_type == "RSA") + + if (jwk.has_algorithm() && Poco::toLower(jwk.get_algorithm()) != algo) { - if (!(jwk.has_jwk_claim("n") && jwk.has_jwk_claim("e"))) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "{}: invalid JWK: missing 'n'/'e' claims for RSA key type", processor_name); - LOG_TRACE(getLogger("TokenAuthentication"), "{}: `issuer` or `x5c` not present, verifying {} with RSA components", processor_name, username); - const auto modulus = jwk.get_jwk_claim("n").as_string(); - const auto exponent = jwk.get_jwk_claim("e").as_string(); - public_key = jwt::helper::create_public_key_from_rsa_components(modulus, exponent); + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: JWK 'alg' does not match JWT algorithm {}; rejecting.", processor_name, quoteString(algo)); + return false; } - else - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "{}: invalid JWK key type '{}'", processor_name, key_type); - } - if (jwk.has_algorithm() && Poco::toLower(jwk.get_algorithm()) != algo) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT validation error: `alg` in JWK does not match the algorithm used in JWT"); - - if (algo == "rs256") - verifier = verifier.allow_algorithm(jwt::algorithm::rs256(public_key, "", "", "")); - else if (algo == "rs384") - verifier = verifier.allow_algorithm(jwt::algorithm::rs384(public_key, "", "", "")); - else if (algo == "rs512") - verifier = verifier.allow_algorithm(jwt::algorithm::rs512(public_key, "", "", "")); - else if (algo == "es256") - verifier = verifier.allow_algorithm(jwt::algorithm::es256(public_key, "", "", "")); - else if (algo == "es384") - verifier = verifier.allow_algorithm(jwt::algorithm::es384(public_key, "", "", "")); - else if (algo == "es512") - verifier = verifier.allow_algorithm(jwt::algorithm::es512(public_key, "", "", "")); - else - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: unknown algorithm {}", algo); + /// Build the verifier locally (was a `mutable` member; making it local + /// makes `resolveAndValidate` thread-safe so the caller can drop the + /// global auth mutex around the expensive crypto verify). + auto verifier = jwt::verify(); + if (algo == "rs256") + verifier = verifier.allow_algorithm(jwt::algorithm::rs256(public_key, "", "", "")); + else if (algo == "rs384") + verifier = verifier.allow_algorithm(jwt::algorithm::rs384(public_key, "", "", "")); + else if (algo == "rs512") + verifier = verifier.allow_algorithm(jwt::algorithm::rs512(public_key, "", "", "")); + else if (algo == "es256") + verifier = verifier.allow_algorithm(jwt::algorithm::es256(public_key, "", "", "")); + else if (algo == "es384") + verifier = verifier.allow_algorithm(jwt::algorithm::es384(public_key, "", "", "")); + else if (algo == "es512") + verifier = verifier.allow_algorithm(jwt::algorithm::es512(public_key, "", "", "")); + else + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Unknown JWT algorithm {}; rejecting.", processor_name, quoteString(algo)); + return false; + } - verifier = verifier.leeway(verifier_leeway); + verifier = verifier.leeway(verifier_leeway); - if (!expected_issuer.empty()) - verifier = verifier.with_issuer(expected_issuer); + if (!expected_issuer.empty()) + verifier = verifier.with_issuer(expected_issuer); - if (!expected_audience.empty()) - verifier = verifier.with_audience(expected_audience); + if (!expected_audience.empty()) + verifier = verifier.with_audience(expected_audience); - verifier.verify(decoded_jwt); + verifier.verify(decoded_jwt); - if (!claims.empty() && !check_claims(claims, decoded_jwt.get_payload_json())) - return false; + if (!claims.empty() && !check_claims(claims, decoded_jwt.get_payload_json())) + return false; - credentials.setUserName(decoded_jwt.get_payload_claim(username_claim).as_string()); + /// Reject empty resolved username (M-27); see the + /// `StaticKeyJwtProcessor` peer for rationale. + const auto user_name = decoded_jwt.get_payload_claim(username_claim).as_string(); + if (user_name.empty()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Resolved username from claim '{}' is empty; rejecting", processor_name, username_claim); + return false; + } + credentials.setUserName(user_name); - if (decoded_jwt.has_payload_claim(groups_claim)) - credentials.setGroups(parseGroupsFromJsonArray(decoded_jwt.get_payload_claim(groups_claim).as_array())); - else - LOG_TRACE(getLogger("TokenAuthentication"), "{}: Specified groups_claim {} not found in token, no external roles will be mapped", processor_name, groups_claim); + if (decoded_jwt.has_payload_claim(groups_claim)) + credentials.setGroups(parseGroupsFromJsonArray(decoded_jwt.get_payload_claim(groups_claim).as_array())); + else + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Specified groups_claim {} not found in token, no external roles will be mapped", processor_name, groups_claim); - return true; + return true; + } + catch (const std::exception & ex) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Failed to validate JWT: {}", processor_name, ex.what()); + return false; + } } } diff --git a/src/Access/TokenProcessorsOpaque.cpp b/src/Access/TokenProcessorsOpaque.cpp index b6f50f677564..ecd93a0ae53f 100644 --- a/src/Access/TokenProcessorsOpaque.cpp +++ b/src/Access/TokenProcessorsOpaque.cpp @@ -1,7 +1,9 @@ #include "TokenProcessors.h" #if USE_JWT_CPP +#include #include +#include #include #include #include @@ -56,6 +58,26 @@ namespace return value.get(); } + /// Bound every IdP-bound HTTP call (OIDC discovery, userinfo, introspection) + /// to a known limit. Without this, Poco's default `HTTPSession` timeout of + /// 60 seconds applies, and because `ExternalAuthenticators::mutex` is held + /// for the entire duration of `checkTokenCredentials` -- including the + /// outbound call this function makes -- a single slow or hung IdP would + /// stall the whole auth subsystem (LDAP, Kerberos, HTTP basic, every other + /// token auth) for up to a full minute per request. + /// + /// 10 seconds is a deliberately conservative cap: well above any healthy + /// IdP latency, well below the default. Operators who need a different + /// value would have to expose this via per-processor config; for now it + /// is hard-coded so deployments inherit the bounded behavior automatically. + constexpr int kIdpHttpTimeoutSeconds = 10; + + void applyIdpSessionTimeouts(Poco::Net::HTTPClientSession & session) + { + const Poco::Timespan timeout(kIdpHttpTimeoutSeconds, 0); + session.setTimeout(timeout, timeout, timeout); + } + picojson::object getObjectFromURI(const Poco::URI & uri, const String & token = "") { Poco::Net::HTTPResponse response; @@ -67,12 +89,14 @@ namespace if (uri.getScheme() == "https") { Poco::Net::HTTPSClientSession session(uri.getHost(), uri.getPort()); + applyIdpSessionTimeouts(session); session.sendRequest(request); Poco::StreamCopier::copyStream(session.receiveResponse(response), responseString); } else { Poco::Net::HTTPClientSession session(uri.getHost(), uri.getPort()); + applyIdpSessionTimeouts(session); session.sendRequest(request); Poco::StreamCopier::copyStream(session.receiveResponse(response), responseString); } @@ -93,6 +117,28 @@ namespace } } +GoogleTokenProcessor::GoogleTokenProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_audience_) + : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_) + , expected_audience(expected_audience_) +{ + /// Without an audience pin, this processor accepts any Google access token + /// that authenticates the user against Google -- including tokens minted for + /// completely unrelated OAuth clients (a classic confused-deputy scenario). + /// Operators who actually want token-based auth almost always want it bound + /// to their own client_id; surface this gap loudly at startup so it can't + /// stay silently un-enforced. + if (expected_audience.empty()) + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: 'expected_audience' is not configured for Google token processor. " + "Any valid Google access token (regardless of issuing client) will be accepted; " + "set 'expected_audience' to the OAuth client_id this processor should accept.", + processor_name); +} + bool GoogleTokenProcessor::resolveAndValidate(TokenCredentials & credentials) const { const String & token = credentials.getToken(); @@ -110,9 +156,41 @@ bool GoogleTokenProcessor::resolveAndValidate(TokenCredentials & credentials) co String user_name = user_info[username_claim]; + auto token_info = getObjectFromURI(Poco::URI("https://www.googleapis.com/oauth2/v3/tokeninfo"), token); + + /// Audience binding (H-10): the Google /tokeninfo endpoint authoritatively + /// reports the OAuth client_id the access token was issued for in its 'aud' + /// field. Without this check, a token minted for any other Google OAuth + /// client (the user's mobile app, a third-party tool) would authenticate + /// here too -- because Google /userinfo will happily honor any valid token. + /// Refusing tokens whose 'aud' does not match the configured client pin is + /// what makes the binding strict. + if (!expected_audience.empty()) + { + const auto aud = getValueByKey(token_info, "aud").value_or(""); + if (aud != expected_audience) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Google access token audience '{}' does not match configured 'expected_audience' '{}'; rejecting", + processor_name, aud, expected_audience); + return false; + } + } + + /// Reject empty resolved username (M-27). `TokenCredentials::setUserName` + /// leaves `is_ready=false` for empty input but the function would still + /// return true; the cache would then accept an entry under user_name "", + /// collapsing every empty-username token across all IdPs into the same + /// dynamic ClickHouse user. + if (user_name.empty()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Resolved username from token is empty; rejecting", processor_name); + return false; + } + credentials.setUserName(user_name); - auto token_info = getObjectFromURI(Poco::URI("https://www.googleapis.com/oauth2/v3/tokeninfo"), token); if (token_info.contains("exp")) credentials.setExpiresAt(std::chrono::system_clock::from_time_t((getValueByKey(token_info, "exp").value()))); @@ -144,20 +222,43 @@ bool GoogleTokenProcessor::resolveAndValidate(TokenCredentials & credentials) co } auto group_data = group.get(); - String group_name = getValueByKey(group_data["groupKey"].get(), "id").value_or(""); + + /// Guard against a missing or non-object `groupKey`. Without + /// these checks `group_data["groupKey"].get()` + /// would auto-insert a null `picojson::value` (because picojson + /// objects are `std::map` and `[]` + /// default-constructs on a missing key) and then throw + /// `std::bad_cast` on the `.get()` call -- + /// which the `catch (const Exception &)` below does NOT + /// catch (`std::bad_cast` is `std::exception`-derived, not + /// `DB::Exception`-derived). The uncaught exception used to + /// propagate out of `resolveAndValidate` and abort auth. + auto group_key_it = group_data.find("groupKey"); + if (group_key_it == group_data.end() || !group_key_it->second.is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Group entry without a 'groupKey' object; skipping", processor_name); + continue; + } + + String group_name = getValueByKey(group_key_it->second.get(), "id").value_or(""); if (!group_name.empty()) { external_groups_names.insert(group_name); LOG_TRACE(getLogger("TokenAuthentication"), - "{}: User {}: new external group {}", processor_name, user_name, group_name); + "{}: User {}: new external group {}", + processor_name, quoteString(user_name), quoteString(group_name)); } } credentials.setGroups(external_groups_names); } - catch (const Exception & e) + catch (const std::exception & e) { - /// Could not get groups info. Log it and skip it. + /// Defense in depth: catch `std::exception` (not just `DB::Exception`) + /// so picojson's `std::bad_cast` and `std::runtime_error` -- and any + /// other future deviation -- degrade to "no roles mapped" rather + /// than aborting the whole authentication. LOG_TRACE(getLogger("TokenAuthentication"), "{}: Failed to get Google groups, no external roles will be mapped. reason: {}", processor_name, e.what()); return true; @@ -167,6 +268,27 @@ bool GoogleTokenProcessor::resolveAndValidate(TokenCredentials & credentials) co return true; } +AzureTokenProcessor::AzureTokenProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_audience_) + : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_) + , expected_audience(expected_audience_) +{ + /// Without an audience pin, this processor accepts any Azure AD access token + /// that Microsoft Graph happens to honor -- which includes tokens minted for + /// other applications inside the same tenant. Surface the gap so operators + /// can lock the processor to their own application's audience. + if (expected_audience.empty()) + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: 'expected_audience' is not configured for Azure token processor. " + "Any Azure access token Microsoft Graph accepts will authenticate here, " + "regardless of which application it was issued for; set 'expected_audience' " + "to the audience this processor should accept.", + processor_name); +} + bool AzureTokenProcessor::resolveAndValidate(TokenCredentials & credentials) const { /// Token is a JWT in this case, but we cannot directly verify it against Azure AD JWKS. @@ -177,22 +299,66 @@ bool AzureTokenProcessor::resolveAndValidate(TokenCredentials & credentials) con const String & token = credentials.getToken(); + String username; try { picojson::object user_info_json = getObjectFromURI(Poco::URI("https://graph.microsoft.com/oidc/userinfo"), token); - String username = getValueByKey(user_info_json, username_claim).value(); - - if (!username.empty()) - credentials.setUserName(username); - else - LOG_TRACE(getLogger("TokenAuthentication"), "{}: Failed to get username with token", processor_name); - + username = getValueByKey(user_info_json, username_claim).value(); } catch (...) { return false; } + /// Audience binding (H-10): only after Microsoft Graph has accepted the + /// token (proving it is a real, signed Azure AD token) do we trust its + /// claims. We then enforce that the 'aud' claim matches the operator-pinned + /// audience -- without this check, *any* token issued for *any* application + /// in the tenant that has Graph access would authenticate. With the check, + /// tokens minted for other applications are rejected even though Graph + /// itself would honor them. + if (!expected_audience.empty()) + { + try + { + auto decoded_token = jwt::decode(token); + if (!decoded_token.has_audience()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Azure access token has no 'aud' claim; cannot enforce 'expected_audience' '{}'; rejecting", + processor_name, expected_audience); + return false; + } + const auto auds = decoded_token.get_audience(); + if (auds.find(expected_audience) == auds.end()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Azure access token audience does not contain configured 'expected_audience' '{}'; rejecting", + processor_name, expected_audience); + return false; + } + } + catch (const std::exception & e) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Failed to decode Azure access token while enforcing 'expected_audience': {}; rejecting", + processor_name, e.what()); + return false; + } + } + + /// Reject empty resolved username (M-27). Previously this branch only + /// logged the gap and proceeded to return true at the end of the function, + /// which would cache an entry under user_name "" and collapse every + /// empty-username token across all IdPs into the same dynamic user. + if (username.empty()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Resolved username from token is empty; rejecting", processor_name); + return false; + } + credentials.setUserName(username); + try { credentials.setExpiresAt(jwt::decode(token).get_expires_at()); @@ -229,20 +395,43 @@ bool AzureTokenProcessor::resolveAndValidate(TokenCredentials & credentials) con } auto group_data = group.get(); - if (!group_data.contains("displayName")) + + /// Use the immutable `id` (GUID), not the mutable `displayName`, + /// for role-mapping. `displayName` can be renamed by an Azure AD + /// admin -- and on rename, every ClickHouse role-mapping regex + /// that referenced the old name silently stops matching, while + /// every regex that matches the new name silently starts. Two + /// distinct AAD groups can also share a display name and merge + /// into a single ClickHouse group; deleting and recreating a + /// group with the same name silently inherits the old grants. + /// `id` is a GUID assigned by AAD at group creation; it never + /// changes, never collides, and is never reused. + /// + /// Operators upgrading from a build that emitted `displayName` + /// must update their `roles_filter` / `roles_transform` regex + /// to reference the GUIDs Azure AD assigns to the groups they + /// want to map. The role identifier is not human-friendly -- + /// that is the cost of using an immutable handle. + if (!group_data.contains("id")) continue; - String group_name = getValueByKey(group_data, "displayName").value_or(""); + String group_name = getValueByKey(group_data, "id").value_or(""); if (!group_name.empty()) { external_groups_names.insert(group_name); - LOG_TRACE(getLogger("TokenAuthentication"), "{}: User {}: new external group {}", processor_name, credentials.getUserName(), group_name); + String display_name = getValueByKey(group_data, "displayName").value_or(""); + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: User {}: new external group id={} (displayName={})", + processor_name, quoteString(credentials.getUserName()), + quoteString(group_name), quoteString(display_name)); } } } - catch (const Exception & e) + catch (const std::exception & e) { - /// Could not get groups info. Log it and skip it. + /// Defense in depth (M-10 sibling): broadened to `std::exception` so a + /// picojson `std::bad_cast` from a malformed response degrades to "no + /// roles mapped" rather than aborting the whole authentication. LOG_TRACE(getLogger("TokenAuthentication"), "{}: Failed to get Azure groups, no external roles will be mapped. reason: {}", processor_name, e.what()); return true; @@ -267,15 +456,35 @@ OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), userinfo_endpoint(userinfo_endpoint_), token_introspection_endpoint(token_introspection_endpoint_) { + /// Without `jwks_uri`, no `jwt_validator` is created and so `expected_issuer` + /// / `expected_audience` cannot be enforced anywhere on the validation path + /// -- the runtime falls straight to the userinfo endpoint, which only + /// answers "the IdP describes this user", not "the token's `iss`/`aud` + /// match what this deployment pinned". Refuse to load with that combination + /// rather than silently dropping the operator's bindings. + if (jwks_uri_.empty() && (!expected_issuer_.empty() || !expected_audience_.empty())) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "{}: 'expected_issuer' / 'expected_audience' are configured but no 'jwks_uri' is provided. " + "These bindings can only be enforced via local JWT validation against a JWKS; the userinfo " + "fallback alone cannot enforce them. Configure 'jwks_uri' (or, if you intentionally want " + "userinfo-only validation, clear 'expected_issuer'/'expected_audience').", + processor_name); + if (!jwks_uri_.empty()) { LOG_TRACE(getLogger("TokenAuthentication"), "{}: JWKS URI set, local JWT processing will be attempted", processor_name_); + /// `expected_typ` is left empty here: OpenID's JWT-fastpath inherits no + /// `typ` enforcement from the operator config (the parser doesn't surface + /// `expected_typ` for the `openid` processor type yet). Operators who + /// want strict `typ` enforcement should use `jwt_static_jwks` / + /// `jwt_dynamic_jwks` directly instead of `openid`. jwt_validator.emplace(processor_name_ + "jwks_val", token_cache_lifetime_, username_claim_, groups_claim_, expected_issuer_, expected_audience_, + /*expected_typ=*/"", allow_no_expiration_, "", verifier_leeway_, @@ -293,26 +502,159 @@ OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, bool allow_no_expiration_, const String & openid_config_endpoint_, UInt64 verifier_leeway_, - UInt64 jwks_cache_lifetime_) + UInt64 jwks_cache_lifetime_, + const RemoteHostFilter & remote_host_filter_) : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_) { + /// Defense in depth: the discovery endpoint itself was already validated by + /// the parser, but re-check here in case this constructor is reached via a + /// future code path that bypasses parseTokenProcessor. + try + { + remote_host_filter_.checkURL(Poco::URI(openid_config_endpoint_)); + } + catch (const Exception & e) + { + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "{}: 'configuration_endpoint' URL '{}' is not in : {}", + processor_name, openid_config_endpoint_, e.message()); + } + const picojson::object openid_config = getObjectFromURI(Poco::URI(openid_config_endpoint_)); - if (!openid_config.contains("userinfo_endpoint") || !openid_config.contains("introspection_endpoint")) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "{}: Cannot extract userinfo_endpoint or introspection_endpoint from OIDC configuration, consider manual configuration.", processor_name); + /// Only `userinfo_endpoint` is mandatory: it backs the runtime userinfo + /// fallback (and is the sole user-info source when no JWKS is configured). + /// `introspection_endpoint` is currently unused at runtime -- it's plumbed + /// for a future RFC 7662 introspection feature -- so a discovery document + /// that omits it should not block processor construction. + if (!openid_config.contains("userinfo_endpoint")) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "{}: Cannot extract userinfo_endpoint from OIDC configuration at '{}'; consider manual configuration.", + processor_name, openid_config_endpoint_); + + /// The discovery document is untrusted: even with the issuer-anchor check + /// below (H-08), a poisoned or misdirected response can still try to point + /// trust-chain endpoints (jwks_uri, userinfo_endpoint, introspection_endpoint) + /// at hosts the operator never approved. Refuse to load the processor when + /// any returned URL is outside ; this prevents the + /// server from reaching out to attacker-controlled endpoints during token + /// validation. + /// + /// Additionally, refuse non-HTTPS schemes on discovery-returned URLs. + /// Without this, an attacker who can MITM the discovery fetch (operator + /// typed an `http://` configuration_endpoint, or any TLS interception path) + /// can substitute a discovery doc whose `jwks_uri` is `http://169.254.169.254/...` + /// (cloud metadata), `http://127.0.0.1:...` (local admin ports), or + /// `http://kubernetes.default.svc:...` -- and the server issues a one-shot + /// HTTP GET under its own process identity. `` is + /// the primary defense, but not every deployment configures it; an + /// HTTPS-only rule on returned URLs is a cheap, orthogonal layer that + /// blocks all three of those targets independently. Operators who run an + /// IdP over plain HTTP intentionally can wire the trust chain manually + /// (`userinfo_endpoint`/`token_introspection_endpoint`/`jwks_uri` directly) + /// instead of relying on discovery. + auto require_allowed_discovery_url = [&](const std::string & url, const char * field) + { + Poco::URI parsed_uri(url); + if (parsed_uri.getScheme() != "https") + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "{}: OIDC discovery at '{}' returned non-HTTPS '{}' URL '{}' (scheme '{}'). " + "The trust-chain URLs from discovery must use HTTPS so a poisoned discovery " + "document cannot redirect token validation through internal endpoints " + "(cloud metadata, localhost, in-cluster service IPs). If the IdP genuinely " + "runs over plain HTTP, configure the trust chain manually instead of using " + "'configuration_endpoint'.", + processor_name, openid_config_endpoint_, field, url, parsed_uri.getScheme()); + + try + { + remote_host_filter_.checkURL(parsed_uri); + } + catch (const Exception & e) + { + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "{}: OIDC discovery at '{}' returned '{}' URL '{}' which is not in " + ": {}", + processor_name, openid_config_endpoint_, field, url, e.message()); + } + }; + + require_allowed_discovery_url(getValueByKey(openid_config, "userinfo_endpoint").value(), "userinfo_endpoint"); + if (openid_config.contains("introspection_endpoint")) + require_allowed_discovery_url(getValueByKey(openid_config, "introspection_endpoint").value(), "introspection_endpoint"); + if (openid_config.contains("jwks_uri")) + require_allowed_discovery_url(getValueByKey(openid_config, "jwks_uri").value(), "jwks_uri"); + + /// Anchor the discovery document to a known issuer when one is configured. + /// + /// OIDC Discovery 1.0 §4.3 / RFC 8414 §3.3 require the metadata's "issuer" + /// to be tied to the URL used to fetch it. Without this anchor a poisoned + /// or misdirected discovery response can redirect the entire trust chain + /// (jwks_uri, userinfo_endpoint, introspection_endpoint) to URLs the + /// operator never approved -- and because the embedded JWT verifier only + /// enforces the `iss` claim when expected_issuer is non-empty, JWTs signed + /// by the attacker's keys would be silently accepted at runtime. + /// + /// Policy: + /// - expected_issuer configured => discovery's "issuer" MUST match it + /// (refuse to construct on mismatch or + /// absence). Verifier is pinned to it. + /// - expected_issuer empty => log a warning so the gap is visible + /// in operator logs, then proceed with + /// the historical (lax) behavior. The + /// verifier is left without an issuer + /// pin to preserve compatibility. + const auto issuer_from_discovery = getValueByKey(openid_config, "issuer").value_or(""); + + if (!expected_issuer_.empty()) + { + if (issuer_from_discovery.empty()) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "{}: OIDC discovery document at '{}' does not advertise an 'issuer'; " + "cannot verify it against the configured 'expected_issuer' '{}'.", + processor_name, openid_config_endpoint_, expected_issuer_); + + if (issuer_from_discovery != expected_issuer_) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "{}: OIDC discovery 'issuer' mismatch: configured 'expected_issuer' is '{}' " + "but discovery document at '{}' returned issuer '{}'. Refusing to load the " + "processor to avoid trusting metadata that belongs to a different issuer.", + processor_name, expected_issuer_, openid_config_endpoint_, issuer_from_discovery); + } + else + { + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: 'expected_issuer' is not configured for OIDC discovery at '{}'. " + "The JWT 'iss' claim will NOT be enforced.", processor_name, openid_config_endpoint_); + } userinfo_endpoint = Poco::URI(getValueByKey(openid_config, "userinfo_endpoint").value()); - token_introspection_endpoint = Poco::URI(getValueByKey(openid_config, "introspection_endpoint").value()); + if (openid_config.contains("introspection_endpoint")) + token_introspection_endpoint = Poco::URI(getValueByKey(openid_config, "introspection_endpoint").value()); + + /// See manual-constructor comment: `expected_issuer` / `expected_audience` + /// can only be enforced via local JWT validation. If the discovery document + /// does not advertise a `jwks_uri`, no `jwt_validator` will be created and + /// the userinfo fallback alone cannot enforce these bindings. Refuse the + /// configuration rather than silently dropping them. + if (!openid_config.contains("jwks_uri") && (!expected_issuer_.empty() || !expected_audience_.empty())) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "{}: OIDC discovery at '{}' did not advertise a 'jwks_uri', but 'expected_issuer' / " + "'expected_audience' are configured. These bindings can only be enforced via local JWT " + "validation against a JWKS; userinfo cannot enforce them. Refusing to load.", + processor_name, openid_config_endpoint_); if (openid_config.contains("jwks_uri")) { LOG_TRACE(getLogger("TokenAuthentication"), "{}: JWKS URI set, local JWT processing will be attempted", processor_name_); + /// `expected_typ` empty for the same reason as the manual constructor. jwt_validator.emplace(processor_name_ + "jwks_val", token_cache_lifetime_, username_claim_, groups_claim_, expected_issuer_, expected_audience_, + /*expected_typ=*/"", allow_no_expiration_, "", verifier_leeway_, @@ -327,8 +669,34 @@ bool OpenIdTokenProcessor::resolveAndValidate(TokenCredentials & credentials) co String username; picojson::object user_info_json; - if (jwt_validator.has_value() && jwt_validator.value().resolveAndValidate(credentials)) + if (jwt_validator.has_value()) { + /// When a `jwt_validator` is configured, it owns the operator's + /// `expected_issuer` / `expected_audience` / `allow_no_expiration` + /// bindings. If it rejects the token we MUST NOT fall back to the + /// userinfo endpoint: userinfo only confirms "the IdP describes this + /// user", it has no notion of the operator-pinned audience or issuer + /// and does not enforce the local expiration policy. Falling back here + /// would silently bypass exactly the bindings the operator opted into, + /// e.g. a JWT with the wrong `aud` would still authenticate because + /// the IdP's own userinfo accepts it for itself. + if (!jwt_validator.value().resolveAndValidate(credentials)) + { + /// DEBUG, not TRACE: this is the binding-rejection path. Operators + /// running with DEBUG enabled will see a clear signal that the + /// JWT-fastpath (which enforces `expected_issuer` / `expected_audience` + /// / `allow_no_expiration`) rejected a token. The auth failure itself + /// is also visible to the client, but the log line tells the operator + /// *why* it was rejected on the local side. + LOG_DEBUG(getLogger("TokenAuthentication"), + "{}: Local JWT validation rejected the token. Refusing to fall back to " + "userinfo: the operator-configured bindings (expected_issuer / expected_audience / " + "allow_no_expiration) cannot be enforced by userinfo, and a fallback would silently " + "bypass them.", + processor_name); + return false; + } + try { auto decoded_token = jwt::decode(token); @@ -341,11 +709,32 @@ bool OpenIdTokenProcessor::resolveAndValidate(TokenCredentials & credentials) co } catch (const std::exception & ex) { - LOG_TRACE(getLogger("TokenAuthentication"), "{}: Failed to process token as JWT: {}", processor_name, ex.what()); + /// WARNING: validation passed but extracting the payload locally + /// failed -- a genuinely rare condition (the same token was just + /// successfully verified, so its bytes ARE a valid JWT). The + /// processor is about to fall back to userinfo for username + /// extraction. Bindings were already enforced by `jwt_validator`, + /// so this fallback is safe -- but the underlying mismatch + /// (decode failure on a verified token) usually means an IdP + /// behavioral change, a clock skew, or a payload-format drift, + /// and operators should know about it loudly. + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: JWT validation succeeded but payload extraction failed: {}. " + "Falling back to userinfo for username; the operator-configured " + "bindings have ALREADY been enforced by JWT validation, so this " + "fallback is safe -- but the decode failure indicates an unexpected " + "JWT shape from the IdP.", + processor_name, ex.what()); } } - /// If username or user info is empty -- local validation failed, trying introspection via provider + /// Userinfo path: only reachable when no `jwt_validator` is configured + /// (the constructor guarantees that combination is incompatible with any + /// `expected_issuer` / `expected_audience` pin), or when local JWT validation + /// passed but extracting the username/payload from the decoded token failed + /// for an unrelated reason -- in which case the bindings have already been + /// enforced by `jwt_validator` and userinfo is just being asked for the user + /// identity. if (username.empty() || user_info_json.empty()) { try diff --git a/src/Access/TokenProcessorsParse.cpp b/src/Access/TokenProcessorsParse.cpp index fa83c5fa6a34..43c2ab05a2e3 100644 --- a/src/Access/TokenProcessorsParse.cpp +++ b/src/Access/TokenProcessorsParse.cpp @@ -1,7 +1,9 @@ #include "TokenProcessors.h" +#include #include #include +#include namespace DB { @@ -27,44 +29,104 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( auto groups_claim = config.getString(prefix + ".groups_claim", "groups"); auto expected_issuer = config.getString(prefix + ".expected_issuer", ""); auto expected_audience = config.getString(prefix + ".expected_audience", ""); + /// `expected_typ` is the JWT header `typ` to require. RFC 8725 §3.11 and + /// RFC 9068 recommend type discrimination to prevent cross-token-class + /// substitution -- e.g. accepting an ID token (intended for client login) + /// where an access token (intended for resource access) is expected. + /// Common values: "at+jwt" (RFC 9068 access tokens), "JWT" (generic). + /// Empty (the default) means no `typ` enforcement; the JWT processors warn + /// at startup when this is left empty so the gap is visible. + auto expected_typ = config.getString(prefix + ".expected_typ", ""); auto allow_no_expiration = config.getBool(prefix + ".allow_no_expiration", false); + /// Constrain every OIDC/JWT trust-chain fetch (discovery, userinfo, + /// introspection, JWKS) to the operator-approved . + /// + /// Without this gate, any URL the operator pastes into the processor config + /// -- and any URL returned by an OIDC discovery document -- is fetched + /// blindly. A misconfigured or attacker-influenced discovery response can + /// then redirect token validation through hosts the operator never approved. + /// + /// We pre-validate every URL the operator typed into the processor config + /// here, at parse time, so a bad config fails fast at startup rather than + /// at first authentication. Discovery-derived URLs (jwks_uri etc.) are + /// validated separately, after the discovery fetch, inside the processor. + /// + /// If is absent the filter degrades to its + /// historical permissive behavior: this matches every other ClickHouse + /// outbound URL site and avoids breaking existing deployments. + RemoteHostFilter remote_host_filter; + remote_host_filter.setValuesFromConfig(config); + + auto require_allowed_url = [&](const String & raw_url, const char * param_name) + { + if (raw_url.empty()) + return; + try + { + remote_host_filter.checkURL(Poco::URI(raw_url)); + } + catch (const Exception & e) + { + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "Token processor '{}': '{}' URL '{}' is not in : {}", + processor_name, param_name, raw_url, e.message()); + } + }; + if (provider_type == "google") { - return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim); + return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_audience); } else if (provider_type == "azure") { - return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim); + return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_audience); } else if (provider_type == "openid") { auto verifier_leeway = config.getUInt64(prefix + ".verifier_leeway", 60); auto jwks_cache_lifetime = config.getUInt64(prefix + ".jwks_cache_lifetime", 3600); + /// `token_introspection_endpoint` is currently unused at runtime: the + /// processor relies on JWT-local validation (when JWKS is configured) + /// or on userinfo, never on RFC 7662 introspection. Don't require it + /// for "locally configured" mode -- forcing operators to set a value + /// that does nothing is a footgun. If introspection is wired up later, + /// the field is already plumbed and can become required at that point. bool externally_configured = config.hasProperty(prefix + ".configuration_endpoint") && !config.hasProperty(prefix + ".jwks_uri"); - bool locally_configured = config.hasProperty(prefix + ".userinfo_endpoint") && config.hasProperty(prefix + ".token_introspection_endpoint"); + bool locally_configured = config.hasProperty(prefix + ".userinfo_endpoint"); if (externally_configured && ! locally_configured) { + const auto configuration_endpoint = config.getString(prefix + ".configuration_endpoint"); + require_allowed_url(configuration_endpoint, "configuration_endpoint"); return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_issuer, expected_audience, allow_no_expiration, - config.getString(prefix + ".configuration_endpoint"), + configuration_endpoint, verifier_leeway, - jwks_cache_lifetime); + jwks_cache_lifetime, + remote_host_filter); } else if (locally_configured && !externally_configured) { + const auto userinfo_endpoint = config.getString(prefix + ".userinfo_endpoint"); + const auto token_introspection_endpoint = config.getString(prefix + ".token_introspection_endpoint", ""); + const auto jwks_uri = config.getString(prefix + ".jwks_uri", ""); + require_allowed_url(userinfo_endpoint, "userinfo_endpoint"); + require_allowed_url(token_introspection_endpoint, "token_introspection_endpoint"); + require_allowed_url(jwks_uri, "jwks_uri"); return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_issuer, expected_audience, allow_no_expiration, - config.getString(prefix + ".userinfo_endpoint"), - config.getString(prefix + ".token_introspection_endpoint"), + userinfo_endpoint, + token_introspection_endpoint, verifier_leeway, - config.getString(prefix + ".jwks_uri", ""), + jwks_uri, jwks_cache_lifetime); } - throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Either 'configuration_endpoint' or both 'userinfo_endpoint' and 'token_introspection_endpoint' (and, optionally, 'jwks_uri') must be specified for 'openid' processor"); + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "Either 'configuration_endpoint' or 'userinfo_endpoint' " + "(and, optionally, 'token_introspection_endpoint' / 'jwks_uri') must be specified for 'openid' processor"); } else if (provider_type == "jwt_static_key") { @@ -81,8 +143,9 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( config.getString(prefix + ".private_key", ""), config.getString(prefix + ".public_key_password", ""), config.getString(prefix + ".private_key_password", ""), - config.getString(prefix + ".claims", "")}; - return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_issuer, expected_audience, allow_no_expiration, params); + config.getString(prefix + ".claims", ""), + config.getUInt64(prefix + ".verifier_leeway", 60)}; + return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_issuer, expected_audience, expected_typ, allow_no_expiration, params); } else if (provider_type == "jwt_static_jwks") { @@ -101,9 +164,9 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( config.getString(prefix + ".static_jwks_file", "") }; return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, - expected_issuer, expected_audience, allow_no_expiration, + expected_issuer, expected_audience, expected_typ, allow_no_expiration, config.getString(prefix + ".claims", ""), - config.getUInt64(prefix + ".verifier_leeway", 0), + config.getUInt64(prefix + ".verifier_leeway", 60), std::make_shared(params)); } if (provider_type == "jwt_dynamic_jwks") @@ -113,11 +176,13 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( if (!config.hasProperty(prefix + ".jwks_uri")) throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'jwks_uri' must be specified for 'jwt_dynamic_jwks' processor"); + const auto jwks_uri = config.getString(prefix + ".jwks_uri"); + require_allowed_url(jwks_uri, "jwks_uri"); return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, - expected_issuer, expected_audience, allow_no_expiration, + expected_issuer, expected_audience, expected_typ, allow_no_expiration, config.getString(prefix + ".claims", ""), - config.getUInt64(prefix + ".verifier_leeway", 0), - config.getString(prefix + ".jwks_uri"), + config.getUInt64(prefix + ".verifier_leeway", 60), + jwks_uri, config.getUInt(prefix + ".jwks_cache_lifetime", 3600)); } else diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index e513e8179e84..1fa7bf7d64e5 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -146,13 +146,23 @@ void ClientInfo::write(WriteBuffer & out, UInt64 server_protocol_revision) const if (server_protocol_revision >= DBMS_MIN_REVISON_WITH_JWT_IN_INTERSERVER) { - if (!jwt.empty()) - { - writeBinary(static_cast(1), out); - writeBinary(jwt, out); - } - else - writeBinary(static_cast(0), out); + /// Never serialize the bearer token over the interserver wire. + /// + /// Distributed queries use this `ClientInfo` to fan out to remote shards + /// and replicas. Interserver transport is plaintext by default + /// (`interserver_http_port` vs `interserver_https_port`), so writing the + /// raw JWT here exposes session credentials on the internal network for + /// every distributed query whenever the operator hasn't opted into TLS + /// for interserver -- and no code on the receiving side currently reads + /// `client_info.jwt`, so the transmission is pure leakage with no + /// functional benefit. + /// + /// The protocol-revision byte is still emitted (always `0` = "no JWT") + /// to preserve wire compatibility with peers that expect this field at + /// this offset; receivers will read it as "no JWT present" and skip + /// the body. The `jwt` member of `ClientInfo` is retained for any + /// in-process use within the same node. + writeBinary(static_cast(0), out); } } @@ -345,8 +355,17 @@ void ClientInfo::setFromHTTPRequest(const Poco::Net::HTTPRequest & request) for (const auto & header : request) { /// These headers can contain authentication info and shouldn't be accessible by the user. + /// + /// The standard HTTP authorization header is `Authorization` (RFC 7235 §4.2), + /// which lowercases to `authorization`. The previous filter compared against + /// `authentication` -- a real but distinct header (RFC 7615) that ClickHouse + /// does not use for credentials -- so the actual `Authorization: Basic ...` + /// or `Authorization: Bearer ` header was retained in `http_headers` + /// and exposed via `getClientHTTPHeader('Authorization')` and via + /// `` on HTTP auth servers (which would relay the bearer + /// token meant for ClickHouse to a third-party auth server). String key_lowercase = Poco::toLower(header.first); - if (key_lowercase.starts_with("x-clickhouse") || key_lowercase == "authentication") + if (key_lowercase.starts_with("x-clickhouse") || key_lowercase == "authorization") continue; http_headers[header.first] = header.second; } diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index a159d59db690..d89a0592a858 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -385,6 +386,13 @@ void Session::authenticate(const Credentials & credentials_, const Poco::Net::So user_id = auth_result.user_id; user_authenticated_with = auth_result.authentication_data; settings_from_auth_server = auth_result.settings; + + /// Bind the session lifetime to the access-token lifetime when applicable. + if (const auto * token_credentials = typeid_cast(&credentials_)) + auth_token_expires_at = token_credentials->getExpiresAt(); + else + auth_token_expires_at.reset(); + LOG_DEBUG(log, "{} Authenticated with global context as user {}", toString(auth_id), toString(*user_id)); @@ -410,13 +418,33 @@ void Session::authenticate(const Credentials & credentials_, const Poco::Net::So void Session::checkIfUserIsStillValid() { + const auto now = std::chrono::system_clock::now(); + if (const auto valid_until = user_authenticated_with.getValidUntil()) { - const time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); - - if (now > valid_until) + if (std::chrono::system_clock::to_time_t(now) > valid_until) throw Exception(ErrorCodes::USER_EXPIRED, "Authentication method used has expired"); } + + /// For sessions established via a bearer/access token (JWT or opaque), enforce token expiry. + if (auth_token_expires_at.has_value() && now >= *auth_token_expires_at) + throw Exception(ErrorCodes::USER_EXPIRED, "Access token used to authenticate the session has expired"); + + /// For JWT/token sessions, also re-validate that the authenticating + /// processor is still configured. Without this, an admin removing a + /// processor (or disabling token auth entirely) would NOT terminate + /// active sessions until each session's token expired naturally -- a + /// gap of up to one token TTL (~1h for typical IdPs) between the + /// admin's "stop accepting tokens from this IdP" intent and actual + /// session termination (M-28). + if (user_authenticated_with.getType() == AuthenticationType::JWT) + { + const auto & processor_name = user_authenticated_with.getTokenProcessorName(); + if (!global_context->getAccessControl().getExternalAuthenticators().hasTokenProcessor(processor_name)) + throw Exception(ErrorCodes::USER_EXPIRED, + "Token processor '{}' that authenticated this session is no longer configured", + processor_name.empty() ? "" : processor_name); + } } void Session::onAuthenticationFailure(const std::optional & user_name, const Poco::Net::SocketAddress & address_, const Exception & e) diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index 277670f818fa..23ccb721aa27 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -120,6 +120,10 @@ class Session std::vector external_roles; AuthenticationData user_authenticated_with; + /// When the user was authenticated with a bearer/access token, this holds the + /// effective token expiry captured at authentication time. + std::optional auth_token_expires_at; + ContextMutablePtr session_context; mutable bool query_context_created = false; diff --git a/src/Parsers/Access/ASTAuthenticationData.cpp b/src/Parsers/Access/ASTAuthenticationData.cpp index 7fe8de9bdb5b..ec1cfc02908a 100644 --- a/src/Parsers/Access/ASTAuthenticationData.cpp +++ b/src/Parsers/Access/ASTAuthenticationData.cpp @@ -116,12 +116,29 @@ void ASTAuthenticationData::formatImpl(WriteBuffer & ostr, const FormatSettings } case AuthenticationType::JWT: { - if (!children.empty()) + /// JWT carries two independent optional clauses (PROCESSOR and + /// CLAIMS), so it does not fit the single-prefix/single-parameter + /// shape the rest of this function uses. Emit directly here and + /// short-circuit the prefix/parameter pipeline by returning at + /// the end of this case. + ostr << " " << auth_type_name; + + size_t child_idx = 0; + if (has_jwt_processor) { - prefix = "CLAIMS"; - parameter = true; + ostr << " PROCESSOR "; + children[child_idx++]->format(ostr, settings); } - break; + if (has_jwt_claims) + { + ostr << " CLAIMS "; + children[child_idx++]->format(ostr, settings); + } + + if (valid_until) + formatValidUntil(*valid_until, ostr, settings); + + return; } case AuthenticationType::LDAP: { diff --git a/src/Parsers/Access/ASTAuthenticationData.h b/src/Parsers/Access/ASTAuthenticationData.h index ab2da84fcaf2..742c7f5e39d3 100644 --- a/src/Parsers/Access/ASTAuthenticationData.h +++ b/src/Parsers/Access/ASTAuthenticationData.h @@ -41,6 +41,16 @@ class ASTAuthenticationData : public IAST bool contains_password = false; bool contains_hash = false; + /// IDENTIFIED WITH jwt accepts two optional clauses: + /// PROCESSOR '' + /// CLAIMS '' + /// Both are stored in `children` in this order; flags below tell which slots + /// are populated (children layout depends on which were specified). The + /// processor pin is what protects against the H-14 / H-17 cache-priming + /// bypass for SQL-declared JWT users; without it the per-user lookup goes + /// through the iterate-all-processors auto-discovery path with empty pin. + bool has_jwt_processor = false; + bool has_jwt_claims = false; ASTPtr valid_until; protected: diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index 5f0d8d000aab..3afc59aa516b 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -83,7 +83,7 @@ namespace bool expect_ssl_cert_subjects = false; bool expect_public_ssh_key = false; bool expect_http_auth_server = false; - bool expect_claims = false; // NOLINT + bool expect_jwt_args = false; auto parse_non_password_based_type = [&](auto check_type) { @@ -105,8 +105,7 @@ namespace else if (check_type == AuthenticationType::HTTP) expect_http_auth_server = true; else if (check_type == AuthenticationType::JWT) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "CREATE USER is not supported for JWT"); - // expect_claims = true; + expect_jwt_args = true; else if (check_type != AuthenticationType::NO_PASSWORD) expect_password = true; @@ -167,6 +166,7 @@ namespace ASTPtr http_auth_scheme; ASTPtr ssl_cert_subjects; std::optional ssl_cert_subject_type; + ASTPtr jwt_processor; ASTPtr jwt_claims; if (expect_password || expect_hash) @@ -232,12 +232,30 @@ namespace return false; } } - else if (expect_claims) + else if (expect_jwt_args) { - if (ParserKeyword{Keyword::CLAIMS}.ignore(pos, expected)) + /// IDENTIFIED WITH jwt accepts two optional clauses, in either order: + /// PROCESSOR '' -- pin to a specific token_processor + /// CLAIMS '' -- per-user claims requirement + /// Either, both, or neither may appear. Pinning a processor is what + /// gates SQL-declared JWT users out of the iterate-all-processors + /// auto-discovery path. + for (size_t i = 0; i < 2; ++i) { - if (!ParserStringAndSubstitution{}.parse(pos, jwt_claims, expected)) - return false; + if (!jwt_processor && ParserKeyword{Keyword::PROCESSOR}.ignore(pos, expected)) + { + if (!ParserStringAndSubstitution{}.parse(pos, jwt_processor, expected)) + return false; + } + else if (!jwt_claims && ParserKeyword{Keyword::CLAIMS}.ignore(pos, expected)) + { + if (!ParserStringAndSubstitution{}.parse(pos, jwt_claims, expected)) + return false; + } + else + { + break; + } } } @@ -265,8 +283,17 @@ namespace if (http_auth_scheme) auth_data->children.push_back(std::move(http_auth_scheme)); + if (jwt_processor) + { + auth_data->has_jwt_processor = true; + auth_data->children.push_back(std::move(jwt_processor)); + } + if (jwt_claims) + { + auth_data->has_jwt_claims = true; auth_data->children.push_back(std::move(jwt_claims)); + } parseValidUntil(pos, expected, auth_data->valid_until); diff --git a/src/Parsers/CommonParsers.h b/src/Parsers/CommonParsers.h index c5bd6a7d04d4..203d2dc344a2 100644 --- a/src/Parsers/CommonParsers.h +++ b/src/Parsers/CommonParsers.h @@ -405,6 +405,7 @@ namespace DB MR_MACROS(PRIMARY_KEY, "PRIMARY KEY") \ MR_MACROS(PRIORITY, "PRIORITY") \ MR_MACROS(PRIMARY, "PRIMARY") \ + MR_MACROS(PROCESSOR, "PROCESSOR") \ MR_MACROS(PROFILE, "PROFILE") \ MR_MACROS(PROFILES, "PROFILES") \ MR_MACROS(PROJECTION, "PROJECTION") \ diff --git a/src/Server/ArrowFlightHandler.cpp b/src/Server/ArrowFlightHandler.cpp index e7a4e4888b1f..047478d47121 100644 --- a/src/Server/ArrowFlightHandler.cpp +++ b/src/Server/ArrowFlightHandler.cpp @@ -7,6 +7,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -34,6 +37,7 @@ namespace DB namespace ErrorCodes { + extern const int AUTHENTICATION_FAILED; extern const int LOGICAL_ERROR; extern const int UNKNOWN_EXCEPTION; } @@ -46,10 +50,23 @@ namespace class AuthMiddleware : public arrow::flight::ServerMiddleware { public: - explicit AuthMiddleware(const std::string & token, const std::string & username, const std::string & password) - : token_(token) - , username_(username) - , password_(password) + enum class Scheme { Basic, Bearer }; + + /// Basic constructor: `username:password` already split out of the + /// base64-decoded `Basic <...>` header. + AuthMiddleware(const std::string & token, const std::string & username, const std::string & password) + : scheme_(Scheme::Basic), token_(token), username_(username), password_(password) + { + } + + /// Bearer constructor: stores the raw JWT (or other opaque bearer + /// credential) verbatim. Previously the factory stuffed bearer tokens + /// through the same base64 + `:` split as Basic, which guaranteed any + /// real JWT failed (no `:` after base64-decoding) -- the `Bearer` + /// label was just a misleading alias for `Basic`. + struct BearerTag {}; + AuthMiddleware(BearerTag, const std::string & bearer_token) + : scheme_(Scheme::Bearer), token_(bearer_token) { } @@ -58,8 +75,10 @@ namespace return *static_cast(context.GetMiddleware(AUTHORIZATION_MIDDLEWARE_NAME)); } + Scheme scheme() const { return scheme_; } const std::string & username() const { return username_; } const std::string & password() const { return password_; } + const std::string & bearerToken() const { return token_; } void SendingHeaders(arrow::flight::AddCallHeaders * outgoing_headers) override { @@ -71,6 +90,7 @@ namespace std::string name() const override { return AUTHORIZATION_MIDDLEWARE_NAME; } private: + const Scheme scheme_; const std::string token_; const std::string username_; const std::string password_; @@ -92,32 +112,78 @@ namespace auto auth_header = std::string(it->second); - std::string token; - const std::string prefix_basic = "Basic "; - if (auth_header.starts_with(prefix_basic)) - token = auth_header.substr(prefix_basic.size()); - const std::string prefix_bearer = "Bearer "; + + /// Bearer first: route it to a real JWT/token path, NOT through the + /// base64 + ':' split that Basic uses. A real JWT is base64url- + /// encoded JSON in three parts separated by '.', so base64-decoding + /// the whole header value yields binary garbage and never contains + /// ':' -- the previous code treated `Bearer` as a strict alias for + /// `Basic` and rejected every legitimate bearer token. Token + /// validation runs later in the per-call helper, after the + /// `Session` is constructed. if (auth_header.starts_with(prefix_bearer)) - token = auth_header.substr(prefix_bearer.size()); + { + auto bearer_token = auth_header.substr(prefix_bearer.size()); + if (bearer_token.empty()) + return arrow::Status::IOError("Bearer token is empty"); - if (token.empty()) - return arrow::Status::IOError("Expected Basic auth scheme"); + *middleware = std::make_unique(AuthMiddleware::BearerTag{}, bearer_token); + return arrow::Status::OK(); + } - std::string credentials = base64Decode(token, true); - auto pos = credentials.find(':'); - if (pos == std::string::npos) - return arrow::Status::IOError("Malformed credentials"); + if (auth_header.starts_with(prefix_basic)) + { + auto token = auth_header.substr(prefix_basic.size()); + if (token.empty()) + return arrow::Status::IOError("Basic credentials are empty"); - auto user = credentials.substr(0, pos); - auto password = credentials.substr(pos + 1); + std::string credentials = base64Decode(token, true); + auto pos = credentials.find(':'); + if (pos == std::string::npos) + return arrow::Status::IOError("Malformed credentials"); - *middleware = std::make_unique(token, user, password); - return arrow::Status::OK(); + auto user = credentials.substr(0, pos); + auto password = credentials.substr(pos + 1); + + *middleware = std::make_unique(token, user, password); + return arrow::Status::OK(); + } + + return arrow::Status::IOError("Expected Basic or Bearer auth scheme"); } }; + /// Dispatches `Session::authenticate` based on the auth scheme captured by + /// the middleware. Basic → standard `(user, password)` overload. Bearer → + /// pre-validate the token (no cache priming, mirroring HTTP/TCP H-14) so + /// the resolved username gets attached to the credentials, then run the + /// per-user authentication chain via the `(Credentials &)` overload. + void authenticateArrowFlightSession( + Session & session, + const AuthMiddleware & auth, + const Poco::Net::SocketAddress & address, + ContextPtr global_context) + { + if (auth.scheme() == AuthMiddleware::Scheme::Bearer) + { + const auto & access_control = global_context->getAccessControl(); + if (!access_control.isTokenAuthEnabled()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Token authentication is disabled"); + + TokenCredentials token_credentials(auth.bearerToken()); + if (!access_control.getExternalAuthenticators().checkTokenCredentials( + token_credentials, /*processor_name=*/"", /*jwt_claims=*/"", /*prime_cache_on_success=*/false)) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid bearer token"); + + session.authenticate(token_credentials, address); + return; + } + + session.authenticate(auth.username(), auth.password(), address); + } + String readFile(const String & filepath) { Poco::FileInputStream ifs(filepath); @@ -948,7 +1014,7 @@ arrow::Status ArrowFlightHandler::GetFlightInfo( Session session{server.context(), ClientInfo::Interface::ARROW_FLIGHT}; const auto & auth = AuthMiddleware::get(context); - session.authenticate(auth.username(), auth.password(), getClientAddress(context)); + authenticateArrowFlightSession(session, auth, getClientAddress(context), server.context()); auto query_context = session.makeQueryContext(); query_context->setCurrentQueryId(""); /// Empty string means the query id will be autogenerated. @@ -1034,7 +1100,7 @@ arrow::Status ArrowFlightHandler::GetSchema( Session session{server.context(), ClientInfo::Interface::ARROW_FLIGHT}; const auto & auth = AuthMiddleware::get(context); - session.authenticate(auth.username(), auth.password(), getClientAddress(context)); + authenticateArrowFlightSession(session, auth, getClientAddress(context), server.context()); auto query_context = session.makeQueryContext(); query_context->setCurrentQueryId(""); /// Empty string means the query id will be autogenerated. @@ -1094,7 +1160,7 @@ arrow::Status ArrowFlightHandler::PollFlightInfo( auto session = std::make_unique(server.context(), ClientInfo::Interface::ARROW_FLIGHT); const auto & auth = AuthMiddleware::get(context); - session->authenticate(auth.username(), auth.password(), getClientAddress(context)); + authenticateArrowFlightSession(*session, auth, getClientAddress(context), server.context()); auto query_context = session->makeQueryContext(); query_context->setCurrentQueryId(""); /// Empty string means the query id will be autogenerated. @@ -1272,7 +1338,7 @@ arrow::Status ArrowFlightHandler::DoGet( Session session{server.context(), ClientInfo::Interface::ARROW_FLIGHT}; const auto & auth = AuthMiddleware::get(context); - session.authenticate(auth.username(), auth.password(), getClientAddress(context)); + authenticateArrowFlightSession(session, auth, getClientAddress(context), server.context()); auto query_context = session.makeQueryContext(); query_context->setCurrentQueryId(""); /// Empty string means the query id will be autogenerated. @@ -1337,7 +1403,7 @@ arrow::Status ArrowFlightHandler::DoPut( Session session{server.context(), ClientInfo::Interface::ARROW_FLIGHT}; const auto & auth = AuthMiddleware::get(context); - session.authenticate(auth.username(), auth.password(), getClientAddress(context)); + authenticateArrowFlightSession(session, auth, getClientAddress(context), server.context()); auto query_context = session.makeQueryContext(); query_context->setCurrentQueryId(""); /// Empty string means the query id will be autogenerated. diff --git a/src/Server/HTTP/authenticateUserByHTTP.cpp b/src/Server/HTTP/authenticateUserByHTTP.cpp index 74838f941b37..20db21b69e4a 100644 --- a/src/Server/HTTP/authenticateUserByHTTP.cpp +++ b/src/Server/HTTP/authenticateUserByHTTP.cpp @@ -229,7 +229,15 @@ bool authenticateUserByHTTP( const auto token_credentials = TokenCredentials(bearer_token); const auto & external_authenticators = access_control.getExternalAuthenticators(); - if (!external_authenticators.checkTokenCredentials(token_credentials)) + /// Pre-user-lookup token validation. Pass `prime_cache_on_success=false` + /// so this unconstrained call (no processor pin, no JWT claims) does not + /// populate the token cache. The cache is reserved for entries produced + /// by the per-user authentication path (`Authentication::areCredentialsValid`), + /// which applies the user's pinned processor and per-user claims. + /// Without this, a user whose `` block omits `` would + /// satisfy a later cache lookup with empty `processor_name` -- silently + /// inheriting whichever processor happened to win this auto-discovery race. + if (!external_authenticators.checkTokenCredentials(token_credentials, /*processor_name=*/"", /*jwt_claims=*/"", /*prime_cache_on_success=*/false)) throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: Token could not be verified."); current_credentials = std::make_unique(token_credentials); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index c9593c8388ff..821104f6c4f5 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1970,7 +1970,12 @@ void TCPHandler::receiveHello() const auto & external_authenticators = access_control.getExternalAuthenticators(); - if (!external_authenticators.checkTokenCredentials(credentials)) + /// Pre-user-lookup token validation. Pass `prime_cache_on_success=false` + /// so this unconstrained call (no processor pin, no JWT claims) does not + /// populate the token cache; the per-user authentication path is the only + /// site allowed to populate it, after applying the user's pinned processor + /// and per-user claims. See `ExternalAuthenticators::checkTokenCredentials`. + if (!external_authenticators.checkTokenCredentials(credentials, /*processor_name=*/"", /*jwt_claims=*/"", /*prime_cache_on_success=*/false)) throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Token is invalid"); session->authenticate(credentials, getClientAddress(client_info)); diff --git a/tests/integration/test_jwt_auth/test.py b/tests/integration/test_jwt_auth/test.py index 481c8117a73e..f3abf04413f5 100644 --- a/tests/integration/test_jwt_auth/test.py +++ b/tests/integration/test_jwt_auth/test.py @@ -97,3 +97,112 @@ def test_jwks_server_ec_es384(started_cluster): ] ) assert res == "jwt_user\n" + + +# Helper: request `SELECT currentUser()` over HTTP with the given bearer token +# and return the body. Caller decides whether to assert on the username or on +# rejection (rejected requests return a non-username error body). +def http_select_current_user(token: str) -> str: + return client.exec_in_container( + [ + "bash", + "-c", + curl_with_jwt(token=token, ip=cluster.get_instance_ip(instance.name)), + ] + ) + + +def make_token(payload: dict, secret: str) -> str: + """Sign an HS256 JWT with the given secret. Matches the secrets configured + for `single_key_processor` (`my_secret`) and `another_single_key_processor` + (`other_secret`) in `configs/validators.xml`.""" + import jwt + return jwt.encode(payload, secret, algorithm="HS256") + + +def test_sql_create_jwt_user_with_processor_pin(started_cluster): + """SQL `CREATE USER ... IDENTIFIED WITH jwt PROCESSOR ''` actually + pins the auth path: a token that validates against a different processor + in the same chain must NOT authenticate the SQL-pinned user. Without the + pin the iterate-all-processors auto-discovery branch would happily accept + either token (this is the H-22 / H-14 bypass surface).""" + + instance.query( + "CREATE USER OR REPLACE sql_jwt_user IDENTIFIED WITH jwt PROCESSOR 'single_key_processor'" + ) + + # Round-trip: SHOW CREATE USER must emit the PROCESSOR clause we just set. + show = instance.query("SHOW CREATE USER sql_jwt_user").strip() + assert "PROCESSOR 'single_key_processor'" in show, show + assert "CLAIMS" not in show, show + + token_my = make_token({"sub": "sql_jwt_user"}, "my_secret") + token_other = make_token({"sub": "sql_jwt_user"}, "other_secret") + + # Pinned processor accepts the my_secret-signed token. + assert http_select_current_user(token_my) == "sql_jwt_user\n" + + # The other_secret-signed token validates fine against + # `another_single_key_processor`, but the user is pinned to + # `single_key_processor` -- the pin must reject it. + rejected = http_select_current_user(token_other) + assert "sql_jwt_user" not in rejected, rejected + + # Re-pin via ALTER and the relationship inverts. + instance.query( + "ALTER USER sql_jwt_user IDENTIFIED WITH jwt PROCESSOR 'another_single_key_processor'" + ) + assert http_select_current_user(token_other) == "sql_jwt_user\n" + rejected = http_select_current_user(token_my) + assert "sql_jwt_user" not in rejected, rejected + + instance.query("DROP USER sql_jwt_user") + + +def test_sql_create_jwt_user_with_claims(started_cluster): + """`CLAIMS ''` must be enforced for SQL-declared JWT users: a token + that is valid against the pinned processor but lacks the required claim + must be rejected, and a token that has the claim must be accepted.""" + + instance.query( + "CREATE USER OR REPLACE sql_jwt_claims_user " + "IDENTIFIED WITH jwt PROCESSOR 'single_key_processor' " + "CLAIMS '{\"role\":\"admin\"}'" + ) + + show = instance.query("SHOW CREATE USER sql_jwt_claims_user").strip() + assert "PROCESSOR 'single_key_processor'" in show, show + assert "CLAIMS '{\"role\":\"admin\"}'" in show, show + + # Token signed with the pinned processor's secret but no `role` claim: + # processor accepts, per-user CLAIMS rejects. + token_no_claim = make_token({"sub": "sql_jwt_claims_user"}, "my_secret") + rejected = http_select_current_user(token_no_claim) + assert "sql_jwt_claims_user" not in rejected, rejected + + # Token with the required claim: both gates pass. + token_with_claim = make_token( + {"sub": "sql_jwt_claims_user", "role": "admin"}, "my_secret" + ) + assert http_select_current_user(token_with_claim) == "sql_jwt_claims_user\n" + + instance.query("DROP USER sql_jwt_claims_user") + + +def test_sql_jwt_user_no_pin_uses_auto_discovery(started_cluster): + """Without `PROCESSOR`, the SQL JWT user falls back to auto-discovery: any + configured processor that validates the token will be accepted. This is + the documented behavior for users who explicitly chose not to pin.""" + + instance.query("CREATE USER OR REPLACE sql_jwt_unpinned IDENTIFIED WITH jwt") + + show = instance.query("SHOW CREATE USER sql_jwt_unpinned").strip() + assert "PROCESSOR" not in show, show + + # Both tokens (each valid against a different processor) authenticate the + # same unpinned SQL user. + for secret in ("my_secret", "other_secret"): + token = make_token({"sub": "sql_jwt_unpinned"}, secret) + assert http_select_current_user(token) == "sql_jwt_unpinned\n" + + instance.query("DROP USER sql_jwt_unpinned") diff --git a/tests/queries/0_stateless/01292_create_user.reference b/tests/queries/0_stateless/01292_create_user.reference index b93bddafb6d5..0aa4225e4394 100644 --- a/tests/queries/0_stateless/01292_create_user.reference +++ b/tests/queries/0_stateless/01292_create_user.reference @@ -125,3 +125,9 @@ CREATE USER u1_01292 IDENTIFIED WITH no_password CREATE USER `u1_01292@192.168.%.%` IDENTIFIED WITH no_password HOST LIKE \'192.168.%.%\' CREATE USER `u2_01292@192.168.%.%` IDENTIFIED WITH no_password HOST LIKE \'192.168.%.%\' -- creating user identified with JWT +CREATE USER user1 IDENTIFIED WITH jwt +CREATE USER user1 IDENTIFIED WITH jwt PROCESSOR \'my_processor\' +CREATE USER user1 IDENTIFIED WITH jwt CLAIMS \'{"role":"admin"}\' +CREATE USER user1 IDENTIFIED WITH jwt PROCESSOR \'my_processor\' CLAIMS \'{"role":"admin"}\' +CREATE USER user1 IDENTIFIED WITH jwt PROCESSOR \'my_processor\' CLAIMS \'{"role":"admin"}\' +CREATE USER user1 IDENTIFIED WITH jwt PROCESSOR \'other_processor\' diff --git a/tests/queries/0_stateless/01292_create_user.sql b/tests/queries/0_stateless/01292_create_user.sql index bbd7d17e9db7..2372cf789117 100644 --- a/tests/queries/0_stateless/01292_create_user.sql +++ b/tests/queries/0_stateless/01292_create_user.sql @@ -263,5 +263,20 @@ SHOW CREATE USER u2_01292@'192.168.%.%'; DROP USER u1_01292, u1_01292@'192.168.%.%', u2_01292@'192.168.%.%'; SELECT '-- creating user identified with JWT'; -CREATE USER user1 IDENTIFIED WITH jwt BY '1'; -- { clientError BAD_ARGUMENTS } -CREATE USER user1 IDENTIFIED WITH jwt; -- { clientError BAD_ARGUMENTS } +CREATE USER user1 IDENTIFIED WITH jwt BY '1'; -- { clientError SYNTAX_ERROR } +CREATE USER user1 IDENTIFIED WITH jwt; +SHOW CREATE USER user1; +CREATE USER OR REPLACE user1 IDENTIFIED WITH jwt PROCESSOR 'my_processor'; +SHOW CREATE USER user1; +CREATE USER OR REPLACE user1 IDENTIFIED WITH jwt CLAIMS '{"role":"admin"}'; +SHOW CREATE USER user1; +CREATE USER OR REPLACE user1 IDENTIFIED WITH jwt PROCESSOR 'my_processor' CLAIMS '{"role":"admin"}'; +SHOW CREATE USER user1; +CREATE USER OR REPLACE user1 IDENTIFIED WITH jwt CLAIMS '{"role":"admin"}' PROCESSOR 'my_processor'; +SHOW CREATE USER user1; +ALTER USER user1 IDENTIFIED WITH jwt PROCESSOR 'other_processor'; +SHOW CREATE USER user1; +DROP USER user1; +CREATE USER user1 IDENTIFIED WITH jwt PROCESSOR ''; -- { serverError BAD_ARGUMENTS } +CREATE USER user1 IDENTIFIED WITH jwt CLAIMS 'not-json'; -- { serverError BAD_ARGUMENTS } +CREATE USER user1 IDENTIFIED WITH jwt CLAIMS '[]'; -- { serverError BAD_ARGUMENTS }