Skip to content
Closed
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
9524c1f
feat: add automatic S3 request retry with exponential backoff
allanrogerr May 4, 2026
a79aa11
fix: address Copilot review on retry mechanism
allanrogerr May 5, 2026
beead6e
fix: strengthen exception assertions in RetryTest and guard regionCac…
allanrogerr May 5, 2026
53da859
fix: replace instanceof assertions with typed catch to satisfy SpotBu…
allanrogerr May 5, 2026
60aeaed
fix: use ThreadLocalRandom for backoff jitter, disable OkHttp retry, …
allanrogerr May 5, 2026
d1c52e0
style: fix Spotless formatting violations in BaseS3Client
allanrogerr May 5, 2026
0d687ec
fix: propagate runAsync dispatch failure into retryFuture
allanrogerr May 5, 2026
34abd64
refactor: move retry to OkHttp interceptor, drop S3-code retry
allanrogerr May 7, 2026
1997b48
fix: narrow throws clauses in RetryTest to satisfy SpotBugs
allanrogerr May 7, 2026
b2cad44
Merge branch 'master' into feature/retry-mechanism
allanrogerr May 7, 2026
ba65312
refactor: strip retry to balamurugana's interceptor proposal scope
allanrogerr May 7, 2026
6d39a12
feat: restore full retry capability on top of OkHttp interceptor
allanrogerr May 7, 2026
756e294
docs(retry): clarify RetryInterceptor as supported API; add terminal-…
allanrogerr May 7, 2026
2f2fb4f
fix(retry): cancellation awareness, broader docs, tighter scope, more…
allanrogerr May 7, 2026
7fe6e87
fix(retry): drop public Javadoc links to package-private Retry; strip…
allanrogerr May 7, 2026
9a5c8e4
fix(retry): address bala review on PR #1701 review 4248622939
allanrogerr May 8, 2026
93e4f47
fix(retry): address bala review batch on PR #1701 review 4250022733
allanrogerr May 8, 2026
0ae488b
fix(retry): drop Retry.java; revert createBody bracket per r3206778779
allanrogerr May 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 74 additions & 11 deletions api/src/main/java/io/minio/BaseS3Client.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* MinIO Java SDK for Amazon S3 Compatible Cloud Storage,
* (C) 2025 MinIO, Inc.
* (C) 2025-2026 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -104,6 +104,7 @@ public abstract class BaseS3Client implements AutoCloseable {
private static final String UPLOAD_ID = "uploadId";
private static final Set<String> TRACE_QUERY_PARAMS =
ImmutableSet.of("retention", "legal-hold", "tagging", UPLOAD_ID, "acl", "attributes");

private PrintWriter traceStream;
protected final Map<String, String> regionCache = new ConcurrentHashMap<>();
protected String userAgent = Utils.getDefaultUserAgent();
Expand All @@ -113,19 +114,59 @@ public abstract class BaseS3Client implements AutoCloseable {
protected OkHttpClient httpClient;
protected boolean closeHttpClient;

/**
* Maximum attempts per S3 request. Default {@link Retry#MAX_RETRY}. Read on every request via the
* retry interceptor's supplier so runtime tuning via {@link #setMaxRetries(int)} takes immediate
* effect. Package-private so the {@code Builder} classes in this package can seed it via the
* setter; subclasses must go through {@link #setMaxRetries(int)}.
*/
volatile int maxRetries = Retry.MAX_RETRY;

protected BaseS3Client(
Http.BaseUrl baseUrl, Provider provider, OkHttpClient httpClient, boolean closeHttpClient) {
this.baseUrl = baseUrl;
this.provider = provider;
this.httpClient = httpClient;
this.closeHttpClient = closeHttpClient;
this.httpClient = wrapWithRetry(httpClient);
Comment thread
allanrogerr marked this conversation as resolved.
Outdated
}

protected BaseS3Client(BaseS3Client client) {
this.baseUrl = client.baseUrl;
this.provider = client.provider;
this.httpClient = client.httpClient;
this.closeHttpClient = client.closeHttpClient;
this.maxRetries = client.maxRetries;
this.httpClient = wrapWithRetry(client.httpClient);
Comment thread
allanrogerr marked this conversation as resolved.
Outdated
}

/**
* Re-wires the retry interceptor on {@code client} so it reads {@link #maxRetries} from this
* instance. Strips any prior {@link Http.RetryInterceptor} from BOTH the application-interceptor
* and network-interceptor chains (e.g. one bound to a different instance via the copy
* constructor, or accidentally registered on the network chain) before installing this one as an
* application interceptor.
*
* <p>Also forces {@code retryOnConnectionFailure(false)} on the underlying client. The SDK's own
* {@link Http.RetryInterceptor} is the single source of retry policy; layering OkHttp's
* connection-level retry on top would double-count attempts and obscure the {@link #maxRetries}
* budget. This silently overrides any {@code retryOnConnectionFailure(true)} that a
* caller-supplied client was configured with — by design.
*/
private OkHttpClient wrapWithRetry(OkHttpClient client) {
OkHttpClient.Builder builder = client.newBuilder().retryOnConnectionFailure(false);
builder.interceptors().removeIf(i -> i instanceof Http.RetryInterceptor);
builder.networkInterceptors().removeIf(i -> i instanceof Http.RetryInterceptor);
return builder.addInterceptor(new Http.RetryInterceptor(() -> this.maxRetries)).build();
}

/**
* Sets the maximum number of attempts for transient HTTP failures. Pass {@code 1} to disable
* automatic retries. Defaults to {@code 10}.
*
* @param maxRetries maximum attempts (must be {@code >= 1}).
*/
public void setMaxRetries(int maxRetries) {
if (maxRetries < 1) throw new IllegalArgumentException("maxRetries must be >= 1");
this.maxRetries = maxRetries;
}

/** Closes underneath HTTP client. */
Expand Down Expand Up @@ -182,6 +223,11 @@ public void setAppInfo(String name, String version) {
/**
* Enables HTTP call tracing and written to traceStream.
*
* <p><b>Retry caveat.</b> Tracing happens at the SDK callback level, so only the final response
* of a retry sequence is recorded. Per-attempt traces (which the {@link Http.RetryInterceptor}
* sees and discards) are not surfaced here. To inspect individual retry attempts, register an
* OkHttp {@code HttpLoggingInterceptor} on a custom client.
*
* @param traceStream {@link OutputStream} for writing HTTP call tracing.
* @see #traceOff
*/
Expand Down Expand Up @@ -268,7 +314,13 @@ private String[] handleRedirectResponse(
return new String[] {code, message};
}

/** Execute HTTP request asynchronously for given parameters. */
/**
* Execute HTTP request asynchronously for given parameters. Retries on retryable IOException,
* HTTP status, and S3 error code are handled by {@link Http.RetryInterceptor}, which {@link
* #wrapWithRetry} installs on the underlying {@link OkHttpClient} regardless of whether the
* client is the default or caller-supplied. The attempt budget is read from {@link #maxRetries}
* on every request.
*/
protected CompletableFuture<Response> executeAsync(Http.S3Request s3request, String region) {
Credentials credentials = (provider == null) ? null : provider.fetch();
Http.Request request = null;
Expand All @@ -282,15 +334,9 @@ protected CompletableFuture<Response> executeAsync(Http.S3Request s3request, Str
PrintWriter traceStream = this.traceStream;
if (traceStream != null) traceStream.print(request.httpTraces());

OkHttpClient httpClient = this.httpClient;
Comment thread
allanrogerr marked this conversation as resolved.
// FIXME: enable retry for all request.
// if (!s3request.retryFailure()) {
// httpClient = httpClient.newBuilder().retryOnConnectionFailure(false).build();
// }

okhttp3.Request httpRequest = request.httpRequest();
CompletableFuture<Response> completableFuture = newCompleteableFuture();
httpClient
this.httpClient
.newCall(httpRequest)
.enqueue(
new Callback() {
Expand Down Expand Up @@ -1223,6 +1269,15 @@ private Object[] createBody(PutObjectAPIBaseArgs args, MediaType contentType)
boolean checksumHeader = headers.namePrefixAny("x-amz-checksum-");
String md5Hash = headers.getFirst(Http.Headers.CONTENT_MD5);

long fileStartPos = 0;
if (args.file() != null) {
try {
fileStartPos = args.file().getFilePointer();
} catch (IOException e) {
throw new MinioException(e);
}
}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Each file.read(...) advances the file pointer. After hashing a length-byte file, the pointer is at fileStartPos + length. So whatever position the pointer is at when new Http.Body(...) runs becomes the offset that subsequent retries seek back to before each network attempt. Without the restore at line 1335, that captured offset would be fileStartPos + length instead of fileStartPos, and every PUT — first attempt and retries alike — would write from the wrong offset (typically EOF, producing an empty or truncated upload). Http.Body doesnt handle this.

minio-go does this in api-put-object-streaming.go:682-705

This comment was marked as duplicate.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing this. in order to do so, Checksum.update's RandomAccessFile branch must be rewritten to use FileChannel.read(ByteBuffer, long position) so it no longer mutates the file's pointer.
PTAL @balamurugana


if (sha256HexString == null && sha256Base64String == null) {
if (!baseUrl.isHttps()) {
Checksum.Hasher hasher = Checksum.Algorithm.SHA256.hasher();
Expand Down Expand Up @@ -1278,6 +1333,14 @@ private Object[] createBody(PutObjectAPIBaseArgs args, MediaType contentType)
}
}

if (args.file() != null) {
try {
args.file().seek(fileStartPos);
} catch (IOException e) {
throw new MinioException(e);
}
}
Comment thread
allanrogerr marked this conversation as resolved.
Outdated

Http.Body body = null;
if (args.file() != null) {
body = new Http.Body(args.file(), args.length(), contentType, sha256HexString, md5Hash);
Expand Down
168 changes: 166 additions & 2 deletions api/src/main/java/io/minio/Http.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* MinIO Java SDK for Amazon S3 Compatible Cloud Storage, (C) 2025 MinIO, Inc.
* MinIO Java SDK for Amazon S3 Compatible Cloud Storage, (C) 2025-2026 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -53,7 +53,9 @@
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.IntSupplier;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.annotation.Nonnull;
Expand All @@ -65,6 +67,7 @@
import javax.net.ssl.TrustManager;
import javax.net.ssl.TrustManagerFactory;
import javax.net.ssl.X509TrustManager;
import okhttp3.Interceptor;
import okhttp3.MediaType;
import okhttp3.OkHttpClient;
import okhttp3.Protocol;
Expand Down Expand Up @@ -620,6 +623,8 @@ public static OkHttpClient newDefaultClient() {
.writeTimeout(DEFAULT_TIMEOUT, TimeUnit.MILLISECONDS)
.readTimeout(DEFAULT_TIMEOUT, TimeUnit.MILLISECONDS)
.protocols(Arrays.asList(Protocol.HTTP_1_1))
.retryOnConnectionFailure(false)
.addInterceptor(new RetryInterceptor())
.build();
try {
return enableExternalCertificatesFromEnv(client);
Expand Down Expand Up @@ -683,6 +688,158 @@ public static OkHttpClient setTimeout(
.build();
}

/**
* OkHttp interceptor that retries transient HTTP failures with full-jitter exponential backoff.
*
* <p>This is part of the SDK's supported public API: it is installed automatically by {@link
* BaseS3Client} on every supplied {@link OkHttpClient} (default or caller-provided), and may also
* be registered explicitly on a stand-alone {@link OkHttpClient} via {@code .addInterceptor(new
* Http.RetryInterceptor())}.
*
* <p>Retries on:
*
* <ul>
* <li>retryable IOException — connection reset, EOF, socket timeout, idle-connection close.
* Excludes TLS handshake / unknown-CA / HTTPS protocol mismatch.
* <li>retryable HTTP status code — 408, 429, 499, 500, 502, 503, 504, 520.
* <li>retryable S3 error code in a non-2xx response body — {@code SlowDown}, {@code
* InternalError}, {@code ExpiredToken}, etc.
* </ul>
*
* <p>Backoff is full-jitter exponential, with a 200&nbsp;ms unit and a 1&nbsp;s per-attempt cap.
* The maximum number of attempts is supplied per intercept call via {@link IntSupplier} so that
* an SDK client can expose runtime tuning while keeping the interceptor itself stateless. The
* no-arg constructor uses the package default of 10 attempts.
*
* <p><b>Threading.</b> Backoff sleeps on the OkHttp dispatcher thread that owns the call. Under
* sustained 5xx/429 storms this can hold dispatcher slots idle while waiting. Callers that need
* higher concurrency under widespread retry should size the dispatcher worker pool accordingly
* (see {@link okhttp3.Dispatcher#setMaxRequests} / {@code setMaxRequestsPerHost}).
*
* <p><b>Cancellation.</b> The interceptor checks {@code chain.call().isCanceled()} before each
* attempt and after any thrown {@link IOException}, so a {@code Call.cancel()} from the caller
* (or an upstream interceptor) terminates the retry loop instead of treating the cancellation as
* a retryable transport error.
*
* <p><b>Replayability.</b> Retries call {@code chain.proceed(request)} again, which re-invokes
* {@code request.body().writeTo(sink)}. Callers using the SDK's own body types ({@link Body} for
* {@code byte[]}, {@link ByteBuffer}, or {@link RandomAccessFile}) are safe — those bodies are
* replayable. A caller-supplied {@link okhttp3.RequestBody} that overrides {@code isOneShot()} to
* return {@code true} (or otherwise consumes its source on the first {@code writeTo}) MUST NOT be
* retried; either disable retries via {@link BaseS3Client#setMaxRetries(int)} {@code (1)} for
* those calls, or wrap the body in a replayable form before submission.
*
* <p><b>Request reuse.</b> The interceptor passes the same signed {@link okhttp3.Request} on
* every attempt, so the {@code X-Amz-Date} / {@code Authorization} headers are not refreshed
* between attempts. This is harmless for the default attempt budget (worst-case wall-clock
* &lt;&nbsp;10&nbsp;s, well inside the 15-minute S3 signing window) but extreme {@code
* maxRetries} values combined with high backoff caps could outlast either the signing window or a
* short-lived STS credential. minio-go's request-level retry rebuilds and re-signs each attempt;
* that semantic is intentionally not replicated here in exchange for the simpler interceptor
* model.
*
* <p>To opt out of retries on a stand-alone {@link OkHttpClient}, simply do not register this
* interceptor.
*/
public static class RetryInterceptor implements Interceptor {
Comment thread
allanrogerr marked this conversation as resolved.
/** Maximum body bytes inspected when probing for an S3 {@code <Code>} value. */
Comment thread
allanrogerr marked this conversation as resolved.
Outdated
private static final long MAX_PEEK_BYTES = 5L * 1024L * 1024L;

private static final Pattern S3_ERROR_CODE_PATTERN = Pattern.compile("<Code>([^<]+)</Code>");

private final IntSupplier maxAttemptsSupplier;

/** Creates a retry interceptor that uses {@link Retry#MAX_RETRY} attempts. */
public RetryInterceptor() {
this(() -> Retry.MAX_RETRY);
}

/**
* Creates a retry interceptor that reads its attempt budget from the supplier on every
* intercept call. Supplier values less than 1 are clamped to 1 (single attempt = retry off).
*/
public RetryInterceptor(IntSupplier maxAttemptsSupplier) {
this.maxAttemptsSupplier = Objects.requireNonNull(maxAttemptsSupplier, "maxAttemptsSupplier");
}

@Override
public okhttp3.Response intercept(Chain chain) throws IOException {
okhttp3.Request request = chain.request();
int maxAttempts = Math.max(1, maxAttemptsSupplier.getAsInt());

okhttp3.Response response = null;
IOException lastException = null;

for (int attempt = 0; attempt < maxAttempts; attempt++) {
// Honour caller cancellation: mirrors minio-go's
// `errors.Is(err, context.Canceled)` short-circuit so the loop does not
// burn attempts re-issuing a call the caller has already abandoned.
if (chain.call().isCanceled()) {
if (response != null) response.close();
throw new IOException("Canceled");
}

if (attempt > 0) {
long delayMs = Retry.exponentialBackoffMs(attempt - 1);
if (delayMs > 0L) {
try {
Thread.sleep(delayMs);
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
throw new IOException("Retry interrupted", ie);
}
}
}

if (response != null) {
response.close();
response = null;
}

try {
response = chain.proceed(request);
} catch (IOException e) {
// A cancelled call surfaces as IOException with a message that would
// otherwise pass `isRequestErrorRetryable`. Bail out instead.
if (chain.call().isCanceled()) throw e;
if (!Retry.isRequestErrorRetryable(e)) throw e;
lastException = e;
continue;
}

if (Retry.isHttpStatusRetryable(response.code())) {
lastException = null;
continue;
}

if (!response.isSuccessful()) {
String s3Code = peekS3ErrorCode(response);
if (Retry.isS3CodeRetryable(s3Code)) {
lastException = null;
continue;
}
}

return response;
}

if (lastException != null) throw lastException;
return response;
}

/** Returns the S3 {@code <Code>} value if the body is XML containing one, else null. */
private static String peekS3ErrorCode(okhttp3.Response response) {
try {
String body = response.peekBody(MAX_PEEK_BYTES).string();
if (body.isEmpty() || !body.contains("<Error")) return null;
Matcher m = S3_ERROR_CODE_PATTERN.matcher(body);
return m.find() ? m.group(1) : null;
} catch (IOException e) {
return null;
}
}
}

/** HTTP body of {@link RandomAccessFile}, {@link ByteBuffer} or {@link byte} array. */
public static class Body {
private okhttp3.RequestBody requestBody;
Expand All @@ -695,7 +852,14 @@ public static class Body {
private String md5Hash;
private boolean bodyString;

/** Creates Body for okhttp3 RequestBody. */
/**
* Creates Body for okhttp3 RequestBody.
*
* <p><b>Retry caveat.</b> {@link Http.RetryInterceptor} re-invokes {@code writeTo} on each
* retry. Pass only replayable bodies; do not pass a body that overrides {@code isOneShot()} to
* return {@code true} (or otherwise consumes its source on first write) unless retries are
* disabled for that call.
*/
public Body(okhttp3.RequestBody requestBody) {
this.requestBody = requestBody;
this.contentType = requestBody.contentType();
Expand Down
Loading
Loading