From 7451e32d4da981046d4b06957df0589d3a048c59 Mon Sep 17 00:00:00 2001 From: david-pivonka Date: Wed, 22 Apr 2026 08:13:29 +0200 Subject: [PATCH 1/2] fix: attach per-part checksum header on UploadPart for Object Lock buckets S3 Object Lock buckets reject UploadPart requests that carry the checksum only as a trailer (the default behaviour of s3manager.Uploader) with InvalidRequest "Content-MD5 OR x-amz-checksum-* HTTP header is required for Put Part requests with Object Lock parameters". When CheckSumAlgorithm is set to CRC32 and the file is large enough to require multipart, switch to a manual CreateMultipartUpload / UploadPart / CompleteMultipartUpload loop that pre-computes CRC32 per part and sends it as the x-amz-checksum-crc32 request header via UploadPartInput.ChecksumCRC32. Extends #829 (which fixed the PutObject / CopyObject paths) to the UploadPart path. --- pkg/storage/s3.go | 99 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 95 insertions(+), 4 deletions(-) diff --git a/pkg/storage/s3.go b/pkg/storage/s3.go index 20381238..db8af32f 100644 --- a/pkg/storage/s3.go +++ b/pkg/storage/s3.go @@ -1,9 +1,12 @@ package storage import ( + "bytes" "context" "crypto/tls" + "encoding/base64" "fmt" + "hash/crc32" "io" "net/http" "os" @@ -339,9 +342,6 @@ func (s *S3) PutFileAbsolute(ctx context.Context, key string, r io.ReadCloser, l if s.Config.SSEKMSEncryptionContext != "" { params.SSEKMSEncryptionContext = aws.String(s.Config.SSEKMSEncryptionContext) } - uploader := s3manager.NewUploader(s.client) - uploader.Concurrency = s.Concurrency - uploader.BufferProvider = s3manager.NewBufferedReadSeekerWriteToPool(s.BufferSize) var partSize int64 if s.Config.ChunkSize > 0 && (localSize+s.Config.ChunkSize-1)/s.Config.ChunkSize < s.Config.MaxPartsCount { partSize = s.Config.ChunkSize @@ -351,7 +351,17 @@ func (s *S3) PutFileAbsolute(ctx context.Context, key string, r io.ReadCloser, l partSize += max(1, (localSize%s.Config.MaxPartsCount)/s.Config.MaxPartsCount) 
} } - uploader.PartSize = AdjustValueByRange(partSize, 5*1024*1024, 5*1024*1024*1024) + partSize = AdjustValueByRange(partSize, 5*1024*1024, 5*1024*1024*1024) + + // s3manager.Uploader sends the part checksum as a trailer, which S3 Object Lock rejects on UploadPart. Fall back to manual multipart with per-part x-amz-checksum-crc32 header, fix https://github.com/Altinity/clickhouse-backup/issues/829 + if s.Config.CheckSumAlgorithm == string(s3types.ChecksumAlgorithmCrc32) && localSize > partSize { + return s.putFileMultipartCRC32(ctx, &params, r, localSize, partSize) + } + + uploader := s3manager.NewUploader(s.client) + uploader.Concurrency = s.Concurrency + uploader.BufferProvider = s3manager.NewBufferedReadSeekerWriteToPool(s.BufferSize) + uploader.PartSize = partSize if _, err := uploader.Upload(ctx, &params); err != nil { return errors.WithMessage(err, "S3 PutFileAbsolute Upload") @@ -359,6 +369,87 @@ func (s *S3) PutFileAbsolute(ctx context.Context, key string, r io.ReadCloser, l return nil } +func (s *S3) putFileMultipartCRC32(ctx context.Context, putParams *s3.PutObjectInput, r io.Reader, localSize, partSize int64) error { + createParams := &s3.CreateMultipartUploadInput{ + Bucket: putParams.Bucket, + Key: putParams.Key, + StorageClass: putParams.StorageClass, + ACL: putParams.ACL, + Tagging: putParams.Tagging, + } + s.enrichCreateMultipartUploadParams(createParams) + + initResp, err := s.client.CreateMultipartUpload(ctx, createParams) + if err != nil { + return errors.WithMessage(err, "S3 putFileMultipartCRC32 CreateMultipartUpload") + } + uploadID := initResp.UploadId + + abort := func(cause error) error { + abortParams := &s3.AbortMultipartUploadInput{ + Bucket: putParams.Bucket, + Key: putParams.Key, + UploadId: uploadID, + } + if s.Config.RequestPayer != "" { + abortParams.RequestPayer = s3types.RequestPayer(s.Config.RequestPayer) + } + if _, abortErr := s.client.AbortMultipartUpload(context.Background(), abortParams); abortErr != nil { + return 
errors.Wrapf(cause, "aborting putFileMultipartCRC32 multipart upload: %v, original error was", abortErr) + } + return cause + } + + buf := make([]byte, partSize) + parts := make([]s3types.CompletedPart, 0, (localSize+partSize-1)/partSize) + var partNumber int32 = 1 + remaining := localSize + for remaining > 0 { + toRead := partSize + if remaining < toRead { + toRead = remaining + } + if _, readErr := io.ReadFull(r, buf[:toRead]); readErr != nil { + return abort(errors.Wrapf(readErr, "S3 putFileMultipartCRC32 read part=%d", partNumber)) + } + h := crc32.NewIEEE() + h.Write(buf[:toRead]) + uploadParams := &s3.UploadPartInput{ + Bucket: putParams.Bucket, + Key: putParams.Key, + UploadId: uploadID, + PartNumber: aws.Int32(partNumber), + Body: bytes.NewReader(buf[:toRead]), + ChecksumAlgorithm: s3types.ChecksumAlgorithmCrc32, + ChecksumCRC32: aws.String(base64.StdEncoding.EncodeToString(h.Sum(nil))), + } + if s.Config.RequestPayer != "" { + uploadParams.RequestPayer = s3types.RequestPayer(s.Config.RequestPayer) + } + partResp, uploadErr := s.client.UploadPart(ctx, uploadParams) + if uploadErr != nil { + return abort(errors.Wrapf(uploadErr, "S3 putFileMultipartCRC32 UploadPart part=%d", partNumber)) + } + parts = append(parts, s3types.CompletedPart{ + ETag: partResp.ETag, + PartNumber: aws.Int32(partNumber), + ChecksumCRC32: partResp.ChecksumCRC32, + }) + partNumber++ + remaining -= toRead + } + + if _, completeErr := s.client.CompleteMultipartUpload(ctx, &s3.CompleteMultipartUploadInput{ + Bucket: putParams.Bucket, + Key: putParams.Key, + UploadId: uploadID, + MultipartUpload: &s3types.CompletedMultipartUpload{Parts: parts}, + }); completeErr != nil { + return abort(errors.WithMessage(completeErr, "S3 putFileMultipartCRC32 CompleteMultipartUpload")) + } + return nil +} + func (s *S3) deleteKey(ctx context.Context, key string) error { params := &s3.DeleteObjectInput{ Bucket: aws.String(s.Config.Bucket), From 8b677559c3b0c68a2b7eabc921355132246b83a0 Mon Sep 17 00:00:00 
2001 From: slach Date: Sat, 25 Apr 2026 23:44:04 +0300 Subject: [PATCH 2/2] add GEMINI.md to .gitignore Signed-off-by: slach --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 77240169..506b6dc3 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ vendor/ .aider* AGENTS.md CLAUDE.md +GEMINI.md .qwen/ .claude/ .crush/