ServerSideHannes · ServerSideHannes · Jul 1, 2026
diff --git a/s3proxy/handlers/multipart/copy.py b/s3proxy/handlers/multipart/copy.py
@@ -254,7 +254,13 @@ async def _streaming_copy_part_inner(
         plaintext_size: int,
     ) -> Response:
         """Stream-decrypt the source and encrypt+upload each chunk as an internal S3 part."""
-        chunk_size = crypto.calculate_optimal_part_size(plaintext_size)
+        # Cap the pump chunk at MAX_BUFFER_SIZE: calculate_optimal_part_size returns
+        # up to 64MB for large sources, which made _pump_copy_chunks buffer a 64MB
+        # chunk + copy it + re-encrypt (~150MB/copy) while the limiter only reserved
+        # copy_pipeline_peak (~32MB). Under a scylla dedup flood of large SSTables
+        # that under-reservation OOMed the pod. 8MB chunks keep the copy truly
+        # streaming and matched to the reservation.
+        chunk_size = min(crypto.calculate_optimal_part_size(plaintext_size), crypto.MAX_BUFFER_SIZE)
         estimated_parts = max(1, math.ceil(plaintext_size / chunk_size))
 
         internal_part_start = await self.multipart_manager.allocate_internal_parts(

diff --git a/tests/unit/test_copy_chunk_bounded.py b/tests/unit/test_copy_chunk_bounded.py
@@ -0,0 +1,30 @@
+"""UploadPartCopy must pump the source in MAX_BUFFER_SIZE chunks, not 64MB ones.
+
+The dedup copy path (_pump_copy_chunks) sized its buffer from
+calculate_optimal_part_size, which returns up to 64MB for large sources. So a
+copy of a large SSTable buffered a 64MB chunk + copied it + re-encrypted it
+(~150MB resident) while the limiter only reserved copy_pipeline_peak (~32MB).
+Under a scylla dedup flood that under-reservation OOMed the pod (reproduced
+locally: concurrent UploadPartCopy of >=80MB sources pinned RSS at the 512Mi
+wall; capping the chunk to 8MB dropped peak to ~320MiB).
+
+This pins the invariant: the pump chunk never exceeds MAX_BUFFER_SIZE, so the
+copy stays streaming and matched to its reservation, regardless of source size.
+"""
+
+from s3proxy import crypto
+
+
+def _pump_chunk_size(plaintext_size: int) -> int:
+    # Mirror the sizing in _streaming_copy_part_inner (copy.py).
+    return min(crypto.calculate_optimal_part_size(plaintext_size), crypto.MAX_BUFFER_SIZE)
+
+
+def test_optimal_part_size_is_large_for_big_sources():
+    # Guard the premise: without the cap, big sources buffer huge chunks.
+    assert crypto.calculate_optimal_part_size(80 * 1024 * 1024) > crypto.MAX_BUFFER_SIZE
+
+
+def test_pump_chunk_capped_at_max_buffer():
+    for size_mb in (1, 8, 52, 80, 120, 5 * 1024, 100 * 1024):
+        assert _pump_chunk_size(size_mb * 1024 * 1024) <= crypto.MAX_BUFFER_SIZE