zarr-developers · mkitti · Jun 18, 2026 · Jun 19, 2026 · Jun 19, 2026 · Jun 19, 2026
diff --git a/.github/codecov.yml b/.github/codecov.yml
@@ -7,6 +7,8 @@ coverage:
     patch:
       default:
         target: 100
+  ignore:
+    - "numcodecs/tests/**"
 comment:
   layout: "diff, files"
   behavior: default

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -13,8 +13,8 @@ jobs:
       fail-fast: false
       matrix:
         python-version: ["3.11", "3.12", "3.13"]
-        # macos-13 is an intel runner, macos-14 is a arm64 runner
-        platform: [ubuntu-latest, windows-latest, macos-13, macos-14]
+        # macos-15-large is an intel runner, macos-14 is an arm64 runner
+        platform: [ubuntu-latest, windows-latest, macos-15-large, macos-14]
 
     defaults:
       run:
@@ -38,7 +38,7 @@ jobs:
         run: conda install -y c-compiler cxx-compiler
 
       - name: Install clang
-        if: matrix.platform == 'macos-13'
+        if: matrix.platform == 'macos-15-large'
         run: conda install -y 'clang>=12.0.1,<17'
 
       - name: Show conda environment info
@@ -52,12 +52,29 @@ jobs:
           export DISABLE_NUMCODECS_AVX2=""
           python -m pip install -v -e .[test,test_extras,msgpack,crc32c,pcodec,zfpy]
 
+      - name: Deduplicate LC_RPATH entries
+        # The conda-forge compilers inject -Wl,-rpath,$PREFIX/lib multiple
+        # times, producing duplicate LC_RPATH load commands. macOS 15's dyld
+        # rejects duplicate LC_RPATH at load time, so strip the extras.
+        if: runner.os == 'macOS'
+        run: |
+          rpaths() { otool -l "$1" | awk '/ LC_RPATH$/{getline; getline; print $2}'; }
+          for so in numcodecs/*.so; do
+            for rp in $(rpaths "$so" | sort | uniq -d); do
+              while [ "$(rpaths "$so" | grep -cx "$rp")" -gt 1 ]; do
+                echo "Removing duplicate LC_RPATH '$rp' from $so"
+                install_name_tool -delete_rpath "$rp" "$so"
+              done
+            done
+          done
+
       - name: Install zarr-python
         # Since zarr v3 requires numpy >= 1.25, on Python 3.11 leave it out
         # so we can have some tests of our minimum version of numpy (1.24)
         if: matrix.python-version != '3.11'
-        # TODO: remove --pre option when zarr v3 is out
-        run: python -m pip install --pre zarr>=3.0.0b2
+        # numcodecs.zarr3 in this branch targets the zarr 3.0.x API; zarr 3.1+
+        # moved these codecs into zarr itself and changed internal APIs.
+        run: python -m pip install "zarr>=3.0,<3.1"
 
       - name: List installed packages
         run: python -m pip list

diff --git a/.github/workflows/wheel.yaml b/.github/workflows/wheel.yaml
@@ -13,8 +13,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        # macos-13 is an intel runner, macos-14 is a arm64 runner
-        os: [ubuntu-latest, windows-latest, macos-13, macos-14]
+        # macos-15-large is an intel runner, macos-14 is an arm64 runner
+        os: [ubuntu-latest, windows-latest, macos-15-large, macos-14]
     env:
       CIBW_TEST_COMMAND: python -c "import numcodecs"
       CIBW_BUILD: "cp311-* cp312-* cp313-*"

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -30,4 +30,4 @@ repos:
       hooks:
       -   id: mypy
           args: [--config-file, pyproject.toml]
-          additional_dependencies: [numpy, pytest, crc32c, zfpy, 'zarr>=3.0.0rc1']
+          additional_dependencies: [numpy, pytest, crc32c, zfpy, 'zarr>=3.0.0rc1,<3.1']
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
@@ -4,12 +4,12 @@ submodules:
   include: all
 
 build:
-  os: ubuntu-20.04
+  os: ubuntu-24.04
   tools:
     python: "3.12"
   jobs:
     post_install:
-      - python -m pip install --pre 'zarr>=3.0.0b2'
+      - python -m pip install --pre 'zarr~=3.0.0'
 
 sphinx:
   configuration: docs/conf.py

diff --git a/docs/release.rst b/docs/release.rst
@@ -21,6 +21,12 @@ Improvements
 ~~~~~~~~~~~~
 * Raise a custom `UnknownCodecError` when trying to retrieve an unavailable codec.
   By :user:`Cas Wognum <cwognum>`.
+* Add streaming decompression for Zstandard when the frame content size is unknown.
+  By :user:`Mark Kittisopikul <mkitti>`, :issue:`707`
+* Fix Zstd decompression negative size issue on 32-bit platforms.
+  By :user:`Mark Kittisopikul <mkitti>`, :issue:`782`
+* Allow Zstandard to decompress multiple concatenated frames.
+  By :user:`Mark Kittisopikul <mkitti>`, :issue:`757`
 
 Fixes
 ~~~~~

diff --git a/numcodecs/tests/test_checksum32.py b/numcodecs/tests/test_checksum32.py
@@ -54,12 +54,12 @@
     )
 
 
-@pytest.mark.parametrize(("codec", "arr"), itertools.product(codecs, arrays))
+@pytest.mark.parametrize(("codec", "arr"), tuple(itertools.product(codecs, arrays)))
 def test_encode_decode(codec, arr):
     check_encode_decode(arr, codec)
 
 
-@pytest.mark.parametrize(("codec", "arr"), itertools.product(codecs, arrays))
+@pytest.mark.parametrize(("codec", "arr"), tuple(itertools.product(codecs, arrays)))
 def test_errors(codec, arr):
     enc = codec.encode(arr)
     with pytest.raises(RuntimeError):

diff --git a/numcodecs/tests/test_pyzstd.py b/numcodecs/tests/test_pyzstd.py
@@ -0,0 +1,76 @@
+# Check Zstd against pyzstd package
+
+import numpy as np
+import pytest
+import pyzstd
+
+from numcodecs.zstd import Zstd
+
+test_data = [
+    b"Hello World!",
+    np.arange(113).tobytes(),
+    np.arange(10, 15).tobytes(),
+    np.random.randint(3, 50, size=(53,), dtype=np.uint16).tobytes(),
+]
+
+
+@pytest.mark.parametrize("input", test_data)
+def test_pyzstd_simple(input):
+    """
+    Test if Zstd.[decode, encode] can perform the inverse operation to
+    pyzstd.[compress, decompress] in the simple case.
+    """
+    z = Zstd()
+    assert z.decode(pyzstd.compress(input)) == input
+    assert pyzstd.decompress(z.encode(input)) == input
+
+
+@pytest.mark.parametrize("input", test_data)
+def test_pyzstd_simple_multiple_frames_decode(input):
+    """
+    Test decompression of two concatenated frames of known sizes
+
+    numcodecs.zstd.Zstd previously failed here because it only assessed the size
+    of the first frame. The decoder must keep iterating through all the frames
+    until the end of the input buffer.
+    """
+    z = Zstd()
+    assert pyzstd.decompress(pyzstd.compress(input) * 2) == input * 2
+    assert z.decode(pyzstd.compress(input) * 2) == input * 2
+
+
+@pytest.mark.parametrize("input", test_data)
+def test_pyzstd_simple_multiple_frames_encode(input):
+    """
+    Test if pyzstd can decompress two concatenated frames from Zstd.encode
+    """
+    z = Zstd()
+    assert pyzstd.decompress(z.encode(input) * 2) == input * 2
+
+
+@pytest.mark.parametrize("input", test_data)
+def test_pyzstd_streaming(input):
+    """
+    Test if Zstd can decode a single frame and concatenated frames in streaming
+    mode where the decompressed size is not recorded in the frame header.
+    """
+    pyzstd_c = pyzstd.ZstdCompressor()
+    pyzstd_d = pyzstd.ZstdDecompressor()
+    pyzstd_e = pyzstd.EndlessZstdDecompressor()
+    z = Zstd()
+
+    d_bytes = input
+    pyzstd_c.compress(d_bytes)
+    c_bytes = pyzstd_c.flush()
+    assert z.decode(c_bytes) == d_bytes
+    assert pyzstd_d.decompress(z.encode(d_bytes)) == d_bytes
+
+    # Test multiple streaming frames
+    assert z.decode(c_bytes * 2) == pyzstd_e.decompress(c_bytes * 2)
+    assert z.decode(c_bytes * 3) == pyzstd_e.decompress(c_bytes * 3)
+    assert z.decode(c_bytes * 4) == pyzstd_e.decompress(c_bytes * 4)
+    assert z.decode(c_bytes * 5) == pyzstd_e.decompress(c_bytes * 5)
+    assert z.decode(c_bytes * 7) == pyzstd_e.decompress(c_bytes * 7)
+    assert z.decode(c_bytes * 11) == pyzstd_e.decompress(c_bytes * 11)
+    assert z.decode(c_bytes * 13) == pyzstd_e.decompress(c_bytes * 13)
+    assert z.decode(c_bytes * 99) == pyzstd_e.decompress(c_bytes * 99)
diff --git a/numcodecs/tests/test_zarr3.py b/numcodecs/tests/test_zarr3.py
@@ -92,12 +92,14 @@ def test_generic_compressor(
         (numcodecs.zarr3.FixedScaleOffset, {"offset": 0, "scale": 25.5}),
         (numcodecs.zarr3.FixedScaleOffset, {"offset": 0, "scale": 51, "astype": "uint16"}),
         (numcodecs.zarr3.AsType, {"encode_dtype": "float32", "decode_dtype": "float32"}),
+        (numcodecs.zarr3.AsType, {"encode_dtype": "float32"}),
     ],
     ids=[
         "delta",
         "fixedscaleoffset",
         "fixedscaleoffset2",
         "astype",
+        "astype_no_decode_dtype",
     ],
 )
 def test_generic_filter(

diff --git a/numcodecs/tests/test_zstd.py b/numcodecs/tests/test_zstd.py
@@ -1,4 +1,5 @@
 import itertools
+import subprocess
 
 import numpy as np
 import pytest
@@ -90,3 +91,104 @@ def test_native_functions():
     assert Zstd.default_level() == 3
     assert Zstd.min_level() == -131072
     assert Zstd.max_level() == 22
+
+
+def test_streaming_decompression():
+    # Test input frames with unknown frame content size
+    codec = Zstd()
+
+    # If the zstd command line interface is available, check the bytes
+    cli = zstd_cli_available()
+    if cli:
+        view_zstd_streaming_bytes()
+
+    # Decode bytes directly that were the result of streaming compression
+    bytes_val = b'(\xb5/\xfd\x00Xa\x00\x00Hello World!'
+    dec = codec.decode(bytes_val)
+    dec_expected = b'Hello World!'
+    assert dec == dec_expected
+    if cli:
+        assert bytes_val == generate_zstd_streaming_bytes(dec_expected)
+        assert dec_expected == generate_zstd_streaming_bytes(bytes_val, decompress=True)
+
+    # Two consecutive frames given as input
+    bytes2 = bytes(bytearray(bytes_val * 2))
+    dec2 = codec.decode(bytes2)
+    dec2_expected = b'Hello World!Hello World!'
+    assert dec2 == dec2_expected
+    if cli:
+        assert dec2_expected == generate_zstd_streaming_bytes(bytes2, decompress=True)
+
+    # Single long frame that decompresses to a large output
+    bytes3 = b'(\xb5/\xfd\x00X$\x02\x00\xa4\x03ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz\x01\x00:\xfc\xdfs\x05\x05L\x00\x00\x08s\x01\x00\xfc\xff9\x10\x02L\x00\x00\x08k\x01\x00\xfc\xff9\x10\x02L\x00\x00\x08c\x01\x00\xfc\xff9\x10\x02L\x00\x00\x08[\x01\x00\xfc\xff9\x10\x02L\x00\x00\x08S\x01\x00\xfc\xff9\x10\x02L\x00\x00\x08K\x01\x00\xfc\xff9\x10\x02L\x00\x00\x08C\x01\x00\xfc\xff9\x10\x02L\x00\x00\x08u\x01\x00\xfc\xff9\x10\x02L\x00\x00\x08m\x01\x00\xfc\xff9\x10\x02L\x00\x00\x08e\x01\x00\xfc\xff9\x10\x02L\x00\x00\x08]\x01\x00\xfc\xff9\x10\x02L\x00\x00\x08U\x01\x00\xfc\xff9\x10\x02L\x00\x00\x08M\x01\x00\xfc\xff9\x10\x02M\x00\x00\x08E\x01\x00\xfc\x7f\x1d\x08\x01'
+    dec3 = codec.decode(bytes3)
+    dec3_expected = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz' * 1024 * 32
+    assert dec3 == dec3_expected
+    if cli:
+        assert bytes3 == generate_zstd_streaming_bytes(dec3_expected)
+        assert dec3_expected == generate_zstd_streaming_bytes(bytes3, decompress=True)
+
+    # Garbage input results in an error
+    bytes4 = bytes(bytearray([0, 0, 0, 0, 0, 0, 0, 0]))
+    with pytest.raises(RuntimeError, match='Zstd decompression error: invalid input data'):
+        codec.decode(bytes4)
+
+
+def test_multi_frame():
+    codec = Zstd()
+
+    hello_world = codec.encode(b"Hello world!")
+    assert codec.decode(hello_world) == b"Hello world!"
+    assert codec.decode(hello_world * 2) == b"Hello world!Hello world!"
+
+    hola = codec.encode(b"Hola ")
+    mundo = codec.encode(b"Mundo!")
+    assert codec.decode(hola) == b"Hola "
+    assert codec.decode(mundo) == b"Mundo!"
+    assert codec.decode(hola + mundo) == b"Hola Mundo!"
+
+    bytes_val = b'(\xb5/\xfd\x00Xa\x00\x00Hello World!'
+    dec = codec.decode(bytes_val)
+    dec_expected = b'Hello World!'
+    assert dec == dec_expected
+    cli = zstd_cli_available()
+    if cli:
+        assert bytes_val == generate_zstd_streaming_bytes(dec_expected)
+        assert dec_expected == generate_zstd_streaming_bytes(bytes_val, decompress=True)
+
+    # Concatenate frames of known sizes and unknown sizes
+    # unknown size frame at the end
+    assert codec.decode(hola + mundo + bytes_val) == b"Hola Mundo!Hello World!"
+    # unknown size frame at the beginning
+    assert codec.decode(bytes_val + hola + mundo) == b"Hello World!Hola Mundo!"
+    # unknown size frame in the middle
+    assert codec.decode(hola + bytes_val + mundo) == b"Hola Hello World!Mundo!"
+
+
+def generate_zstd_streaming_bytes(input: bytes, *, decompress: bool = False) -> bytes:
+    """
+    Use the zstd command line interface to compress or decompress bytes in streaming mode.
+    """
+    if decompress:
+        args = ["-d"]
+    else:
+        args = []
+
+    p = subprocess.run(["zstd", "--no-check", *args], input=input, capture_output=True)
+    return p.stdout
+
+
+def view_zstd_streaming_bytes():
+    bytes_val = generate_zstd_streaming_bytes(b"Hello world!")
+    print(f"    bytes_val = {bytes_val}")
+
+    bytes3 = generate_zstd_streaming_bytes(
+        b"ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz" * 1024 * 32
+    )
+    print(f"    bytes3 = {bytes3}")
+
+
+def zstd_cli_available() -> bool:
+    return not subprocess.run(
+        ["zstd", "-V"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
+    ).returncode
diff --git a/numcodecs/zarr3.py b/numcodecs/zarr3.py
@@ -286,7 +286,7 @@ def __init__(self, **codec_config: dict[str, JSON]) -> None:
 
     def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
         if astype := self.codec_config.get("astype"):
-            return replace(chunk_spec, dtype=np.dtype(astype))  # type: ignore[arg-type]
+            return replace(chunk_spec, dtype=np.dtype(astype))  # type: ignore[call-overload]
         return chunk_spec
 
 
@@ -304,7 +304,7 @@ def __init__(self, **codec_config: JSON) -> None:
 
     def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
         if astype := self.codec_config.get("astype"):
-            return replace(chunk_spec, dtype=np.dtype(astype))  # type: ignore[arg-type]
+            return replace(chunk_spec, dtype=np.dtype(astype))  # type: ignore[call-overload]
         return chunk_spec
 
     def evolve_from_array_spec(self, array_spec: ArraySpec) -> FixedScaleOffset: