Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions echopype/tests/consolidate/test_add_depth.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ def test_ek_depth_utils_group_variable_NaNs_logger_warnings(caplog, ek80_path):
ds_Sv = ep.calibrate.compute_Sv(ed, waveform_mode="CW", encode_mode="power")

# Set first index of group variables to NaN
ed["Platform"]["water_level"].values = np.nan # Is a scalar
ed["Platform"]["water_level"][...] = np.nan # Is a scalar
ed["Platform"]["vertical_offset"].values[0] = np.nan
ed["Platform"]["transducer_offset_z"].values[0] = np.nan
ed["Platform"]["pitch"].values[0] = np.nan
Expand Down Expand Up @@ -443,7 +443,8 @@ def test_add_depth_EK_with_platform_vertical_offsets(relpath, sonar_model, compu
ds_Sv_with_depth["depth"].data,
(ds_Sv["echo_range"] + transducer_depth).data,
rtol=1e-10,
atol=1e-10
atol=1e-10,
equal_nan=True
)


Expand Down
8 changes: 2 additions & 6 deletions echopype/tests/consolidate/test_add_location.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,16 +328,12 @@ def test_add_location_lat_lon_missing_all_NaN_errors(
if error_type == "missing":
ed["Platform"] = ed["Platform"].drop_vars(f"longitude_{datagram_type.lower()}")
elif error_type == "all_nan":
ed["Platform"][f"latitude_{datagram_type.lower()}"].data = (
[np.nan] * len(ed["Platform"][f"latitude_{datagram_type.lower()}"])
)
ed["Platform"][f"latitude_{datagram_type.lower()}"].data[:] = np.nan
else:
if error_type == "missing":
ed["Platform"] = ed["Platform"].drop_vars("longitude")
if error_type == "all_nan":
ed["Platform"]["latitude"].data = (
[np.nan] * len(ed["Platform"]["latitude"])
)
ed["Platform"]["latitude"].data[:] = np.nan

# Check if the expected error is logged
with pytest.raises(ValueError) as exc_info:
Expand Down
2 changes: 1 addition & 1 deletion echopype/tests/echodata/test_echodata.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def test_group_paths(self, converted_zarr):
def test_nbytes(self, converted_zarr):
ed = self.create_ed(converted_zarr)
assert isinstance(ed.nbytes, float)
assert ed.nbytes == 4690060.0
assert ed.nbytes == 4687964.0

def test_repr(self, converted_zarr):
zarr_path_string = str(converted_zarr.absolute())
Expand Down
24 changes: 15 additions & 9 deletions echopype/utils/coding.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
"zarr": {
"float": {"compressors": [BloscCodec(cname="zstd", clevel=3, shuffle="bitshuffle")]},
"int": {"compressors": [BloscCodec(cname="lz4", clevel=5, shuffle="shuffle", blocksize=0)]},
"string": {
"object": {
"compressors": [BloscCodec(cname="lz4", clevel=5, shuffle="shuffle", blocksize=0)]
},
"time": {
Expand All @@ -43,9 +43,9 @@


EXPECTED_VAR_DTYPE = {
"channel": np.str_,
"cal_channel_id": np.str_,
"beam": np.str_,
"channel": np.object_,
"cal_channel_id": np.object_,
"beam": np.object_,
"channel_mode": np.float32,
"beam_stabilisation": np.byte,
"non_quantitative_processing": np.int16,
Expand All @@ -63,8 +63,8 @@ def sanitize_dtypes(ds: xr.Dataset) -> xr.Dataset:
if name in EXPECTED_VAR_DTYPE:
expected_dtype = EXPECTED_VAR_DTYPE[name]
elif np.issubdtype(var.dtype, np.object_):
# Defaulting to strings dtype for object data types
expected_dtype = np.str_
# Defaulting to variable-length UTF-8 string (object) for object data types
expected_dtype = np.object_
else:
# For everything else, this should be the same
expected_dtype = var.dtype
Expand Down Expand Up @@ -119,7 +119,13 @@ def _get_dask_auto_chunk(
tuple
The chunks
"""
# Create a tuple filled with "auto" for each dimension in the variable's shape.
# Create a tuple filled with "auto" for each dimension in the variable's shape
# For object dtype (e.g., variable-length strings), Dask cannot auto-chunk
if np.issubdtype(variable.dtype, np.object_):
# Return a single chunk for each dimension (i.e., unchunked)
return {dim: size for dim, size in variable.sizes.items()}

# Otherwise, use Dask's auto_chunks for numeric/fixed-size types
auto_tuple = tuple("auto" for _ in variable.shape)

# Generate a tuple of chunk sizes using the dask 'auto_chunks' function.
Expand Down Expand Up @@ -162,8 +168,8 @@ def get_zarr_compression(var: xr.Variable, compression_settings: dict) -> dict:
return compression_settings["float"]
elif np.issubdtype(var.dtype, np.integer):
return compression_settings["int"]
elif np.issubdtype(var.dtype, np.str_):
return compression_settings["string"]
elif np.issubdtype(var.dtype, np.str_) or np.issubdtype(var.dtype, object):
return compression_settings["object"]
elif np.issubdtype(var.dtype, np.datetime64):
return compression_settings["time"]
else:
Expand Down
Loading