Skip to content
Merged
131 changes: 130 additions & 1 deletion rust/sedona-raster/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ use datafusion_common::cast::{
as_string_array, as_string_view_array, as_struct_array, as_uint32_array,
};

use crate::builder::RasterBuilder;
use crate::traits::{BandRef, Bands, NdBuffer, RasterRef};
use crate::view_entries::ViewEntry;
use sedona_schema::raster::{band_indices, raster_indices, BandDataType};
Expand Down Expand Up @@ -138,6 +139,18 @@ impl<'a> BandRef for BandRefImpl<'a> {
data_type: self.data_type,
})
}

/// Zero-copy override: share the source row's backing `Buffer` into the
/// builder (refcount bump) instead of copying the visible bytes. OutDb
/// bands have an empty data column by design.
fn append_data_into(&self, builder: &mut RasterBuilder) -> Result<(), ArrowError> {
if self.is_indb() {
builder.append_band_data_from(self.data_array, self.band_row)
} else {
builder.band_data_writer().append_value([]);
Ok(())
}
}
}

/// Arrow-backed implementation of RasterRef for a single raster row.
Expand Down Expand Up @@ -545,7 +558,7 @@ impl<'a> RasterStructArray<'a> {
mod tests {
use super::*;
use crate::builder::RasterBuilder;
use crate::traits::{BandMetadata, RasterMetadata};
use crate::traits::{BandMetadata, BandOverrides, RasterMetadata};
use arrow_array::{ArrayRef, ListArray, StructArray, UInt32Array};
use arrow_buffer::{OffsetBuffer, ScalarBuffer};
use arrow_schema::{DataType, Fields};
Expand All @@ -555,6 +568,122 @@ mod tests {
use sedona_testing::rasters::generate_test_rasters;
use std::sync::Arc;

#[test]
fn copy_into_shares_buffer_zero_copy_and_overrides() {
// 16-byte InDb band (> inline threshold, so block-backed and shareable).
let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0];
let mut ib = RasterBuilder::new(1);
ib.start_raster_nd(&transform, &["x"], &[16], None).unwrap();
ib.start_band_nd(
Some("orig"),
&["x"],
&[16],
BandDataType::UInt8,
None,
None,
None,
)
.unwrap();
ib.band_data_writer()
.append_value((0u8..16).collect::<Vec<u8>>());
ib.finish_band().unwrap();
ib.finish_raster().unwrap();
let input_array = ib.finish().unwrap();
let input_rasters = RasterStructArray::try_new(&input_array).unwrap();
let input_raster = input_rasters.get(0).unwrap();
let input_band = input_raster.band(0).unwrap();
let input_ptr = input_band.nd_buffer().unwrap().buffer.as_ptr();

// copy_into with a name override; everything else inherited.
let mut ob = RasterBuilder::new(1);
ob.start_raster_nd(&transform, &["x"], &[16], None).unwrap();
input_band
.copy_into(
&mut ob,
BandOverrides {
name: Some("derived"),
..Default::default()
},
)
.unwrap();
ob.finish_band().unwrap();
ob.finish_raster().unwrap();
let out_array = ob.finish().unwrap();
let out_rasters = RasterStructArray::try_new(&out_array).unwrap();
let out_raster = out_rasters.get(0).unwrap();
let out_band = out_raster.band(0).unwrap();

// Zero-copy: the derived band references the same backing bytes.
assert_eq!(
input_ptr,
out_band.nd_buffer().unwrap().buffer.as_ptr(),
"copy_into must share the source buffer, not copy it"
);
assert_eq!(
out_band.nd_buffer().unwrap().as_contiguous().unwrap(),
(0u8..16).collect::<Vec<u8>>().as_slice()
);
// Name overridden; dim names + data type inherited from the source.
assert_eq!(out_raster.band_name(0), Some("derived"));
assert_eq!(out_band.dim_names(), vec!["x"]);
assert_eq!(out_band.data_type(), BandDataType::UInt8);
}

#[test]
fn copy_into_with_identity_override_view_succeeds() {
// An explicit identity override composes back to the identity, so it is
// accepted and behaves exactly like the inherited (None) case — this
// exercises the new `BandOverrides::view` path end to end.
let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0];
let mut ib = RasterBuilder::new(1);
ib.start_raster_nd(&transform, &["x"], &[4], None).unwrap();
ib.start_band_nd(
Some("orig"),
&["x"],
&[4],
BandDataType::UInt8,
None,
None,
None,
)
.unwrap();
ib.band_data_writer().append_value(vec![1u8, 2, 3, 4]);
ib.finish_band().unwrap();
ib.finish_raster().unwrap();
let in_array = ib.finish().unwrap();
let in_rasters = RasterStructArray::try_new(&in_array).unwrap();
let in_raster = in_rasters.get(0).unwrap();
let in_band = in_raster.band(0).unwrap();

let identity = [ViewEntry {
source_axis: 0,
start: 0,
step: 1,
steps: 4,
}];
let mut ob = RasterBuilder::new(1);
ob.start_raster_nd(&transform, &["x"], &[4], None).unwrap();
in_band
.copy_into(
&mut ob,
BandOverrides {
view: Some(&identity),
..Default::default()
},
)
.unwrap();
ob.finish_band().unwrap();
ob.finish_raster().unwrap();
let out_array = ob.finish().unwrap();
let out_rasters = RasterStructArray::try_new(&out_array).unwrap();
let out_raster = out_rasters.get(0).unwrap();
let out_band = out_raster.band(0).unwrap();
assert_eq!(
out_band.nd_buffer().unwrap().as_contiguous().unwrap(),
&[1u8, 2, 3, 4]
);
}

#[test]
fn test_array_basic_functionality() {
// Create a simple raster for testing using the correct API
Expand Down
48 changes: 48 additions & 0 deletions rust/sedona-raster/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use std::sync::Arc;
use sedona_schema::raster::{BandDataType, RasterSchema};

use crate::traits::{BandMetadata, MetadataRef};
use crate::view_entries::{ViewEntries, ViewEntry};

/// Maximum byte length of an inline `BinaryViewArray` view. Views this short
/// store their bytes in the 16-byte view itself; longer views reference a data
Expand Down Expand Up @@ -387,6 +388,53 @@ impl RasterBuilder {
Ok(())
}

/// Like [`Self::start_band_nd`], but persists an explicit band `view`
/// (a window of offsets/steps over the source `shape`) instead of the
/// implicit identity.
///
/// **Today only the identity view is accepted**: a non-identity view
/// returns an error, because persisting one isn't wired through the band
/// reader or the `RS_EnsureLoaded` round-trip yet (tracked in
/// <https://github.com/apache/sedona-db/issues/897>). An identity `view` is
/// stored as the canonical null sentinel, exactly as `start_band_nd` does,
/// so this is a drop-in for callers that want to forward a (currently
/// always identity) view. When view persistence lands, only this method
/// changes — callers routing through it (e.g. `BandRef::copy_into`) are
/// unaffected.
#[allow(clippy::too_many_arguments)]
pub fn start_band_nd_with_view(
&mut self,
name: Option<&str>,
dim_names: &[&str],
shape: &[i64],
data_type: BandDataType,
nodata: Option<&[u8]>,
outdb_uri: Option<&str>,
outdb_format: Option<&str>,
view: &[ViewEntry],
) -> Result<(), ArrowError> {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Clippy is right about this one, and it's a public API and probably worth dealing with before it is used. A common way to handle this is to have a struct NewBandArgs<'a> { ... }, which forces code to name arguments.

// Reject up front — before any column appends — so a rejected view can
// never leave the builder in a half-written state.
if !ViewEntries::new(view.to_vec()).is_identity(shape) {
return Err(ArrowError::InvalidArgumentError(
"start_band_nd_with_view: persisting a non-identity band view is \
not yet supported (see \
https://github.com/apache/sedona-db/issues/897); materialize the \
band (e.g. via RS_EnsureContiguous) first"
.into(),
));
}
self.start_band_nd(
name,
dim_names,
shape,
data_type,
nodata,
outdb_uri,
outdb_format,
)
}

/// Convenience: start a 2D band with `dim_names=["y","x"]` and `shape=[height, width]`.
///
/// Must be called after `start_raster_2d` / `start_raster_2d` which sets
Expand Down
132 changes: 131 additions & 1 deletion rust/sedona-raster/src/traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
use arrow_schema::ArrowError;
use sedona_schema::raster::BandDataType;

use crate::view_entries::ViewEntry;
use crate::builder::RasterBuilder;
use crate::view_entries::{ViewEntries, ViewEntry};

/// Recognized spatial dimension-name pairs, in band C-order: the slower-
/// varying Y-like (row) axis first, the faster-varying X-like (column) axis
Expand Down Expand Up @@ -496,6 +497,30 @@ pub trait RasterRef {
}
}

/// Field overrides for [`BandRef::copy_into`]. Each field defaults to `None`,
/// meaning "inherit from the source band". `name` has no source on a `BandRef`
/// (band names live at the raster level), so it defaults to unnamed.
#[derive(Default)]
pub struct BandOverrides<'a> {
/// Name for the derived band (the source has none to inherit).
pub name: Option<&'a str>,
/// Override the dimension names; `None` inherits the source's.
pub dim_names: Option<&'a [&'a str]>,
/// Override the nodata value; `None` inherits the source's.
pub nodata: Option<&'a [u8]>,
/// Override the OutDb URI; `None` inherits the source's.
pub outdb_uri: Option<&'a str>,
/// Override the OutDb format; `None` inherits the source's.
pub outdb_format: Option<&'a str>,
/// View to apply to the derived band, expressed in the **source's visible
/// coordinates**. [`BandRef::copy_into`] composes it onto the source's own
/// view for you — you don't manage that composition and don't need to know
/// whether the source already carries a view. `None` inherits the source's
/// view unchanged. (A non-identity result isn't persistable yet; see
/// [`RasterBuilder::start_band_nd_with_view`].)
pub view: Option<&'a [ViewEntry]>,
}

/// Trait for accessing a single band/variable within an N-D raster.
///
/// This is the consumer interface. Implementations handle storage details
Expand Down Expand Up @@ -667,6 +692,77 @@ pub trait BandRef {
};
nodata_bytes_to_f64_lossless(bytes, &self.data_type()).map(Some)
}

/// Write a derived band into `builder`, inheriting every field not set in
/// `overrides` from `self`, and carrying the source bytes over.
///
/// This is the canonical "derive a band from an existing one" path — it
/// replaces hand-rebuilding via `start_band_nd` + a manual data append. The
/// data transfer is zero-copy when the implementation supports it (see
/// [`Self::append_data_into`]).
///
/// The derived band's view is the source's own view with any
/// `overrides.view` **composed on top for you**: express an override in the
/// source's *visible* coordinates and `copy_into` composes it against the
/// source's view — callers never manage that composition and don't need to
/// know whether the source already carries one. `overrides.view = None`
/// inherits the source's view unchanged.
///
/// A non-identity effective view can't be persisted yet, so it errors (the
/// gate lives in [`RasterBuilder::start_band_nd_with_view`]); in practice
/// today the source is identity-viewed and any override must compose back
/// to the identity. When view persistence lands
/// (<https://github.com/apache/sedona-db/issues/897>) this method is
/// unchanged — it already carries the view.
fn copy_into(
&self,
builder: &mut RasterBuilder,
overrides: BandOverrides<'_>,
) -> Result<(), ArrowError> {
let inherited_dims = self.dim_names();
let dim_names: Vec<&str> = match overrides.dim_names {
Some(d) => d.to_vec(),
None => inherited_dims,
};
let shape = self.raw_source_shape().to_vec();
// Compose the caller's override (if any) onto the source's own view, so
// the override is interpreted in the source's visible space and the
// caller doesn't have to. `None` keeps the source view unchanged.
let source_view = ViewEntries::new(self.view().to_vec());
let effective_view = match overrides.view {
Some(v) => source_view.compose(&ViewEntries::new(v.to_vec()))?,
None => source_view,
};
builder.start_band_nd_with_view(
overrides.name,
&dim_names,
&shape,
self.data_type(),
overrides.nodata.or_else(|| self.nodata()),
overrides.outdb_uri.or_else(|| self.outdb_uri()),
overrides.outdb_format.or_else(|| self.outdb_format()),
effective_view.as_slice(),
)?;
self.append_data_into(builder)
}

/// Append `self`'s band data as the current band's single `data` value.
///
/// The default copies the visible source bytes via `append_value`. Arrow-
/// backed implementations override this to share the source row's backing
/// `Buffer` zero-copy (a refcount bump via
/// [`RasterBuilder::append_band_data_from`]), keeping the buffer plumbing
/// encapsulated rather than exposing a raw `Buffer` accessor. Call after the
/// band's schema has been written (e.g. by [`Self::copy_into`]).
fn append_data_into(&self, builder: &mut RasterBuilder) -> Result<(), ArrowError> {
if self.is_indb() {
let ndb = self.nd_buffer()?;
builder.band_data_writer().append_value(ndb.buffer);
Comment thread
james-willis marked this conversation as resolved.
} else {
builder.band_data_writer().append_value([]);
}
Ok(())
}
}

/// Convert raw nodata bytes to f64 given a [`BandDataType`].
Expand Down Expand Up @@ -935,6 +1031,40 @@ mod tests {
assert!(b.is_spatial_2d());
}

#[test]
fn copy_into_rejects_non_identity_source_view() {
// A sliced source view (step 2 on the outer axis) composes to a
// non-identity effective view; copy_into can't persist it yet, so it
// must error rather than copy mislocated bytes.
let src = band(&["y", "x"], &[4, 5], &[ve(0, 0, 2, 2), ve(1, 0, 1, 5)]);
let mut ob = RasterBuilder::new(1);
let err = src
.copy_into(&mut ob, BandOverrides::default())
.unwrap_err()
.to_string();
assert!(err.contains("non-identity"), "unexpected error: {err}");
}

#[test]
fn copy_into_rejects_non_identity_override_view() {
// Identity source, but the caller's override slices it (step 2); the
// composed effective view is non-identity and can't be persisted yet.
let src = band(&["y", "x"], &[4, 5], &[ve(0, 0, 1, 4), ve(1, 0, 1, 5)]);
let override_view = [ve(0, 0, 2, 2), ve(1, 0, 1, 5)];
let mut ob = RasterBuilder::new(1);
let err = src
.copy_into(
&mut ob,
BandOverrides {
view: Some(&override_view),
..Default::default()
},
)
.unwrap_err()
.to_string();
assert!(err.contains("non-identity"), "unexpected error: {err}");
}

#[test]
fn is_spatial_2d_latlon_is_true() {
let b = band(&["lat", "lon"], &[4, 5], &[ve(0, 0, 1, 4), ve(1, 0, 1, 5)]);
Expand Down
Loading