Skip to content
Merged
131 changes: 130 additions & 1 deletion rust/sedona-raster/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ use datafusion_common::cast::{
as_string_array, as_string_view_array, as_struct_array, as_uint32_array,
};

use crate::builder::RasterBuilder;
use crate::traits::{BandRef, Bands, NdBuffer, RasterRef};
use crate::view_entries::ViewEntry;
use sedona_schema::raster::{band_indices, raster_indices, BandDataType};
Expand Down Expand Up @@ -138,6 +139,18 @@ impl<'a> BandRef for BandRefImpl<'a> {
data_type: self.data_type,
})
}

/// Zero-copy override of the data-append step.
///
/// For an InDb band the source row's backing `Buffer` is handed to the
/// builder via [`RasterBuilder::append_band_data_from`], which shares it
/// (refcount bump) rather than copying the visible bytes. An OutDb band has
/// an empty data column by design, so an empty value is appended instead.
fn append_data_into(&self, builder: &mut RasterBuilder) -> Result<(), ArrowError> {
    // OutDb: there are no in-row bytes to share; record an empty value.
    if !self.is_indb() {
        builder.band_data_writer().append_value([]);
        return Ok(());
    }

    // InDb: delegate to the builder so it can reference the source buffer.
    builder.append_band_data_from(self.data_array, self.band_row)
}
}

/// Arrow-backed implementation of RasterRef for a single raster row.
Expand Down Expand Up @@ -545,7 +558,7 @@ impl<'a> RasterStructArray<'a> {
mod tests {
use super::*;
use crate::builder::RasterBuilder;
use crate::traits::{BandMetadata, RasterMetadata};
use crate::traits::{BandMetadata, BandOverrides, RasterMetadata};
use arrow_array::{ArrayRef, ListArray, StructArray, UInt32Array};
use arrow_buffer::{OffsetBuffer, ScalarBuffer};
use arrow_schema::{DataType, Fields};
Expand All @@ -555,6 +568,122 @@ mod tests {
use sedona_testing::rasters::generate_test_rasters;
use std::sync::Arc;

#[test]
fn copy_into_shares_buffer_zero_copy_and_overrides() {
    // Source raster: one InDb band of 16 bytes. Sixteen bytes exceeds the
    // inline-view threshold, so the data is block-backed and can be shared.
    let payload: Vec<u8> = (0u8..16).collect();
    let geo = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0];

    let mut src_builder = RasterBuilder::new(1);
    src_builder
        .start_raster_nd(&geo, &["x"], &[16], None)
        .unwrap();
    src_builder
        .start_band_nd(
            Some("orig"),
            &["x"],
            &[16],
            BandDataType::UInt8,
            None,
            None,
            None,
        )
        .unwrap();
    src_builder.band_data_writer().append_value(payload.clone());
    src_builder.finish_band().unwrap();
    src_builder.finish_raster().unwrap();
    let src_array = src_builder.finish().unwrap();
    let src_rasters = RasterStructArray::try_new(&src_array).unwrap();
    let src_raster = src_rasters.get(0).unwrap();
    let src_band = src_raster.band(0).unwrap();
    // Remember where the source bytes live so sharing can be checked below.
    let src_ptr = src_band.nd_buffer().unwrap().buffer.as_ptr();

    // Derive a band through copy_into, overriding only the name; every other
    // property is inherited from the source band.
    let overrides = BandOverrides {
        name: Some("derived"),
        ..Default::default()
    };
    let mut dst_builder = RasterBuilder::new(1);
    dst_builder
        .start_raster_nd(&geo, &["x"], &[16], None)
        .unwrap();
    src_band.copy_into(&mut dst_builder, overrides).unwrap();
    dst_builder.finish_band().unwrap();
    dst_builder.finish_raster().unwrap();
    let dst_array = dst_builder.finish().unwrap();
    let dst_rasters = RasterStructArray::try_new(&dst_array).unwrap();
    let dst_raster = dst_rasters.get(0).unwrap();
    let dst_band = dst_raster.band(0).unwrap();

    // Zero-copy: the derived band points at the very same backing bytes.
    assert_eq!(
        src_ptr,
        dst_band.nd_buffer().unwrap().buffer.as_ptr(),
        "copy_into must share the source buffer, not copy it"
    );
    // ...and those bytes still read back as the original payload.
    assert_eq!(
        dst_band.nd_buffer().unwrap().as_contiguous().unwrap(),
        payload.as_slice()
    );
    // The name is overridden; dim names and data type come from the source.
    assert_eq!(dst_raster.band_name(0), Some("derived"));
    assert_eq!(dst_band.dim_names(), vec!["x"]);
    assert_eq!(dst_band.data_type(), BandDataType::UInt8);
}

#[test]
fn copy_into_with_identity_override_view_succeeds() {
    // Passing an explicit identity view composes back to the identity, so it
    // must be accepted and behave just like the inherited (`None`) case. This
    // drives the `BandOverrides::view` path from end to end.
    let geo = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0];

    // Source raster: a single 4-byte UInt8 band along "x".
    let mut src_builder = RasterBuilder::new(1);
    src_builder
        .start_raster_nd(&geo, &["x"], &[4], None)
        .unwrap();
    src_builder
        .start_band_nd(
            Some("orig"),
            &["x"],
            &[4],
            BandDataType::UInt8,
            None,
            None,
            None,
        )
        .unwrap();
    src_builder
        .band_data_writer()
        .append_value(vec![1u8, 2, 3, 4]);
    src_builder.finish_band().unwrap();
    src_builder.finish_raster().unwrap();
    let src_array = src_builder.finish().unwrap();
    let src_rasters = RasterStructArray::try_new(&src_array).unwrap();
    let src_raster = src_rasters.get(0).unwrap();
    let src_band = src_raster.band(0).unwrap();

    // Identity window over the only axis: start 0, step 1, all 4 elements.
    let identity_view = [ViewEntry {
        source_axis: 0,
        start: 0,
        step: 1,
        steps: 4,
    }];
    let overrides = BandOverrides {
        view: Some(&identity_view),
        ..Default::default()
    };

    let mut dst_builder = RasterBuilder::new(1);
    dst_builder
        .start_raster_nd(&geo, &["x"], &[4], None)
        .unwrap();
    src_band.copy_into(&mut dst_builder, overrides).unwrap();
    dst_builder.finish_band().unwrap();
    dst_builder.finish_raster().unwrap();
    let dst_array = dst_builder.finish().unwrap();
    let dst_rasters = RasterStructArray::try_new(&dst_array).unwrap();
    let dst_raster = dst_rasters.get(0).unwrap();
    let dst_band = dst_raster.band(0).unwrap();

    // The copied band reads back the source bytes unchanged.
    assert_eq!(
        dst_band.nd_buffer().unwrap().as_contiguous().unwrap(),
        &[1u8, 2, 3, 4]
    );
}

#[test]
fn test_array_basic_functionality() {
// Create a simple raster for testing using the correct API
Expand Down
78 changes: 78 additions & 0 deletions rust/sedona-raster/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use std::sync::Arc;
use sedona_schema::raster::{BandDataType, RasterSchema};

use crate::traits::{BandMetadata, MetadataRef};
use crate::view_entries::{ViewEntries, ViewEntry};

/// Maximum byte length of an inline `BinaryViewArray` view. Views this short
/// store their bytes in the 16-byte view itself; longer views reference a data
Expand Down Expand Up @@ -144,6 +145,20 @@ pub struct RasterBuilder {
raster_validity: BooleanBuilder,
}

/// Arguments to [`RasterBuilder::start_band_with_view`]. Bundled into a
/// struct to keep the call site readable — eight slots is enough that
/// positional args invite mis-ordering bugs.
///
/// `dim_names`, `source_shape`, and `view` must all have the same non-zero
/// length; `start_band_with_view` rejects the call otherwise.
pub(crate) struct StartBandWithViewArgs<'a> {
/// Optional band name; forwarded unchanged to `start_band_nd`.
pub name: Option<&'a str>,
/// One name per dimension. Its length is taken as the band's number of
/// dimensions and must not be zero.
pub dim_names: &'a [&'a str],
/// Shape the `view` is validated against; forwarded to `start_band_nd` as
/// the band's shape.
pub source_shape: &'a [i64],
/// One entry per dimension. Validated against `source_shape`; currently it
/// must be the identity view or the call is rejected.
pub view: &'a [ViewEntry],
/// Forwarded unchanged to `start_band_nd`.
pub data_type: BandDataType,
/// Forwarded unchanged to `start_band_nd`.
pub nodata: Option<&'a [u8]>,
/// Forwarded unchanged to `start_band_nd`.
pub outdb_uri: Option<&'a str>,
/// Forwarded unchanged to `start_band_nd`.
pub outdb_format: Option<&'a str>,
}

impl RasterBuilder {
/// Create a new raster builder with the specified capacity.
pub fn new(capacity: usize) -> Self {
Expand Down Expand Up @@ -387,6 +402,69 @@ impl RasterBuilder {
Ok(())
}

/// Begin a band whose data is addressed through an explicit `view` — one
/// [`ViewEntry`] per axis of `source_shape` — instead of the implicit identity.
///
/// View persistence (<https://github.com/apache/sedona-db/issues/897>) is not
/// implemented yet: the band schema can only represent a view as the
/// canonical identity null sentinel. An identity `view` is therefore stored
/// exactly as [`Self::start_band_nd`] stores it, and any other view is
/// refused. Callers such as `BandRef::copy_into` are meant to go through this
/// method so that they need no changes once persistence lands.
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] when `dim_names` is empty,
/// when `dim_names`, `source_shape`, and `view` differ in length, or when
/// `view` is valid but is not the identity over `source_shape`. Errors from
/// view validation and from [`Self::start_band_nd`] are propagated as-is.
/// All checks run before `start_band_nd` is called.
pub(crate) fn start_band_with_view(
    &mut self,
    args: StartBandWithViewArgs<'_>,
) -> Result<(), ArrowError> {
    // Every rejection below is an invalid-argument error.
    let invalid = ArrowError::InvalidArgumentError;

    let rank = args.dim_names.len();
    if rank == 0 {
        return Err(invalid(
            "start_band_with_view: 0-dimensional bands are not supported".into(),
        ));
    }

    // The shape and the view must each supply exactly one entry per axis.
    let shape_len = args.source_shape.len();
    let view_len = args.view.len();
    if [shape_len, view_len].iter().any(|&len| len != rank) {
        return Err(invalid(format!(
            "start_band_with_view: dim_names ({}), source_shape ({}), and view ({}) \
             must all have the same length",
            rank, shape_len, view_len
        )));
    }

    let entries = ViewEntries::new(args.view.to_vec());
    entries.validate(args.source_shape)?;

    // Only the identity view can round-trip through today's schema, so refuse
    // anything else here, before a single column is appended, instead of
    // persisting bytes that would later be read from the wrong place.
    if !entries.is_identity(args.source_shape) {
        return Err(invalid(
            "start_band_with_view: persisting a non-identity band view is not yet \
             supported (see https://github.com/apache/sedona-db/issues/897); \
             materialize the band (e.g. via RS_EnsureContiguous) first"
                .into(),
        ));
    }

    // Identity view: store the band exactly as the plain N-d entry point does.
    self.start_band_nd(
        args.name,
        args.dim_names,
        args.source_shape,
        args.data_type,
        args.nodata,
        args.outdb_uri,
        args.outdb_format,
    )
}

/// Convenience: start a 2D band with `dim_names=["y","x"]` and `shape=[height, width]`.
///
/// Must be called after `start_raster_2d` which sets
Expand Down
Loading
Loading