Skip to content
Merged
76 changes: 75 additions & 1 deletion rust/sedona-raster/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use arrow_array::{
};
use arrow_schema::ArrowError;

use crate::builder::RasterBuilder;
use crate::traits::{BandRef, Bands, NdBuffer, RasterRef};
use crate::view_entries::ViewEntry;
use sedona_schema::raster::{band_indices, raster_indices, BandDataType};
Expand Down Expand Up @@ -134,6 +135,18 @@ impl<'a> BandRef for BandRefImpl<'a> {
data_type: self.data_type,
})
}

/// Arrow-backed specialization of the band-data hand-off.
///
/// For an InDb band the work is delegated to
/// `RasterBuilder::append_band_data_from`, which is handed the source data
/// array and this band's row so the builder can share the row's backing
/// `Buffer` (a refcount bump) rather than copying the visible bytes.
///
/// For an OutDb band the data column is empty by design, so an empty value
/// is appended instead.
///
/// # Errors
///
/// Propagates any `ArrowError` returned by `append_band_data_from`.
fn append_data_into(&self, builder: &mut RasterBuilder) -> Result<(), ArrowError> {
    if !self.is_indb() {
        // OutDb: nothing to share, keep the data slot present but empty.
        builder.band_data_writer().append_value([]);
        return Ok(());
    }

    builder.append_band_data_from(self.data_array, self.band_row)
}
}

/// Arrow-backed implementation of RasterRef for a single raster row.
Expand Down Expand Up @@ -605,7 +618,7 @@ impl<'a> RasterStructArray<'a> {
mod tests {
use super::*;
use crate::builder::RasterBuilder;
use crate::traits::{BandMetadata, RasterMetadata};
use crate::traits::{BandMetadata, BandOverrides, RasterMetadata};
use arrow_array::{ArrayRef, ListArray, StructArray, UInt32Array};
use arrow_buffer::{OffsetBuffer, ScalarBuffer};
use arrow_schema::{DataType, Fields};
Expand All @@ -615,6 +628,67 @@ mod tests {
use sedona_testing::rasters::generate_test_rasters;
use std::sync::Arc;

#[test]
fn copy_into_shares_buffer_zero_copy_and_overrides() {
    // The band payload is 16 bytes: larger than the inline threshold, so the
    // InDb band is block-backed and its buffer can be shared.
    let payload: Vec<u8> = (0u8..16).collect();
    let transform = [0.0, 1.0, 0.0, 0.0, 0.0, -1.0];

    // --- Source: one raster with one 1-D UInt8 band named "orig". ---
    let mut source_builder = RasterBuilder::new(1);
    source_builder
        .start_raster_nd(&transform, &["x"], &[16], None)
        .unwrap();
    source_builder
        .start_band_nd(
            Some("orig"),
            &["x"],
            &[16],
            BandDataType::UInt8,
            None,
            None,
            None,
        )
        .unwrap();
    source_builder.band_data_writer().append_value(&payload);
    source_builder.finish_band().unwrap();
    source_builder.finish_raster().unwrap();
    let source_array = source_builder.finish().unwrap();

    let source_rasters = RasterStructArray::new(&source_array);
    let source_raster = source_rasters.get(0).unwrap();
    let source_band = source_raster.band(0).unwrap();
    // Remember where the source bytes live so sharing can be checked later.
    let source_ptr = source_band.nd_buffer().unwrap().buffer.as_ptr();

    // --- Derived: copy the band, overriding only its name. ---
    let name_only = BandOverrides {
        name: Some("derived"),
        ..Default::default()
    };
    let mut derived_builder = RasterBuilder::new(1);
    derived_builder
        .start_raster_nd(&transform, &["x"], &[16], None)
        .unwrap();
    source_band
        .copy_into(&mut derived_builder, name_only)
        .unwrap();
    derived_builder.finish_band().unwrap();
    derived_builder.finish_raster().unwrap();
    let derived_array = derived_builder.finish().unwrap();

    let derived_rasters = RasterStructArray::new(&derived_array);
    let derived_raster = derived_rasters.get(0).unwrap();
    let derived_band = derived_raster.band(0).unwrap();

    // Zero-copy: both bands must point at the very same backing bytes.
    assert_eq!(
        source_ptr,
        derived_band.nd_buffer().unwrap().buffer.as_ptr(),
        "copy_into must share the source buffer, not copy it"
    );
    // ...and those bytes must still read back as the original payload.
    assert_eq!(
        derived_band.nd_buffer().unwrap().as_contiguous().unwrap(),
        payload.as_slice()
    );

    // Only the name was overridden; dimension names and data type are
    // inherited from the source band.
    assert_eq!(derived_raster.band_name(0), Some("derived"));
    assert_eq!(derived_band.dim_names(), vec!["x"]);
    assert_eq!(derived_band.data_type(), BandDataType::UInt8);
}

#[test]
fn test_array_basic_functionality() {
// Create a simple raster for testing using the correct API
Expand Down
71 changes: 71 additions & 0 deletions rust/sedona-raster/src/traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
use arrow_schema::ArrowError;
use sedona_schema::raster::BandDataType;

use crate::builder::RasterBuilder;
use crate::view_entries::ViewEntry;

/// Recognized spatial dimension-name pairs, in band C-order: the slower-
Expand Down Expand Up @@ -496,6 +497,23 @@ pub trait RasterRef {
}
}

/// Field overrides for [`BandRef::copy_into`].
///
/// Every field defaults to `None`. For `dim_names`, `nodata`, `outdb_uri`
/// and `outdb_format`, `None` means "inherit the value from the source
/// band". `name` is the exception: a `BandRef` carries no name (band names
/// live at the raster level), so `None` leaves the derived band unnamed.
///
/// The struct only holds optional borrows, so it is cheap to copy and can be
/// reused across several `copy_into` calls.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct BandOverrides<'a> {
    /// Name for the derived band; `None` leaves it unnamed (the source has
    /// no name to inherit).
    pub name: Option<&'a str>,
    /// Override the dimension names; `None` inherits the source's.
    pub dim_names: Option<&'a [&'a str]>,
    /// Override the nodata value (raw bytes); `None` inherits the source's.
    pub nodata: Option<&'a [u8]>,
    /// Override the OutDb URI; `None` inherits the source's.
    pub outdb_uri: Option<&'a str>,
    /// Override the OutDb format; `None` inherits the source's.
    pub outdb_format: Option<&'a str>,
}

/// Trait for accessing a single band/variable within an N-D raster.
///
/// This is the consumer interface. Implementations handle storage details
Expand Down Expand Up @@ -667,6 +685,59 @@ pub trait BandRef {
};
nodata_bytes_to_f64_lossless(bytes, &self.data_type()).map(Some)
}

/// Write a derived band into `builder`, inheriting every field not set in
/// `overrides` from `self`, and carrying the source bytes over.
///
/// This is the canonical "derive a band from an existing one" path — it
/// replaces hand-rebuilding via `start_band_nd` + a manual data append. The
/// data transfer is zero-copy when the implementation supports it (see
/// [`Self::append_data_into`]).
///
/// The derived band has an identity view: it carries the source's
/// dimension names, shape, data type, nodata, and OutDb pointers, but not a
/// non-identity `view`. (View-carrying overrides land with the view
/// machinery.)
fn copy_into(
&self,
builder: &mut RasterBuilder,
overrides: BandOverrides<'_>,
) -> Result<(), ArrowError> {
let inherited_dims = self.dim_names();
let dim_names: Vec<&str> = match overrides.dim_names {
Some(d) => d.to_vec(),
None => inherited_dims,
};
let shape = self.raw_source_shape().to_vec();
builder.start_band_nd(
overrides.name,
&dim_names,
&shape,
self.data_type(),
overrides.nodata.or_else(|| self.nodata()),
overrides.outdb_uri.or_else(|| self.outdb_uri()),
overrides.outdb_format.or_else(|| self.outdb_format()),
)?;
self.append_data_into(builder)
}

/// Append `self`'s band data as the current band's single `data` value.
///
/// This default implementation copies: for an InDb band it reads the
/// band's `nd_buffer()` and appends its `buffer` through the builder's
/// band-data writer. For an OutDb band it appends an empty value.
///
/// Arrow-backed implementations override this to share the source row's
/// backing `Buffer` zero-copy (a refcount bump via
/// [`RasterBuilder::append_band_data_from`]), which keeps the buffer
/// plumbing encapsulated rather than exposing a raw `Buffer` accessor.
///
/// Call this only after the band has been started on `builder`;
/// [`Self::copy_into`] does so by calling `start_band_nd` first.
///
/// # Errors
///
/// Returns the `ArrowError` from `nd_buffer()` if the InDb data cannot be
/// read.
fn append_data_into(&self, builder: &mut RasterBuilder) -> Result<(), ArrowError> {
    if self.is_indb() {
        let ndb = self.nd_buffer()?;
        builder.band_data_writer().append_value(ndb.buffer);
    } else {
        // OutDb bands carry no in-line bytes; keep the data slot empty.
        builder.band_data_writer().append_value([]);
    }
    Ok(())
}
}

/// Convert raw nodata bytes to f64 given a [`BandDataType`].
Expand Down
Loading