From 21395fc47f5beaa0d1921d2a4cd4508874e9cac9 Mon Sep 17 00:00:00 2001 From: jameswillis Date: Wed, 17 Jun 2026 14:03:17 -0700 Subject: [PATCH 01/18] feat(rust/sedona-raster-functions): add RS_Value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add RS_Value to sample a raster pixel value at a point or 1-based grid cell (2-D). Reads materialised InDb pixel bytes directly from the band's NdBuffer via stride-offset math and marks the UDF needs_pixels so the planner injects RS_EnsureLoaded — no GDAL dependency. Returns null for out-of-bounds locations and nodata pixels; errors on non-2-D bands. --- docs/reference/sql/rs_value.qmd | 85 ++++ python/sedonadb/tests/functions/conftest.py | 34 ++ .../tests/functions/test_raster_functions.py | 46 ++ rust/sedona-raster-functions/Cargo.toml | 2 +- rust/sedona-raster-functions/src/lib.rs | 1 + rust/sedona-raster-functions/src/register.rs | 1 + rust/sedona-raster-functions/src/rs_value.rs | 461 ++++++++++++++++++ 7 files changed, 629 insertions(+), 1 deletion(-) create mode 100644 docs/reference/sql/rs_value.qmd create mode 100644 python/sedonadb/tests/functions/conftest.py create mode 100644 rust/sedona-raster-functions/src/rs_value.rs diff --git a/docs/reference/sql/rs_value.qmd b/docs/reference/sql/rs_value.qmd new file mode 100644 index 000000000..dffac1fd8 --- /dev/null +++ b/docs/reference/sql/rs_value.qmd @@ -0,0 +1,85 @@ +--- +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +title: RS_Value +description: > + Returns the value of a single raster pixel as a double, selected either by a + point geometry or by 1-based grid coordinates. Returns null if the location is + outside the raster or the pixel holds the band's nodata value. +kernels: + - returns: float64 + args: + - raster + - name: point + type: geometry + description: > + Point whose containing pixel is sampled (nearest pixel, no resampling). + Reprojected into the raster CRS when both carry one. + - returns: float64 + args: + - raster + - name: point + type: geometry + - name: band + type: int + description: Band index (1-based). Defaults to 1 if not specified. + - returns: float64 + args: + - raster + - name: colX + type: int + description: Column index (1-based). + - name: rowY + type: int + description: Row index (1-based). + - returns: float64 + args: + - raster + - name: colX + type: int + - name: rowY + type: int + - name: band + type: int + description: Band index (1-based). Defaults to 1 if not specified. +--- + +## Description + +`RS_Value` samples one pixel of a raster. The location is given either as a +point geometry — the value of the pixel that contains the point is returned, with +no interpolation — or as 1-based `(colX, rowY)` grid coordinates. The band +defaults to 1. + +The result is `NULL` when the point or grid cell falls outside the raster, or +when the sampled pixel equals the band's nodata value. Only 2-D rasters are +supported. + +## Examples + +```sql +SELECT RS_Value(RS_Example(), 2, 1); +``` + +```sql +SELECT RS_Value(RS_Example(), 2, 1, 2); +``` + +```sql +SELECT RS_Value(RS_Example(), ST_SetCRS(ST_Point(74.58, 110.57), 'OGC:CRS84')); +``` diff --git a/python/sedonadb/tests/functions/conftest.py b/python/sedonadb/tests/functions/conftest.py new file mode 100644 index 000000000..d71fa4c67 --- /dev/null +++ b/python/sedonadb/tests/functions/conftest.py @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +import sedonadb + + +@pytest.fixture +def raster_con(): + """A connection with a single `RS_Example()` raster registered as `rasters`. + + Uses the in-DB `RS_Example()` raster (64x32, three UInt8 bands) so the + sedonadb package's RS_ function tests carry no zarr dependency — zarr + reader behaviors are tested in the sedonadb-zarr package. A dedicated + connection (not the shared module-level one) keeps the view from leaking + into other tests. + """ + con = sedonadb.connect() + con.sql("SELECT RS_Example() AS raster").to_view("rasters") + return con diff --git a/python/sedonadb/tests/functions/test_raster_functions.py b/python/sedonadb/tests/functions/test_raster_functions.py index 590d15229..4df37a685 100644 --- a/python/sedonadb/tests/functions/test_raster_functions.py +++ b/python/sedonadb/tests/functions/test_raster_functions.py @@ -20,6 +20,12 @@ from sedonadb.testing import SedonaDB +def query_value(con, expr): + """Evaluate `expr` over the single example raster row and return the value.""" + table = con.sql(f"SELECT {expr} AS v FROM rasters").to_arrow_table() + return table["v"][0].as_py() + + @pytest.mark.parametrize( ("expr", "expected"), [ @@ -98,3 +104,43 @@ def test_rs_ensureloaded(con, sedona_testing): assert arr.shape == (512, 512) assert arr.dtype == "uint16" assert arr[0, 0] == 2324 + + +# RS_Example fills band `b` with the constant value `b`, except the top-left +# pixel (colX=1, rowY=1) which is set to the nodata value (127). Grid +# coordinates are 1-based. These also exercise the `needs_pixels` -> +# RS_EnsureLoaded planner path end to end (RS_Value reads materialised InDb +# bytes). +@pytest.mark.parametrize( + ("expr", "expected"), + [ + ("RS_Value(raster, 2, 1)", 1.0), # band 1, away from the nodata corner + ("RS_Value(raster, 64, 32)", 1.0), # bottom-right pixel, in bounds + ("RS_Value(raster, 2, 1, 1)", 1.0), # explicit band 1 == the default + ("RS_Value(raster, 2, 1, 2)", 2.0), # band 2 + ("RS_Value(raster, 2, 1, 3)", 3.0), # band 3 + ("RS_Value(raster, 1, 1)", None), # top-left pixel is nodata + ("RS_Value(raster, 1, 1, 2)", None), # nodata on band 2 too + ("RS_Value(raster, 65, 1)", None), # colX past the width (64) + ("RS_Value(raster, 1, 33)", None), # rowY past the height (32) + ("RS_Value(raster, 0, 1)", None), # colX 0 -> off the left edge + ], +) +def test_rs_value_grid(raster_con, expr, expected): + assert query_value(raster_con, expr) == expected + + +# Point sampling. (74.58, 110.57) is the centroid of pixel (10, 10) (0-based) +# in the raster's OGC:CRS84 space; the point and raster share a CRS so no +# reprojection happens. A point far outside the footprint yields NULL. +@pytest.mark.parametrize( + ("expr", "expected"), + [ + ("RS_Value(raster, ST_SetCRS(ST_Point(74.58, 110.57), 'OGC:CRS84'))", 1.0), + ("RS_Value(raster, ST_SetCRS(ST_Point(74.58, 110.57), 'OGC:CRS84'), 2)", 2.0), + ("RS_Value(raster, ST_SetCRS(ST_Point(74.58, 110.57), 'OGC:CRS84'), 3)", 3.0), + ("RS_Value(raster, ST_SetCRS(ST_Point(0.0, 0.0), 'OGC:CRS84'))", None), + ], +) +def test_rs_value_point(raster_con, expr, expected): + assert query_value(raster_con, expr) == expected diff --git a/rust/sedona-raster-functions/Cargo.toml b/rust/sedona-raster-functions/Cargo.toml index 7d65c7a03..a8ebccf3e 100644 --- a/rust/sedona-raster-functions/Cargo.toml +++ b/rust/sedona-raster-functions/Cargo.toml @@ -37,6 +37,7 @@ arrow-buffer = { workspace = true } async-trait = { workspace = true } datafusion-common = { workspace = true } datafusion-expr = { workspace = true } +geo-traits = { workspace = true } sedona-common = { workspace = true } sedona-expr = { workspace = true } sedona-geometry = { workspace = true } @@ -49,7 +50,6 @@ wkb = { workspace = true } [dev-dependencies] criterion = { workspace = true } -geo-traits = { workspace = true } sedona-testing = { workspace = true, features = ["criterion"] } sedona-proj = { workspace = true, features = ["proj-sys"] } rstest = { workspace = true } diff --git a/rust/sedona-raster-functions/src/lib.rs b/rust/sedona-raster-functions/src/lib.rs index 3a5baa8df..4de2d5975 100644 --- a/rust/sedona-raster-functions/src/lib.rs +++ b/rust/sedona-raster-functions/src/lib.rs @@ -38,4 +38,5 @@ pub mod rs_size; pub mod rs_slice; pub mod rs_spatial_predicates; pub mod rs_srid; +pub mod rs_value; pub mod rs_worldcoordinate; diff --git a/rust/sedona-raster-functions/src/register.rs b/rust/sedona-raster-functions/src/register.rs index cfb2f008f..e3d118da4 100644 --- a/rust/sedona-raster-functions/src/register.rs +++ b/rust/sedona-raster-functions/src/register.rs @@ -77,6 +77,7 @@ pub fn default_function_set() -> FunctionSet { crate::rs_spatial_predicates::rs_within_udf, crate::rs_srid::rs_crs_udf, crate::rs_srid::rs_srid_udf, + crate::rs_value::rs_value_udf, crate::rs_worldcoordinate::rs_rastertoworldcoord_udf, crate::rs_worldcoordinate::rs_rastertoworldcoordx_udf, crate::rs_worldcoordinate::rs_rastertoworldcoordy_udf, diff --git a/rust/sedona-raster-functions/src/rs_value.rs b/rust/sedona-raster-functions/src/rs_value.rs new file mode 100644 index 000000000..35893385c --- /dev/null +++ b/rust/sedona-raster-functions/src/rs_value.rs @@ -0,0 +1,461 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! `RS_Value` — sample a raster's pixel value at a point or grid cell. +//! +//! ```text +//! RS_Value(raster, point) -> Double -- band defaults to 1 +//! RS_Value(raster, point, band) -> Double +//! RS_Value(raster, colX, rowY) -> Double -- 1-based grid coords, band 1 +//! RS_Value(raster, colX, rowY, band) -> Double +//! ``` +//! +//! Returns the value of the pixel the point falls in (nearest pixel, no +//! resampling), or the value at the given 1-based grid cell. The result is +//! `NULL` when the raster/arguments are null, the point/cell is out of bounds, +//! or the value equals the band's nodata. +//! +//! The function is tagged [`NEEDS_PIXELS_METADATA_KEY`], so the planner wraps +//! its raster argument in `RS_EnsureLoaded`; by the time a kernel runs the band +//! bytes are materialised InDb and a value is read directly from the band's +//! [`NdBuffer`](sedona_raster::traits::NdBuffer) — no GDAL involved. Only 2-D +//! rasters are supported; a band with extra (non-spatial) dimensions errors. + +use std::sync::Arc; + +use arrow_array::builder::Float64Builder; +use arrow_schema::DataType; +use datafusion_common::cast::as_int32_array; +use datafusion_common::{exec_datafusion_err, exec_err, DataFusionError, Result}; +use datafusion_expr::{ColumnarValue, Volatility}; +use geo_traits::{CoordTrait, GeometryTrait, GeometryType, PointTrait}; +use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_proj::transform::with_global_proj_engine; +use sedona_raster::affine_transformation::to_raster_coordinate; +use sedona_raster::traits::{nodata_bytes_to_f64_lossless, RasterRef}; +use sedona_schema::crs::CrsRef; +use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; +use wkb::reader::read_wkb; + +use crate::crs_utils::{crs_transform_wkb, resolve_crs}; +use crate::executor::RasterExecutor; +use crate::rs_ensure_loaded::NEEDS_PIXELS_METADATA_KEY; + +/// `RS_Value()` scalar UDF — sample a pixel value at a point or grid cell. +pub fn rs_value_udf() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "rs_value", + vec![ + Arc::new(RsValuePoint { with_band: false }), // RS_Value(raster, point) + Arc::new(RsValuePoint { with_band: true }), // RS_Value(raster, point, band) + Arc::new(RsValueGrid { with_band: false }), // RS_Value(raster, colX, rowY) + Arc::new(RsValueGrid { with_band: true }), // RS_Value(raster, colX, rowY, band) + ], + Volatility::Immutable, + ) + // The kernels read pixel bytes, so the raster argument must be materialised + // InDb first; the planner injects RS_EnsureLoaded based on this flag. + .with_metadata(NEEDS_PIXELS_METADATA_KEY, "true") +} + +/// Kernel for `RS_Value(raster, point[, band])`. +#[derive(Debug)] +struct RsValuePoint { + with_band: bool, +} + +impl SedonaScalarKernel for RsValuePoint { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let mut matchers = vec![ + ArgMatcher::is_raster(), + ArgMatcher::is_geometry_or_geography(), + ]; + if self.with_band { + matchers.push(ArgMatcher::is_integer()); + } + let matcher = ArgMatcher::new(matchers, SedonaType::Arrow(DataType::Float64)); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + let num_iterations = executor.num_iterations(); + let mut builder = Float64Builder::with_capacity(num_iterations); + + // The optional band argument, materialised once as an Int32 array. + let band_array = if self.with_band { + Some( + as_int32_array( + &args[2] + .clone() + .cast_to(&DataType::Int32, None)? + .into_array(num_iterations)?, + )? + .clone(), + ) + } else { + None + }; + let mut band_iter = band_array.as_ref().map(|a| a.iter()); + + // Reprojecting the point into the raster CRS needs a PROJ engine. + with_global_proj_engine(|engine| { + executor.execute_raster_wkb_crs_void(|raster_opt, wkb_opt, point_crs| { + let band_num = next_band(&mut band_iter); + + let (raster, point_wkb) = match (raster_opt, wkb_opt) { + (Some(raster), Some(point_wkb)) => (raster, point_wkb), + _ => { + builder.append_null(); + return Ok(()); + } + }; + + // Bring the point into the raster's CRS. A reprojection only + // happens when both sides carry a (differing) CRS; otherwise + // the original WKB is sampled directly. + let raster_crs = resolve_crs(raster.crs())?; + let reprojected = + reproject_point(point_wkb, point_crs, raster_crs.as_deref(), engine)?; + let wkb = reprojected.as_deref().unwrap_or(point_wkb); + + let (x, y) = read_point_xy(wkb)?; + let (col, row) = to_raster_coordinate(raster, x, y) + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; + + match sample_pixel(raster, col, row, band_num)? { + Some(value) => builder.append_value(value), + None => builder.append_null(), + } + Ok(()) + }) + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +/// Kernel for `RS_Value(raster, colX, rowY[, band])` with **1-based** grid +/// coordinates. +#[derive(Debug)] +struct RsValueGrid { + with_band: bool, +} + +impl SedonaScalarKernel for RsValueGrid { + fn return_type(&self, args: &[SedonaType]) -> Result> { + let mut matchers = vec![ + ArgMatcher::is_raster(), + ArgMatcher::is_integer(), + ArgMatcher::is_integer(), + ]; + if self.with_band { + matchers.push(ArgMatcher::is_integer()); + } + let matcher = ArgMatcher::new(matchers, SedonaType::Arrow(DataType::Float64)); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + let num_iterations = executor.num_iterations(); + let mut builder = Float64Builder::with_capacity(num_iterations); + + let col_array = as_int32_array( + &args[1] + .clone() + .cast_to(&DataType::Int32, None)? + .into_array(num_iterations)?, + )? + .clone(); + let row_array = as_int32_array( + &args[2] + .clone() + .cast_to(&DataType::Int32, None)? + .into_array(num_iterations)?, + )? + .clone(); + let band_array = if self.with_band { + Some( + as_int32_array( + &args[3] + .clone() + .cast_to(&DataType::Int32, None)? + .into_array(num_iterations)?, + )? + .clone(), + ) + } else { + None + }; + + let mut col_iter = col_array.iter(); + let mut row_iter = row_array.iter(); + let mut band_iter = band_array.as_ref().map(|a| a.iter()); + + executor.execute_raster_void(|_, raster_opt| { + let col = col_iter.next().flatten(); + let row = row_iter.next().flatten(); + let band_num = next_band(&mut band_iter); + + match (raster_opt, col, row) { + (Some(raster), Some(col), Some(row)) => { + // 1-based grid coordinates -> 0-based pixel indices. + match sample_pixel(raster, col as i64 - 1, row as i64 - 1, band_num)? { + Some(value) => builder.append_value(value), + None => builder.append_null(), + } + } + _ => builder.append_null(), + } + Ok(()) + })?; + + executor.finish(Arc::new(builder.finish())) + } +} + +/// Advance a band-number iterator one row, returning a clamped 1-based band +/// number. Defaults to band 1 when the argument is absent or null and clamps +/// non-positive inputs up to 1, matching the legacy behaviour. +fn next_band( + band_iter: &mut Option>, +) -> usize { + match band_iter.as_mut() { + Some(iter) => iter.next().flatten().unwrap_or(1).max(1) as usize, + None => 1, + } +} + +/// Reproject `point_wkb` from its CRS into the raster CRS, returning the +/// transformed WKB only when a reprojection actually happened (so the caller +/// can sample the original bytes otherwise — no allocation in the common case). +/// +/// Errors if exactly one of the point / raster carries a CRS: sampling across a +/// known and an unknown CRS would silently mislocate the point. +fn reproject_point( + point_wkb: &[u8], + point_crs: CrsRef<'_>, + raster_crs: CrsRef<'_>, + engine: &dyn sedona_geometry::transform::CrsEngine, +) -> Result>> { + match (point_crs, raster_crs) { + (Some(point_crs), Some(raster_crs)) => { + if point_crs.crs_equals(raster_crs) { + Ok(None) + } else { + Ok(Some(crs_transform_wkb( + point_wkb, point_crs, raster_crs, engine, + )?)) + } + } + (None, None) => Ok(None), + (Some(_), None) => { + exec_err!("RS_Value: point has a CRS but the raster does not") + } + (None, Some(_)) => { + exec_err!("RS_Value: raster has a CRS but the point does not") + } + } +} + +/// Read the (x, y) coordinates of a WKB Point geometry. +fn read_point_xy(wkb: &[u8]) -> Result<(f64, f64)> { + let geom = read_wkb(wkb).map_err(|e| DataFusionError::External(Box::new(e)))?; + match geom.as_type() { + GeometryType::Point(point) => { + let coord = point + .coord() + .ok_or_else(|| exec_datafusion_err!("RS_Value: empty point geometry"))?; + Ok((coord.x(), coord.y())) + } + _ => exec_err!("RS_Value expects a Point geometry"), + } +} + +/// Sample band `band_num` (1-based) at 0-based pixel `(col, row)` as `f64`. +/// +/// Returns `None` when the pixel is out of bounds or equals the band's nodata. +/// Reads exactly one pixel by computing its byte offset from the band's +/// [`NdBuffer`](sedona_raster::traits::NdBuffer) strides — zero-copy and O(1), +/// no whole-band materialisation. Errors if the band index is out of range or +/// the band is not 2-D. +fn sample_pixel( + raster: &dyn RasterRef, + col: i64, + row: i64, + band_num: usize, +) -> Result> { + let band = raster + .bands() + .band(band_num) + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; + let buffer = band + .nd_buffer() + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; + + if buffer.shape.len() != 2 { + return exec_err!( + "RS_Value supports 2-D rasters only; band has {} dimensions", + buffer.shape.len() + ); + } + let (height, width) = (buffer.shape[0], buffer.shape[1]); + if row < 0 || row >= height || col < 0 || col >= width { + return Ok(None); + } + + // Byte offset of the (row, col) pixel via the band's own strides, so the + // read stays correct for any layout the producer hands us. + let byte_offset = buffer.offset as i64 + row * buffer.strides[0] + col * buffer.strides[1]; + let size = buffer.data_type.byte_size() as i64; + let start = usize::try_from(byte_offset) + .map_err(|_| exec_datafusion_err!("RS_Value: negative pixel byte offset"))?; + let end = usize::try_from(byte_offset + size) + .map_err(|_| exec_datafusion_err!("RS_Value: pixel byte offset overflow"))?; + let bytes = buffer.buffer.get(start..end).ok_or_else(|| { + exec_datafusion_err!("RS_Value: pixel is out of the band's buffer bounds") + })?; + + let value = nodata_bytes_to_f64_lossless(bytes, &buffer.data_type) + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; + + if let Some(nodata) = band + .nodata_as_f64() + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))? + { + if value == nodata || (value.is_nan() && nodata.is_nan()) { + return Ok(None); + } + } + + Ok(Some(value)) +} + +#[cfg(test)] +mod tests { + use super::*; + use datafusion_expr::ScalarUDF; + use sedona_raster::array::RasterStructArray; + use sedona_schema::datatypes::RASTER; + use sedona_schema::raster::BandDataType; + use sedona_testing::raster_spec::RasterSpec; + + /// Resolve a single `RasterRefImpl` from a one-row spec for direct + /// `sample_pixel` exercise. + fn sample(spec: RasterSpec, col: i64, row: i64, band: usize) -> Result> { + let array = spec.build(); + let rasters = RasterStructArray::try_new(&array).unwrap(); + let raster = rasters.get(0).unwrap(); + sample_pixel(&raster, col, row, band) + } + + #[test] + fn udf_metadata() { + let udf: ScalarUDF = rs_value_udf().into(); + assert_eq!(udf.name(), "rs_value"); + } + + #[test] + fn udf_marks_needs_pixels() { + assert_eq!( + rs_value_udf() + .metadata() + .get(NEEDS_PIXELS_METADATA_KEY) + .map(String::as_str), + Some("true") + ); + } + + #[test] + fn return_type_is_float64() { + // Grid (raster, int, int) resolves to a Float64 output. + let return_type = RsValueGrid { with_band: false } + .return_type(&[ + RASTER, + SedonaType::Arrow(DataType::Int32), + SedonaType::Arrow(DataType::Int32), + ]) + .unwrap(); + assert_eq!(return_type, Some(SedonaType::Arrow(DataType::Float64))); + } + + #[test] + fn samples_2d_pixels_row_major() { + // 3x2 raster, row-major pixels: + // row0 = [10, 20, 30], row1 = [40, 50, 60] + let spec = || RasterSpec::d2(3, 2).band_values(&[10u8, 20, 30, 40, 50, 60]); + assert_eq!(sample(spec(), 0, 0, 1).unwrap(), Some(10.0)); // top-left + assert_eq!(sample(spec(), 2, 0, 1).unwrap(), Some(30.0)); // top-right + assert_eq!(sample(spec(), 0, 1, 1).unwrap(), Some(40.0)); // bottom-left + assert_eq!(sample(spec(), 2, 1, 1).unwrap(), Some(60.0)); // bottom-right + } + + #[test] + fn out_of_bounds_pixel_is_none() { + let spec = || RasterSpec::d2(3, 2).band_values(&[10u8, 20, 30, 40, 50, 60]); + assert_eq!(sample(spec(), 3, 0, 1).unwrap(), None); // col == width + assert_eq!(sample(spec(), 0, 2, 1).unwrap(), None); // row == height + assert_eq!(sample(spec(), -1, 0, 1).unwrap(), None); // negative + } + + #[test] + fn nodata_pixel_is_none() { + let spec = RasterSpec::d2(2, 1).band_values(&[7u8, 9]).nodata(9u8); + assert_eq!(sample(spec.clone(), 0, 0, 1).unwrap(), Some(7.0)); + assert_eq!(sample(spec, 1, 0, 1).unwrap(), None); + } + + #[test] + fn second_band_is_addressable() { + let spec = RasterSpec::d2(2, 1) + .band_values(&[1u8, 2]) + .band_values(&[30u8, 40]); + assert_eq!(sample(spec.clone(), 1, 0, 1).unwrap(), Some(2.0)); + assert_eq!(sample(spec, 1, 0, 2).unwrap(), Some(40.0)); + } + + #[test] + fn float_band_values_round_trip() { + let spec = RasterSpec::d2(2, 1).band_values(&[1.5f32, -2.5]); + assert_eq!(sample(spec.clone(), 0, 0, 1).unwrap(), Some(1.5)); + assert_eq!(sample(spec, 1, 0, 1).unwrap(), Some(-2.5)); + } + + #[test] + fn band_out_of_range_errors() { + let spec = RasterSpec::d2(2, 1).band_values(&[1u8, 2]); + let err = sample(spec, 0, 0, 2).unwrap_err().to_string(); + assert!(err.contains("RS_Value"), "unexpected error: {err}"); + } + + #[test] + fn non_2d_band_errors() { + // A band with a leading non-spatial dimension is rejected. + let spec = RasterSpec::nd(&["time", "y", "x"], &[2, 2, 1]).band(BandDataType::UInt8); + let err = sample(spec, 0, 0, 1).unwrap_err().to_string(); + assert!(err.contains("2-D"), "unexpected error: {err}"); + } +} From 798b67c489e0b2b5f719c5ffbe64dce0963b937b Mon Sep 17 00:00:00 2001 From: jameswillis Date: Wed, 17 Jun 2026 14:15:37 -0700 Subject: [PATCH 02/18] chore(rust/sedona-raster-functions): benchmark RS_Value Add RS_Value cases (grid, point, and point+band) to the consolidated native-raster-functions Criterion bench, alongside the other RS_* kernels. --- .../benches/native-raster-functions.rs | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/rust/sedona-raster-functions/benches/native-raster-functions.rs b/rust/sedona-raster-functions/benches/native-raster-functions.rs index 2b6ffeef1..d3bd04df8 100644 --- a/rust/sedona-raster-functions/benches/native-raster-functions.rs +++ b/rust/sedona-raster-functions/benches/native-raster-functions.rs @@ -214,6 +214,38 @@ fn criterion_benchmark(c: &mut Criterion) { ), ); + // RS_Value(raster, colX, rowY) - grid sampling + benchmark::scalar( + c, + &f, + "native-raster", + "rs_value", + BenchmarkArgs::ArrayScalarScalar(Raster(64, 64), Int32(1, 64), Int32(1, 64)), + ); + // RS_Value(raster, point) + benchmark::scalar( + c, + &f, + "native-raster", + "rs_value", + BenchmarkArgs::ArrayArray( + Raster(64, 64), + Transformed(Box::new(Point), sd_apply_default_crs_udf().into()), + ), + ); + // RS_Value(raster, point, band) + benchmark::scalar( + c, + &f, + "native-raster", + "rs_value", + BenchmarkArgs::ArrayArrayScalar( + Raster(64, 64), + Transformed(Box::new(Point), sd_apply_default_crs_udf().into()), + Int32(1, 2), + ), + ); + // RS_Intersects(raster, geometry) - point benchmark::scalar( c, From 82b796821cd9cb8c6a259bf41aaba43c36431656 Mon Sep 17 00:00:00 2001 From: jameswillis Date: Wed, 17 Jun 2026 15:34:45 -0700 Subject: [PATCH 03/18] fix(rust/sedona-raster-functions): tighten RS_Value point sampling and band handling - Floor (not truncate toward zero) the inverse transform so a point just outside the top/left edge is out of bounds rather than sampling an edge pixel. - Require a 2-D spatial (y, x) band via is_spatial_2d, not any two-axis band. - Propagate a NULL band argument to a NULL result; reject band 0/negative rather than coercing to band 1. - Document why the pixel read uses the lossless f64 conversion. - Add tests: floor edge case, NaN nodata, band 0, and the point CRS-mismatch / non-point-geometry error paths. --- docs/reference/sql/rs_value.qmd | 2 +- .../tests/functions/test_raster_functions.py | 1 + rust/sedona-raster-functions/src/rs_value.rs | 184 +++++++++++++++--- 3 files changed, 155 insertions(+), 32 deletions(-) diff --git a/docs/reference/sql/rs_value.qmd b/docs/reference/sql/rs_value.qmd index dffac1fd8..5405cc4ed 100644 --- a/docs/reference/sql/rs_value.qmd +++ b/docs/reference/sql/rs_value.qmd @@ -28,7 +28,7 @@ kernels: - name: point type: geometry description: > - Point whose containing pixel is sampled (nearest pixel, no resampling). + The pixel that contains this point is sampled (no resampling). Reprojected into the raster CRS when both carry one. - returns: float64 args: diff --git a/python/sedonadb/tests/functions/test_raster_functions.py b/python/sedonadb/tests/functions/test_raster_functions.py index 4df37a685..8513c24e3 100644 --- a/python/sedonadb/tests/functions/test_raster_functions.py +++ b/python/sedonadb/tests/functions/test_raster_functions.py @@ -124,6 +124,7 @@ def test_rs_ensureloaded(con, sedona_testing): ("RS_Value(raster, 65, 1)", None), # colX past the width (64) ("RS_Value(raster, 1, 33)", None), # rowY past the height (32) ("RS_Value(raster, 0, 1)", None), # colX 0 -> off the left edge + ("RS_Value(raster, 2, 1, CAST(NULL AS INT))", None), # NULL band -> NULL ], ) def test_rs_value_grid(raster_con, expr, expected): diff --git a/rust/sedona-raster-functions/src/rs_value.rs b/rust/sedona-raster-functions/src/rs_value.rs index 35893385c..20a7c9c19 100644 --- a/rust/sedona-raster-functions/src/rs_value.rs +++ b/rust/sedona-raster-functions/src/rs_value.rs @@ -24,10 +24,10 @@ //! RS_Value(raster, colX, rowY, band) -> Double //! ``` //! -//! Returns the value of the pixel the point falls in (nearest pixel, no -//! resampling), or the value at the given 1-based grid cell. The result is -//! `NULL` when the raster/arguments are null, the point/cell is out of bounds, -//! or the value equals the band's nodata. +//! Returns the value of the pixel that contains the point (no resampling), or +//! the value at the given 1-based grid cell. The result is `NULL` when the +//! raster/arguments are null, the point/cell is out of bounds, or the value +//! equals the band's nodata. //! //! The function is tagged [`NEEDS_PIXELS_METADATA_KEY`], so the planner wraps //! its raster argument in `RS_EnsureLoaded`; by the time a kernel runs the band @@ -45,7 +45,7 @@ use datafusion_expr::{ColumnarValue, Volatility}; use geo_traits::{CoordTrait, GeometryTrait, GeometryType, PointTrait}; use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; use sedona_proj::transform::with_global_proj_engine; -use sedona_raster::affine_transformation::to_raster_coordinate; +use sedona_raster::affine_transformation::AffineMatrix; use sedona_raster::traits::{nodata_bytes_to_f64_lossless, RasterRef}; use sedona_schema::crs::CrsRef; use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; @@ -119,15 +119,16 @@ impl SedonaScalarKernel for RsValuePoint { // Reprojecting the point into the raster CRS needs a PROJ engine. with_global_proj_engine(|engine| { executor.execute_raster_wkb_crs_void(|raster_opt, wkb_opt, point_crs| { - let band_num = next_band(&mut band_iter); - - let (raster, point_wkb) = match (raster_opt, wkb_opt) { - (Some(raster), Some(point_wkb)) => (raster, point_wkb), - _ => { - builder.append_null(); - return Ok(()); - } - }; + let (raster, point_wkb, band_num) = + match (raster_opt, wkb_opt, next_band(&mut band_iter)) { + (Some(raster), Some(point_wkb), Some(band_num)) => { + (raster, point_wkb, band_num) + } + _ => { + builder.append_null(); + return Ok(()); + } + }; // Bring the point into the raster's CRS. A reprojection only // happens when both sides carry a (differing) CRS; otherwise @@ -138,8 +139,13 @@ impl SedonaScalarKernel for RsValuePoint { let wkb = reprojected.as_deref().unwrap_or(point_wkb); let (x, y) = read_point_xy(wkb)?; - let (col, row) = to_raster_coordinate(raster, x, y) + // Floor (not truncate toward zero) so a point just outside the + // top/left edge maps to a negative index and is rejected as out + // of bounds, rather than truncating to 0 and sampling an edge pixel. + let (raster_x, raster_y) = AffineMatrix::from_metadata(&raster.metadata()) + .inv_transform(x, y) .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; + let (col, row) = (raster_x.floor() as i64, raster_y.floor() as i64); match sample_pixel(raster, col, row, band_num)? { Some(value) => builder.append_value(value), @@ -220,8 +226,8 @@ impl SedonaScalarKernel for RsValueGrid { let row = row_iter.next().flatten(); let band_num = next_band(&mut band_iter); - match (raster_opt, col, row) { - (Some(raster), Some(col), Some(row)) => { + match (raster_opt, col, row, band_num) { + (Some(raster), Some(col), Some(row), Some(band_num)) => { // 1-based grid coordinates -> 0-based pixel indices. match sample_pixel(raster, col as i64 - 1, row as i64 - 1, band_num)? { Some(value) => builder.append_value(value), @@ -237,15 +243,17 @@ impl SedonaScalarKernel for RsValueGrid { } } -/// Advance a band-number iterator one row, returning a clamped 1-based band -/// number. Defaults to band 1 when the argument is absent or null and clamps -/// non-positive inputs up to 1, matching the legacy behaviour. +/// Advance the optional band-number iterator one row, yielding the 1-based band +/// to sample. A missing band argument defaults to band 1; a NULL band element +/// returns `None`, which the caller propagates to a NULL result. Band 0 and +/// negative values map to 0 so [`Bands::band`](sedona_raster::traits::Bands::band) +/// rejects them as not 1-based rather than being silently coerced. fn next_band( band_iter: &mut Option>, -) -> usize { +) -> Option { match band_iter.as_mut() { - Some(iter) => iter.next().flatten().unwrap_or(1).max(1) as usize, - None => 1, + None => Some(1), + Some(iter) => iter.next().flatten().map(|b| b.max(0) as usize), } } @@ -312,16 +320,15 @@ fn sample_pixel( .bands() .band(band_num) .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; + + // 2-D only: the band must be a recognized spatial (y, x) grid, not just any + // two-axis band (e.g. (time, band) would have len 2 but no spatial meaning). + if !band.is_spatial_2d() { + return exec_err!("RS_Value supports 2-D rasters only; band is not a 2-D (y, x) grid"); + } let buffer = band .nd_buffer() .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; - - if buffer.shape.len() != 2 { - return exec_err!( - "RS_Value supports 2-D rasters only; band has {} dimensions", - buffer.shape.len() - ); - } let (height, width) = (buffer.shape[0], buffer.shape[1]); if row < 0 || row >= height || col < 0 || col >= width { return Ok(None); @@ -339,6 +346,10 @@ fn sample_pixel( exec_datafusion_err!("RS_Value: pixel is out of the band's buffer bounds") })?; + // Decode the pixel to f64. The lossless converter errors (rather than + // silently rounding) on Int64/UInt64 values beyond f64's exact-integer + // range (2^53) — RS_Value returns a Double, so such a pixel can't be + // represented faithfully; failing loudly is preferred over a wrong value. let value = nodata_bytes_to_f64_lossless(bytes, &buffer.data_type) .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; @@ -357,11 +368,16 @@ fn sample_pixel( #[cfg(test)] mod tests { use super::*; + use arrow_array::{Array, Float64Array}; use datafusion_expr::ScalarUDF; use sedona_raster::array::RasterStructArray; - use sedona_schema::datatypes::RASTER; + use sedona_schema::crs::lnglat; + use sedona_schema::datatypes::{Edges, RASTER}; use sedona_schema::raster::BandDataType; + use sedona_testing::create::create_array as create_geom_array; use sedona_testing::raster_spec::RasterSpec; + use sedona_testing::rasters::generate_test_rasters; + use sedona_testing::testers::ScalarUdfTester; /// Resolve a single `RasterRefImpl` from a one-row spec for direct /// `sample_pixel` exercise. @@ -451,6 +467,25 @@ mod tests { assert!(err.contains("RS_Value"), "unexpected error: {err}"); } + #[test] + fn band_zero_errors() { + // Band 0 is not coerced to band 1 — it surfaces as a 1-based error. + let spec = RasterSpec::d2(2, 1).band_values(&[1u8, 2]); + let err = sample(spec, 0, 0, 0).unwrap_err().to_string(); + assert!(err.contains("1-based"), "unexpected error: {err}"); + } + + #[test] + fn nan_nodata_pixel_is_none() { + // A float band whose nodata is NaN: a NaN pixel reads as NULL (NaN != NaN + // makes the `==` check insufficient), a normal pixel reads as its value. + let spec = RasterSpec::d2(2, 1) + .band_values(&[f64::NAN, 1.0]) + .nodata(f64::NAN); + assert_eq!(sample(spec.clone(), 0, 0, 1).unwrap(), None); + assert_eq!(sample(spec, 1, 0, 1).unwrap(), Some(1.0)); + } + #[test] fn non_2d_band_errors() { // A band with a leading non-spatial dimension is rejected. @@ -458,4 +493,91 @@ mod tests { let err = sample(spec, 0, 0, 1).unwrap_err().to_string(); assert!(err.contains("2-D"), "unexpected error: {err}"); } + + #[test] + fn point_crs_mismatch_errors() { + let udf: ScalarUDF = rs_value_udf().into(); + + // Raster has a CRS (generate_test_rasters sets OGC:CRS84), point does not. + let geom_type = SedonaType::Wkb(Edges::Planar, None); + let tester = ScalarUdfTester::new(udf.clone(), vec![RASTER, geom_type.clone()]); + let rasters = generate_test_rasters(1, None).unwrap(); + let geoms = create_geom_array(&[Some("POINT (2.1 2.6)")], &geom_type); + let err = tester + .invoke_arrays(vec![Arc::new(rasters), geoms]) + .unwrap_err() + .to_string(); + assert!( + err.contains("raster has a CRS but the point does not"), + "unexpected error: {err}" + ); + + // Point has a CRS, raster does not. + let geom_type = SedonaType::Wkb(Edges::Planar, lnglat()); + let tester = ScalarUdfTester::new(udf, vec![RASTER, geom_type.clone()]); + let rasters = RasterSpec::d2(2, 2) + .band(BandDataType::UInt8) + .crs(None) + .build(); + let geoms = create_geom_array(&[Some("POINT (0 0)")], &geom_type); + let err = tester + .invoke_arrays(vec![Arc::new(rasters), geoms]) + .unwrap_err() + .to_string(); + assert!( + err.contains("point has a CRS but the raster does not"), + "unexpected error: {err}" + ); + } + + #[test] + fn non_point_geometry_errors() { + let udf: ScalarUDF = rs_value_udf().into(); + let geom_type = SedonaType::Wkb(Edges::Planar, lnglat()); + let tester = ScalarUdfTester::new(udf, vec![RASTER, geom_type.clone()]); + let rasters = generate_test_rasters(1, None).unwrap(); + let geoms = create_geom_array(&[Some("LINESTRING (0 0, 1 1)")], &geom_type); + let err = tester + .invoke_arrays(vec![Arc::new(rasters), geoms]) + .unwrap_err() + .to_string(); + assert!( + err.contains("expects a Point geometry"), + "unexpected error: {err}" + ); + } + + #[test] + fn point_just_outside_top_edge_is_none() { + // North-up raster: origin (0, 10), 1x1 pixels (geotransform + // [c, a, b, f, d, e] = [0, 1, 0, 10, 0, -1]), so world y decreases down + // the rows. A point at y = 10.5 is just *above* the top edge: its pixel + // row is -0.5, which must floor to -1 (out of bounds -> NULL), not + // truncate toward zero to 0 (the top row). + let udf: ScalarUDF = rs_value_udf().into(); + let geom_type = SedonaType::Wkb(Edges::Planar, lnglat()); + let tester = ScalarUdfTester::new(udf, vec![RASTER, geom_type.clone()]); + let raster = || { + RasterSpec::d2(2, 2) + .band_values(&[1u8, 2, 3, 4]) + .transform([0.0, 1.0, 0.0, 10.0, 0.0, -1.0]) + .build() + }; + + // Just above the top edge -> NULL. + let geoms = create_geom_array(&[Some("POINT (0.5 10.5)")], &geom_type); + let result = tester + .invoke_arrays(vec![Arc::new(raster()), geoms]) + .unwrap(); + let arr = result.as_any().downcast_ref::().unwrap(); + assert!(arr.is_null(0), "point above the top edge should be NULL"); + + // Just inside the top row -> the top-left value (1). + let geoms = create_geom_array(&[Some("POINT (0.5 9.5)")], &geom_type); + let result = tester + .invoke_arrays(vec![Arc::new(raster()), geoms]) + .unwrap(); + let arr = result.as_any().downcast_ref::().unwrap(); + assert_eq!(arr.value(0), 1.0); + } } From 62b89735d6d7c0b582a5103f19374a234cb11393 Mon Sep 17 00:00:00 2001 From: jameswillis Date: Mon, 22 Jun 2026 15:19:50 -0700 Subject: [PATCH 04/18] test(python): inline RS_Example and use assert_query_result for RS_Value tests Drop the raster_con fixture + query_value helper; sample the inline RS_Example() raster and assert via assert_query_result, matching the other RS_ function tests. (The RS_EnsureLoaded planner path is covered against a real OutDb raster by test_rs_ensureloaded.) --- python/sedonadb/tests/functions/conftest.py | 34 ----------- .../tests/functions/test_raster_functions.py | 58 ++++++++++--------- 2 files changed, 30 insertions(+), 62 deletions(-) delete mode 100644 python/sedonadb/tests/functions/conftest.py diff --git a/python/sedonadb/tests/functions/conftest.py b/python/sedonadb/tests/functions/conftest.py deleted file mode 100644 index d71fa4c67..000000000 --- a/python/sedonadb/tests/functions/conftest.py +++ /dev/null @@ -1,34 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import pytest -import sedonadb - - -@pytest.fixture -def raster_con(): - """A connection with a single `RS_Example()` raster registered as `rasters`. - - Uses the in-DB `RS_Example()` raster (64x32, three UInt8 bands) so the - sedonadb package's RS_ function tests carry no zarr dependency — zarr - reader behaviors are tested in the sedonadb-zarr package. A dedicated - connection (not the shared module-level one) keeps the view from leaking - into other tests. - """ - con = sedonadb.connect() - con.sql("SELECT RS_Example() AS raster").to_view("rasters") - return con diff --git a/python/sedonadb/tests/functions/test_raster_functions.py b/python/sedonadb/tests/functions/test_raster_functions.py index 8513c24e3..4717c4001 100644 --- a/python/sedonadb/tests/functions/test_raster_functions.py +++ b/python/sedonadb/tests/functions/test_raster_functions.py @@ -20,12 +20,6 @@ from sedonadb.testing import SedonaDB -def query_value(con, expr): - """Evaluate `expr` over the single example raster row and return the value.""" - table = con.sql(f"SELECT {expr} AS v FROM rasters").to_arrow_table() - return table["v"][0].as_py() - - @pytest.mark.parametrize( ("expr", "expected"), [ @@ -108,27 +102,26 @@ def test_rs_ensureloaded(con, sedona_testing): # RS_Example fills band `b` with the constant value `b`, except the top-left # pixel (colX=1, rowY=1) which is set to the nodata value (127). Grid -# coordinates are 1-based. These also exercise the `needs_pixels` -> -# RS_EnsureLoaded planner path end to end (RS_Value reads materialised InDb -# bytes). +# coordinates are 1-based. (The `needs_pixels` -> RS_EnsureLoaded planner path +# is covered against a real OutDb raster by `test_rs_ensureloaded`.) @pytest.mark.parametrize( ("expr", "expected"), [ - ("RS_Value(raster, 2, 1)", 1.0), # band 1, away from the nodata corner - ("RS_Value(raster, 64, 32)", 1.0), # bottom-right pixel, in bounds - ("RS_Value(raster, 2, 1, 1)", 1.0), # explicit band 1 == the default - ("RS_Value(raster, 2, 1, 2)", 2.0), # band 2 - ("RS_Value(raster, 2, 1, 3)", 3.0), # band 3 - ("RS_Value(raster, 1, 1)", None), # top-left pixel is nodata - ("RS_Value(raster, 1, 1, 2)", None), # nodata on band 2 too - ("RS_Value(raster, 65, 1)", None), # colX past the width (64) - ("RS_Value(raster, 1, 33)", None), # rowY past the height (32) - ("RS_Value(raster, 0, 1)", None), # colX 0 -> off the left edge - ("RS_Value(raster, 2, 1, CAST(NULL AS INT))", None), # NULL band -> NULL + ("RS_Value(RS_Example(), 2, 1)", 1.0), # band 1, away from the nodata corner + ("RS_Value(RS_Example(), 64, 32)", 1.0), # bottom-right pixel, in bounds + ("RS_Value(RS_Example(), 2, 1, 1)", 1.0), # explicit band 1 == the default + ("RS_Value(RS_Example(), 2, 1, 2)", 2.0), # band 2 + ("RS_Value(RS_Example(), 2, 1, 3)", 3.0), # band 3 + ("RS_Value(RS_Example(), 1, 1)", None), # top-left pixel is nodata + ("RS_Value(RS_Example(), 1, 1, 2)", None), # nodata on band 2 too + ("RS_Value(RS_Example(), 65, 1)", None), # colX past the width (64) + ("RS_Value(RS_Example(), 1, 33)", None), # rowY past the height (32) + ("RS_Value(RS_Example(), 0, 1)", None), # colX 0 -> off the left edge + ("RS_Value(RS_Example(), 2, 1, CAST(NULL AS INT))", None), # NULL band -> NULL ], ) -def test_rs_value_grid(raster_con, expr, expected): - assert query_value(raster_con, expr) == expected +def test_rs_value_grid(expr, expected): + SedonaDB().assert_query_result(f"SELECT {expr}", expected) # Point sampling. (74.58, 110.57) is the centroid of pixel (10, 10) (0-based) @@ -137,11 +130,20 @@ def test_rs_value_grid(raster_con, expr, expected): @pytest.mark.parametrize( ("expr", "expected"), [ - ("RS_Value(raster, ST_SetCRS(ST_Point(74.58, 110.57), 'OGC:CRS84'))", 1.0), - ("RS_Value(raster, ST_SetCRS(ST_Point(74.58, 110.57), 'OGC:CRS84'), 2)", 2.0), - ("RS_Value(raster, ST_SetCRS(ST_Point(74.58, 110.57), 'OGC:CRS84'), 3)", 3.0), - ("RS_Value(raster, ST_SetCRS(ST_Point(0.0, 0.0), 'OGC:CRS84'))", None), + ( + "RS_Value(RS_Example(), ST_SetCRS(ST_Point(74.58, 110.57), 'OGC:CRS84'))", + 1.0, + ), + ( + "RS_Value(RS_Example(), ST_SetCRS(ST_Point(74.58, 110.57), 'OGC:CRS84'), 2)", + 2.0, + ), + ( + "RS_Value(RS_Example(), ST_SetCRS(ST_Point(74.58, 110.57), 'OGC:CRS84'), 3)", + 3.0, + ), + ("RS_Value(RS_Example(), ST_SetCRS(ST_Point(0.0, 0.0), 'OGC:CRS84'))", None), ], ) -def test_rs_value_point(raster_con, expr, expected): - assert query_value(raster_con, expr) == expected +def test_rs_value_point(expr, expected): + SedonaDB().assert_query_result(f"SELECT {expr}", expected) From d593bb5a5da93c0ea22604b690c1903bc4851d4d Mon Sep 17 00:00:00 2001 From: jameswillis Date: Tue, 23 Jun 2026 11:01:49 -0700 Subject: [PATCH 05/18] feat(rust/sedona-raster-functions): RS_Value empty-point -> NULL, sample via WkbHeader - POINT EMPTY now samples to NULL instead of erroring. - Extract the point x/y via a header-only WkbHeader parse (no full geometry walk), which also yields NaN coords for empties and lets the empty check short-circuit before any CRS resolution. - Hold the cast Int32 arguments as ArrayRefs and borrow typed views instead of cloning the typed arrays. - Note that the f64->i64 index cast saturates (never panics). --- .../tests/functions/test_raster_functions.py | 5 + rust/sedona-raster-functions/src/rs_value.rs | 144 ++++++++++-------- 2 files changed, 86 insertions(+), 63 deletions(-) diff --git a/python/sedonadb/tests/functions/test_raster_functions.py b/python/sedonadb/tests/functions/test_raster_functions.py index 4717c4001..be258fa1d 100644 --- a/python/sedonadb/tests/functions/test_raster_functions.py +++ b/python/sedonadb/tests/functions/test_raster_functions.py @@ -143,6 +143,11 @@ def test_rs_value_grid(expr, expected): 3.0, ), ("RS_Value(RS_Example(), ST_SetCRS(ST_Point(0.0, 0.0), 'OGC:CRS84'))", None), + # POINT EMPTY has no location to sample -> NULL (not an error). + ( + "RS_Value(RS_Example(), ST_SetCRS(ST_GeomFromText('POINT EMPTY'), 'OGC:CRS84'))", + None, + ), ], ) def test_rs_value_point(expr, expected): diff --git a/rust/sedona-raster-functions/src/rs_value.rs b/rust/sedona-raster-functions/src/rs_value.rs index 20a7c9c19..76c6204f2 100644 --- a/rust/sedona-raster-functions/src/rs_value.rs +++ b/rust/sedona-raster-functions/src/rs_value.rs @@ -26,8 +26,8 @@ //! //! Returns the value of the pixel that contains the point (no resampling), or //! the value at the given 1-based grid cell. The result is `NULL` when the -//! raster/arguments are null, the point/cell is out of bounds, or the value -//! equals the band's nodata. +//! raster/arguments are null, the point is empty, the point/cell is out of +//! bounds, or the value equals the band's nodata. //! //! The function is tagged [`NEEDS_PIXELS_METADATA_KEY`], so the planner wraps //! its raster argument in `RS_EnsureLoaded`; by the time a kernel runs the band @@ -37,19 +37,19 @@ use std::sync::Arc; -use arrow_array::builder::Float64Builder; +use arrow_array::{builder::Float64Builder, ArrayRef}; use arrow_schema::DataType; use datafusion_common::cast::as_int32_array; -use datafusion_common::{exec_datafusion_err, exec_err, DataFusionError, Result}; +use datafusion_common::{exec_datafusion_err, exec_err, Result}; use datafusion_expr::{ColumnarValue, Volatility}; -use geo_traits::{CoordTrait, GeometryTrait, GeometryType, PointTrait}; use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; +use sedona_geometry::types::GeometryTypeId; +use sedona_geometry::wkb_header::WkbHeader; use sedona_proj::transform::with_global_proj_engine; use sedona_raster::affine_transformation::AffineMatrix; use sedona_raster::traits::{nodata_bytes_to_f64_lossless, RasterRef}; use sedona_schema::crs::CrsRef; use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; -use wkb::reader::read_wkb; use crate::crs_utils::{crs_transform_wkb, resolve_crs}; use crate::executor::RasterExecutor; @@ -100,21 +100,16 @@ impl SedonaScalarKernel for RsValuePoint { let num_iterations = executor.num_iterations(); let mut builder = Float64Builder::with_capacity(num_iterations); - // The optional band argument, materialised once as an Int32 array. - let band_array = if self.with_band { - Some( - as_int32_array( - &args[2] - .clone() - .cast_to(&DataType::Int32, None)? - .into_array(num_iterations)?, - )? - .clone(), - ) + // The optional band argument, materialised once as an Int32 array. Held + // as an `ArrayRef` so the typed view below borrows it instead of cloning + // the typed `Int32Array`. + let band_arr = if self.with_band { + Some(int32_array_arg(&args[2], num_iterations)?) } else { None }; - let mut band_iter = band_array.as_ref().map(|a| a.iter()); + let band_array = band_arr.as_ref().map(|a| as_int32_array(a)).transpose()?; + let mut band_iter = band_array.map(|a| a.iter()); // Reprojecting the point into the raster CRS needs a PROJ engine. with_global_proj_engine(|engine| { @@ -130,21 +125,46 @@ impl SedonaScalarKernel for RsValuePoint { } }; + // Header-only parse: reject non-points up front, and treat + // POINT EMPTY (NaN coords) as "no location to sample" -> NULL, + // before doing any reprojection work. + let header = WkbHeader::try_new(point_wkb) + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; + if header + .geometry_type_id() + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))? + != GeometryTypeId::Point + { + return exec_err!("RS_Value expects a Point geometry"); + } + if header + .is_empty() + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))? + { + builder.append_null(); + return Ok(()); + } + // Bring the point into the raster's CRS. A reprojection only // happens when both sides carry a (differing) CRS; otherwise - // the original WKB is sampled directly. + // the original coordinates are sampled directly. let raster_crs = resolve_crs(raster.crs())?; - let reprojected = - reproject_point(point_wkb, point_crs, raster_crs.as_deref(), engine)?; - let wkb = reprojected.as_deref().unwrap_or(point_wkb); - - let (x, y) = read_point_xy(wkb)?; + let (x, y) = + match reproject_point(point_wkb, point_crs, raster_crs.as_deref(), engine)? { + Some(reprojected) => WkbHeader::try_new(&reprojected) + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))? + .first_xy(), + None => header.first_xy(), + }; // Floor (not truncate toward zero) so a point just outside the // top/left edge maps to a negative index and is rejected as out // of bounds, rather than truncating to 0 and sampling an edge pixel. let (raster_x, raster_y) = AffineMatrix::from_metadata(&raster.metadata()) .inv_transform(x, y) .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; + // `f64 -> i64` is a saturating cast (it never panics): a NaN + // index becomes 0 and ±∞ saturate to the i64 bounds, all of + // which the bounds check in `sample_pixel` rejects as NULL. let (col, row) = (raster_x.floor() as i64, raster_y.floor() as i64); match sample_pixel(raster, col, row, band_num)? { @@ -189,37 +209,22 @@ impl SedonaScalarKernel for RsValueGrid { let num_iterations = executor.num_iterations(); let mut builder = Float64Builder::with_capacity(num_iterations); - let col_array = as_int32_array( - &args[1] - .clone() - .cast_to(&DataType::Int32, None)? - .into_array(num_iterations)?, - )? - .clone(); - let row_array = as_int32_array( - &args[2] - .clone() - .cast_to(&DataType::Int32, None)? - .into_array(num_iterations)?, - )? - .clone(); - let band_array = if self.with_band { - Some( - as_int32_array( - &args[3] - .clone() - .cast_to(&DataType::Int32, None)? - .into_array(num_iterations)?, - )? - .clone(), - ) + // Hold the cast arrays as `ArrayRef`s so the typed views below borrow + // them instead of cloning the typed `Int32Array`s. + let col_arr = int32_array_arg(&args[1], num_iterations)?; + let row_arr = int32_array_arg(&args[2], num_iterations)?; + let band_arr = if self.with_band { + Some(int32_array_arg(&args[3], num_iterations)?) } else { None }; + let col_array = as_int32_array(&col_arr)?; + let row_array = as_int32_array(&row_arr)?; + let band_array = band_arr.as_ref().map(|a| as_int32_array(a)).transpose()?; let mut col_iter = col_array.iter(); let mut row_iter = row_array.iter(); - let mut band_iter = band_array.as_ref().map(|a| a.iter()); + let mut band_iter = band_array.map(|a| a.iter()); executor.execute_raster_void(|_, raster_opt| { let col = col_iter.next().flatten(); @@ -243,6 +248,16 @@ impl SedonaScalarKernel for RsValueGrid { } } +/// Materialise an integer argument as an owned `Int32` [`ArrayRef`] for the +/// batch. Callers keep the returned `ArrayRef` alive and borrow a typed +/// `&Int32Array` view from it (via `as_int32_array`) rather than cloning the +/// typed array. +fn int32_array_arg(arg: &ColumnarValue, num_iterations: usize) -> Result { + arg.clone() + .cast_to(&DataType::Int32, None)? + .into_array(num_iterations) +} + /// Advance the optional band-number iterator one row, yielding the 1-based band /// to sample. A missing band argument defaults to band 1; a NULL band element /// returns `None`, which the caller propagates to a NULL result. Band 0 and @@ -289,20 +304,6 @@ fn reproject_point( } } -/// Read the (x, y) coordinates of a WKB Point geometry. -fn read_point_xy(wkb: &[u8]) -> Result<(f64, f64)> { - let geom = read_wkb(wkb).map_err(|e| DataFusionError::External(Box::new(e)))?; - match geom.as_type() { - GeometryType::Point(point) => { - let coord = point - .coord() - .ok_or_else(|| exec_datafusion_err!("RS_Value: empty point geometry"))?; - Ok((coord.x(), coord.y())) - } - _ => exec_err!("RS_Value expects a Point geometry"), - } -} - /// Sample band `band_num` (1-based) at 0-based pixel `(col, row)` as `f64`. /// /// Returns `None` when the pixel is out of bounds or equals the band's nodata. @@ -547,6 +548,23 @@ mod tests { ); } + #[test] + fn empty_point_is_none() { + // POINT EMPTY has no location to sample, so the result is NULL rather + // than an error. The empty check short-circuits before CRS resolution, + // so a missing/again-mismatched point CRS does not matter here. + let udf: ScalarUDF = rs_value_udf().into(); + let geom_type = SedonaType::Wkb(Edges::Planar, lnglat()); + let tester = ScalarUdfTester::new(udf, vec![RASTER, geom_type.clone()]); + let rasters = generate_test_rasters(1, None).unwrap(); + let geoms = create_geom_array(&[Some("POINT EMPTY")], &geom_type); + let result = tester + .invoke_arrays(vec![Arc::new(rasters), geoms]) + .unwrap(); + let arr = result.as_any().downcast_ref::().unwrap(); + assert!(arr.is_null(0), "POINT EMPTY should sample to NULL"); + } + #[test] fn point_just_outside_top_edge_is_none() { // North-up raster: origin (0, 10), 1x1 pixels (geotransform From a61ec7d3363381a358eea16c5ec3194d3929034b Mon Sep 17 00:00:00 2001 From: jameswillis Date: Tue, 23 Jun 2026 11:22:22 -0700 Subject: [PATCH 06/18] docs(rs_value): use the 3-arg ST_Point(x, y, crs) form in the point example --- docs/reference/sql/rs_value.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/sql/rs_value.qmd b/docs/reference/sql/rs_value.qmd index 5405cc4ed..873a00292 100644 --- a/docs/reference/sql/rs_value.qmd +++ b/docs/reference/sql/rs_value.qmd @@ -81,5 +81,5 @@ SELECT RS_Value(RS_Example(), 2, 1, 2); ``` ```sql -SELECT RS_Value(RS_Example(), ST_SetCRS(ST_Point(74.58, 110.57), 'OGC:CRS84')); +SELECT RS_Value(RS_Example(), ST_Point(74.58, 110.57, 'OGC:CRS84')); ``` From b9722e92540d958a15640683adaa05f283119576 Mon Sep 17 00:00:00 2001 From: jameswillis Date: Tue, 23 Jun 2026 11:28:39 -0700 Subject: [PATCH 07/18] fix(rust/sedona-raster-functions): use checked arithmetic for RS_Value pixel byte offset The (row, col) -> byte-offset computation multiplied and added i64 strides and offset unchecked, which could panic on overflow in debug builds given a pathological stride/offset. Use checked_mul/checked_add so a bad layout surfaces as an error instead of a panic. --- rust/sedona-raster-functions/src/rs_value.rs | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/rust/sedona-raster-functions/src/rs_value.rs b/rust/sedona-raster-functions/src/rs_value.rs index 76c6204f2..1ef8a063d 100644 --- a/rust/sedona-raster-functions/src/rs_value.rs +++ b/rust/sedona-raster-functions/src/rs_value.rs @@ -336,12 +336,22 @@ fn sample_pixel( } // Byte offset of the (row, col) pixel via the band's own strides, so the - // read stays correct for any layout the producer hands us. - let byte_offset = buffer.offset as i64 + row * buffer.strides[0] + col * buffer.strides[1]; + // read stays correct for any layout the producer hands us. Checked + // arithmetic throughout: `row`/`col` are already in bounds, but a corrupt + // stride or offset must surface as an error, never an i64 overflow panic. let size = buffer.data_type.byte_size() as i64; + let byte_offset = row + .checked_mul(buffer.strides[0]) + .zip(col.checked_mul(buffer.strides[1])) + .and_then(|(r, c)| r.checked_add(c)) + .and_then(|rc| rc.checked_add(buffer.offset as i64)) + .ok_or_else(|| exec_datafusion_err!("RS_Value: pixel byte offset overflow"))?; + let end_offset = byte_offset + .checked_add(size) + .ok_or_else(|| exec_datafusion_err!("RS_Value: pixel byte offset overflow"))?; let start = usize::try_from(byte_offset) .map_err(|_| exec_datafusion_err!("RS_Value: negative pixel byte offset"))?; - let end = usize::try_from(byte_offset + size) + let end = usize::try_from(end_offset) .map_err(|_| exec_datafusion_err!("RS_Value: pixel byte offset overflow"))?; let bytes = buffer.buffer.get(start..end).ok_or_else(|| { exec_datafusion_err!("RS_Value: pixel is out of the band's buffer bounds") From ee7eaabfbbefbbedd84b9d7e939917e955df2148 Mon Sep 17 00:00:00 2001 From: jameswillis Date: Tue, 23 Jun 2026 11:35:50 -0700 Subject: [PATCH 08/18] feat(rust/sedona-raster-functions): defer RS_Value grid-coordinate variant Ship RS_Value as point-only for now; remove the (raster, colX, rowY[, band]) grid kernels, their docs/examples, Python tests, and benchmark. The grid variant will be introduced as its own function later (its indexing order conflicts with PostGIS ST_Value). Pixel sampling by 0-based (col, row) is retained internally via sample_pixel and still covered by unit tests. --- docs/reference/sql/rs_value.qmd | 45 ++------- .../tests/functions/test_raster_functions.py | 31 +----- .../benches/native-raster-functions.rs | 8 -- rust/sedona-raster-functions/src/rs_value.rs | 98 ++----------------- 4 files changed, 23 insertions(+), 159 deletions(-) diff --git a/docs/reference/sql/rs_value.qmd b/docs/reference/sql/rs_value.qmd index 873a00292..f99d7efc0 100644 --- a/docs/reference/sql/rs_value.qmd +++ b/docs/reference/sql/rs_value.qmd @@ -18,9 +18,9 @@ title: RS_Value description: > - Returns the value of a single raster pixel as a double, selected either by a - point geometry or by 1-based grid coordinates. Returns null if the location is - outside the raster or the pixel holds the band's nodata value. + Returns the value of a single raster pixel as a double, selected by a point + geometry. Returns null if the location is outside the raster or the pixel + holds the band's nodata value. kernels: - returns: float64 args: @@ -38,48 +38,19 @@ kernels: - name: band type: int description: Band index (1-based). Defaults to 1 if not specified. - - returns: float64 - args: - - raster - - name: colX - type: int - description: Column index (1-based). - - name: rowY - type: int - description: Row index (1-based). - - returns: float64 - args: - - raster - - name: colX - type: int - - name: rowY - type: int - - name: band - type: int - description: Band index (1-based). Defaults to 1 if not specified. --- ## Description -`RS_Value` samples one pixel of a raster. The location is given either as a -point geometry — the value of the pixel that contains the point is returned, with -no interpolation — or as 1-based `(colX, rowY)` grid coordinates. The band -defaults to 1. +`RS_Value` samples one pixel of a raster. The location is given as a point +geometry — the value of the pixel that contains the point is returned, with no +interpolation. The band defaults to 1. -The result is `NULL` when the point or grid cell falls outside the raster, or -when the sampled pixel equals the band's nodata value. Only 2-D rasters are -supported. +The result is `NULL` when the point falls outside the raster, or when the +sampled pixel equals the band's nodata value. Only 2-D rasters are supported. ## Examples -```sql -SELECT RS_Value(RS_Example(), 2, 1); -``` - -```sql -SELECT RS_Value(RS_Example(), 2, 1, 2); -``` - ```sql SELECT RS_Value(RS_Example(), ST_Point(74.58, 110.57, 'OGC:CRS84')); ``` diff --git a/python/sedonadb/tests/functions/test_raster_functions.py b/python/sedonadb/tests/functions/test_raster_functions.py index be258fa1d..21d93b985 100644 --- a/python/sedonadb/tests/functions/test_raster_functions.py +++ b/python/sedonadb/tests/functions/test_raster_functions.py @@ -100,33 +100,12 @@ def test_rs_ensureloaded(con, sedona_testing): assert arr[0, 0] == 2324 -# RS_Example fills band `b` with the constant value `b`, except the top-left -# pixel (colX=1, rowY=1) which is set to the nodata value (127). Grid -# coordinates are 1-based. (The `needs_pixels` -> RS_EnsureLoaded planner path +# Point sampling. RS_Example fills band `b` with the constant value `b`, except +# the top-left pixel which is set to the nodata value (127). (74.58, 110.57) is +# the centroid of pixel (10, 10) (0-based) in the raster's OGC:CRS84 space; the +# point and raster share a CRS so no reprojection happens. A point far outside +# the footprint yields NULL. (The `needs_pixels` -> RS_EnsureLoaded planner path # is covered against a real OutDb raster by `test_rs_ensureloaded`.) -@pytest.mark.parametrize( - ("expr", "expected"), - [ - ("RS_Value(RS_Example(), 2, 1)", 1.0), # band 1, away from the nodata corner - ("RS_Value(RS_Example(), 64, 32)", 1.0), # bottom-right pixel, in bounds - ("RS_Value(RS_Example(), 2, 1, 1)", 1.0), # explicit band 1 == the default - ("RS_Value(RS_Example(), 2, 1, 2)", 2.0), # band 2 - ("RS_Value(RS_Example(), 2, 1, 3)", 3.0), # band 3 - ("RS_Value(RS_Example(), 1, 1)", None), # top-left pixel is nodata - ("RS_Value(RS_Example(), 1, 1, 2)", None), # nodata on band 2 too - ("RS_Value(RS_Example(), 65, 1)", None), # colX past the width (64) - ("RS_Value(RS_Example(), 1, 33)", None), # rowY past the height (32) - ("RS_Value(RS_Example(), 0, 1)", None), # colX 0 -> off the left edge - ("RS_Value(RS_Example(), 2, 1, CAST(NULL AS INT))", None), # NULL band -> NULL - ], -) -def test_rs_value_grid(expr, expected): - SedonaDB().assert_query_result(f"SELECT {expr}", expected) - - -# Point sampling. (74.58, 110.57) is the centroid of pixel (10, 10) (0-based) -# in the raster's OGC:CRS84 space; the point and raster share a CRS so no -# reprojection happens. A point far outside the footprint yields NULL. @pytest.mark.parametrize( ("expr", "expected"), [ diff --git a/rust/sedona-raster-functions/benches/native-raster-functions.rs b/rust/sedona-raster-functions/benches/native-raster-functions.rs index d3bd04df8..53ed72a3b 100644 --- a/rust/sedona-raster-functions/benches/native-raster-functions.rs +++ b/rust/sedona-raster-functions/benches/native-raster-functions.rs @@ -214,14 +214,6 @@ fn criterion_benchmark(c: &mut Criterion) { ), ); - // RS_Value(raster, colX, rowY) - grid sampling - benchmark::scalar( - c, - &f, - "native-raster", - "rs_value", - BenchmarkArgs::ArrayScalarScalar(Raster(64, 64), Int32(1, 64), Int32(1, 64)), - ); // RS_Value(raster, point) benchmark::scalar( c, diff --git a/rust/sedona-raster-functions/src/rs_value.rs b/rust/sedona-raster-functions/src/rs_value.rs index 1ef8a063d..bc1a65720 100644 --- a/rust/sedona-raster-functions/src/rs_value.rs +++ b/rust/sedona-raster-functions/src/rs_value.rs @@ -15,19 +15,16 @@ // specific language governing permissions and limitations // under the License. -//! `RS_Value` — sample a raster's pixel value at a point or grid cell. +//! `RS_Value` — sample a raster's pixel value at a point. //! //! ```text -//! RS_Value(raster, point) -> Double -- band defaults to 1 -//! RS_Value(raster, point, band) -> Double -//! RS_Value(raster, colX, rowY) -> Double -- 1-based grid coords, band 1 -//! RS_Value(raster, colX, rowY, band) -> Double +//! RS_Value(raster, point) -> Double -- band defaults to 1 +//! RS_Value(raster, point, band) -> Double //! ``` //! -//! Returns the value of the pixel that contains the point (no resampling), or -//! the value at the given 1-based grid cell. The result is `NULL` when the -//! raster/arguments are null, the point is empty, the point/cell is out of -//! bounds, or the value equals the band's nodata. +//! Returns the value of the pixel that contains the point (no resampling). The +//! result is `NULL` when the raster/arguments are null, the point is empty, the +//! point is out of bounds, or the value equals the band's nodata. //! //! The function is tagged [`NEEDS_PIXELS_METADATA_KEY`], so the planner wraps //! its raster argument in `RS_EnsureLoaded`; by the time a kernel runs the band @@ -55,15 +52,13 @@ use crate::crs_utils::{crs_transform_wkb, resolve_crs}; use crate::executor::RasterExecutor; use crate::rs_ensure_loaded::NEEDS_PIXELS_METADATA_KEY; -/// `RS_Value()` scalar UDF — sample a pixel value at a point or grid cell. +/// `RS_Value()` scalar UDF — sample a pixel value at a point. pub fn rs_value_udf() -> SedonaScalarUDF { SedonaScalarUDF::new( "rs_value", vec![ Arc::new(RsValuePoint { with_band: false }), // RS_Value(raster, point) Arc::new(RsValuePoint { with_band: true }), // RS_Value(raster, point, band) - Arc::new(RsValueGrid { with_band: false }), // RS_Value(raster, colX, rowY) - Arc::new(RsValueGrid { with_band: true }), // RS_Value(raster, colX, rowY, band) ], Volatility::Immutable, ) @@ -179,75 +174,6 @@ impl SedonaScalarKernel for RsValuePoint { } } -/// Kernel for `RS_Value(raster, colX, rowY[, band])` with **1-based** grid -/// coordinates. -#[derive(Debug)] -struct RsValueGrid { - with_band: bool, -} - -impl SedonaScalarKernel for RsValueGrid { - fn return_type(&self, args: &[SedonaType]) -> Result> { - let mut matchers = vec![ - ArgMatcher::is_raster(), - ArgMatcher::is_integer(), - ArgMatcher::is_integer(), - ]; - if self.with_band { - matchers.push(ArgMatcher::is_integer()); - } - let matcher = ArgMatcher::new(matchers, SedonaType::Arrow(DataType::Float64)); - matcher.match_args(args) - } - - fn invoke_batch( - &self, - arg_types: &[SedonaType], - args: &[ColumnarValue], - ) -> Result { - let executor = RasterExecutor::new(arg_types, args); - let num_iterations = executor.num_iterations(); - let mut builder = Float64Builder::with_capacity(num_iterations); - - // Hold the cast arrays as `ArrayRef`s so the typed views below borrow - // them instead of cloning the typed `Int32Array`s. - let col_arr = int32_array_arg(&args[1], num_iterations)?; - let row_arr = int32_array_arg(&args[2], num_iterations)?; - let band_arr = if self.with_band { - Some(int32_array_arg(&args[3], num_iterations)?) - } else { - None - }; - let col_array = as_int32_array(&col_arr)?; - let row_array = as_int32_array(&row_arr)?; - let band_array = band_arr.as_ref().map(|a| as_int32_array(a)).transpose()?; - - let mut col_iter = col_array.iter(); - let mut row_iter = row_array.iter(); - let mut band_iter = band_array.map(|a| a.iter()); - - executor.execute_raster_void(|_, raster_opt| { - let col = col_iter.next().flatten(); - let row = row_iter.next().flatten(); - let band_num = next_band(&mut band_iter); - - match (raster_opt, col, row, band_num) { - (Some(raster), Some(col), Some(row), Some(band_num)) => { - // 1-based grid coordinates -> 0-based pixel indices. - match sample_pixel(raster, col as i64 - 1, row as i64 - 1, band_num)? { - Some(value) => builder.append_value(value), - None => builder.append_null(), - } - } - _ => builder.append_null(), - } - Ok(()) - })?; - - executor.finish(Arc::new(builder.finish())) - } -} - /// Materialise an integer argument as an owned `Int32` [`ArrayRef`] for the /// batch. Callers keep the returned `ArrayRef` alive and borrow a typed /// `&Int32Array` view from it (via `as_int32_array`) rather than cloning the @@ -418,13 +344,9 @@ mod tests { #[test] fn return_type_is_float64() { - // Grid (raster, int, int) resolves to a Float64 output. - let return_type = RsValueGrid { with_band: false } - .return_type(&[ - RASTER, - SedonaType::Arrow(DataType::Int32), - SedonaType::Arrow(DataType::Int32), - ]) + // (raster, point) resolves to a Float64 output. + let return_type = RsValuePoint { with_band: false } + .return_type(&[RASTER, SedonaType::Wkb(Edges::Planar, lnglat())]) .unwrap(); assert_eq!(return_type, Some(SedonaType::Arrow(DataType::Float64))); } From 14e0132c87bdc8c72ae925c49666220a807d2123 Mon Sep 17 00:00:00 2001 From: jameswillis Date: Tue, 23 Jun 2026 13:50:33 -0700 Subject: [PATCH 09/18] test(rust/sedona-raster-functions): benchmark RS_Value scalar-raster sampling Add a scalar-raster (one raster, many points) RS_Value case alongside the existing array-raster cases. The scalar case is the one where per-point band / nodata / buffer / CRS resolution can be hoisted out of the sample loop, so it is the baseline for the upcoming optimization. Today it measures the same as the array-raster case (~172 ns/point), confirming the hoist isn't happening. --- .../benches/native-raster-functions.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/rust/sedona-raster-functions/benches/native-raster-functions.rs b/rust/sedona-raster-functions/benches/native-raster-functions.rs index 53ed72a3b..156ae4c0f 100644 --- a/rust/sedona-raster-functions/benches/native-raster-functions.rs +++ b/rust/sedona-raster-functions/benches/native-raster-functions.rs @@ -214,7 +214,7 @@ fn criterion_benchmark(c: &mut Criterion) { ), ); - // RS_Value(raster, point) + // RS_Value(raster, point) — array raster (a distinct raster per row). benchmark::scalar( c, &f, @@ -225,6 +225,19 @@ fn criterion_benchmark(c: &mut Criterion) { Transformed(Box::new(Point), sd_apply_default_crs_udf().into()), ), ); + // RS_Value(raster, point) — scalar raster (one raster, many points). This + // is the case where per-point band/nodata/buffer resolution can be hoisted + // out of the sample loop, so it is the primary target for optimization. + benchmark::scalar( + c, + &f, + "native-raster", + "rs_value", + BenchmarkArgs::ScalarArray( + Raster(64, 64), + Transformed(Box::new(Point), sd_apply_default_crs_udf().into()), + ), + ); // RS_Value(raster, point, band) benchmark::scalar( c, From 415357e19e45f2a1f53148ccb3dfce6e6359faa6 Mon Sep 17 00:00:00 2001 From: jameswillis Date: Tue, 23 Jun 2026 14:15:18 -0700 Subject: [PATCH 10/18] perf(rust/sedona-raster-functions): hoist scalar-raster state in RS_Value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For a constant (scalar) raster sampled with the default band — the common RS_Value(raster_expr, point_column) shape — resolve the band buffer, affine transform, and CRS once for the batch, then drive a selection vector + tight sample loop instead of re-resolving all of that per point. The general array-raster path (a distinct raster per row, which can't hoist) is unchanged. Factors the per-pixel read into read_pixel (shared with sample_pixel) and exposes the executor's geom WKB/CRS accessor pub(crate). ~3x faster on the scalar-raster benchmark (172 -> 59 ns/point); array-raster unchanged. --- rust/sedona-raster-functions/src/executor.rs | 11 +- rust/sedona-raster-functions/src/rs_value.rs | 161 ++++++++++++++++++- 2 files changed, 161 insertions(+), 11 deletions(-) diff --git a/rust/sedona-raster-functions/src/executor.rs b/rust/sedona-raster-functions/src/executor.rs index 34959bf7d..39cbb2573 100644 --- a/rust/sedona-raster-functions/src/executor.rs +++ b/rust/sedona-raster-functions/src/executor.rs @@ -47,7 +47,7 @@ pub struct RasterExecutor<'a, 'b> { // operations are expensive relative to per-element dispatch overhead, the cost // of matching on each access is negligible in practice. #[derive(Clone)] -enum ItemWkbAccessor { +pub(crate) enum ItemWkbAccessor { Binary(BinaryArray), BinaryView(BinaryViewArray), } @@ -76,7 +76,7 @@ impl ItemWkbAccessor { // Same enum-dispatch rationale as `ItemWkbAccessor` above: the per-element // match cost is dwarfed by the raster and CRS operations performed on each row. -enum GeomWkbCrsAccessor { +pub(crate) enum GeomWkbCrsAccessor { WkbArray { wkb: ItemWkbAccessor, static_crs: Crs, @@ -104,7 +104,7 @@ enum GeomWkbCrsAccessor { impl GeomWkbCrsAccessor { #[inline] - fn get(&mut self, i: usize) -> Result<(Option<&[u8]>, CrsRef<'_>)> { + pub(crate) fn get(&mut self, i: usize) -> Result<(Option<&[u8]>, CrsRef<'_>)> { match self { Self::Null => Ok((None, None)), Self::WkbArray { wkb, static_crs } => { @@ -527,7 +527,10 @@ impl<'a, 'b> RasterExecutor<'a, 'b> { } } - fn make_geom_wkb_crs_accessor(&self, arg_index: usize) -> Result { + pub(crate) fn make_geom_wkb_crs_accessor( + &self, + arg_index: usize, + ) -> Result { let sedona_type = self .arg_types .get(arg_index) diff --git a/rust/sedona-raster-functions/src/rs_value.rs b/rust/sedona-raster-functions/src/rs_value.rs index bc1a65720..60f76225d 100644 --- a/rust/sedona-raster-functions/src/rs_value.rs +++ b/rust/sedona-raster-functions/src/rs_value.rs @@ -34,17 +34,18 @@ use std::sync::Arc; -use arrow_array::{builder::Float64Builder, ArrayRef}; +use arrow_array::{builder::Float64Builder, ArrayRef, Float64Array, StructArray}; use arrow_schema::DataType; use datafusion_common::cast::as_int32_array; -use datafusion_common::{exec_datafusion_err, exec_err, Result}; +use datafusion_common::{exec_datafusion_err, exec_err, Result, ScalarValue}; use datafusion_expr::{ColumnarValue, Volatility}; use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; use sedona_geometry::types::GeometryTypeId; use sedona_geometry::wkb_header::WkbHeader; use sedona_proj::transform::with_global_proj_engine; use sedona_raster::affine_transformation::AffineMatrix; -use sedona_raster::traits::{nodata_bytes_to_f64_lossless, RasterRef}; +use sedona_raster::array::RasterStructArray; +use sedona_raster::traits::{nodata_bytes_to_f64_lossless, NdBuffer, RasterRef}; use sedona_schema::crs::CrsRef; use sedona_schema::{datatypes::SedonaType, matchers::ArgMatcher}; @@ -91,6 +92,16 @@ impl SedonaScalarKernel for RsValuePoint { arg_types: &[SedonaType], args: &[ColumnarValue], ) -> Result { + // Fast path: a constant (scalar) raster sampled with the default band + // lets us resolve the band buffer, affine transform, and CRS once for + // the whole batch instead of per point. This is the common + // RS_Value(raster_expr, point_column) shape. + if !self.with_band { + if let ColumnarValue::Scalar(ScalarValue::Struct(raster_struct)) = &args[0] { + return self.invoke_scalar_default_band(arg_types, args, raster_struct.as_ref()); + } + } + let executor = RasterExecutor::new(arg_types, args); let num_iterations = executor.num_iterations(); let mut builder = Float64Builder::with_capacity(num_iterations); @@ -174,6 +185,101 @@ impl SedonaScalarKernel for RsValuePoint { } } +impl RsValuePoint { + /// Optimized path for a constant (scalar) raster sampled with the default + /// band: the band buffer, affine transform, and raster CRS are resolved + /// once for the whole batch, then a selection vector drives a tight sample + /// loop. Behaviour matches the general path exactly. + fn invoke_scalar_default_band( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + raster_struct: &StructArray, + ) -> Result { + let executor = RasterExecutor::new(arg_types, args); + let n = executor.num_iterations(); + + let rasters = RasterStructArray::try_new(raster_struct)?; + if rasters.is_null(0) { + // A NULL raster makes every output NULL. + return executor.finish(Arc::new(Float64Array::from(vec![None; n]))); + } + let raster = rasters.get(0)?; + + // Resolve the default band (1) and its buffer/nodata once. + let band = raster + .bands() + .band(1) + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; + if !band.is_spatial_2d() { + return exec_err!("RS_Value supports 2-D rasters only; band is not a 2-D (y, x) grid"); + } + let buffer = band + .nd_buffer() + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; + let nodata = band + .nodata_as_f64() + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; + + // Affine transform and raster CRS, resolved once for all points. + let affine = AffineMatrix::from_metadata(&raster.metadata()); + let raster_crs = resolve_crs(raster.crs())?; + + let mut geom = executor.make_geom_wkb_crs_accessor(1)?; + + // Phase 1 — selection vector: collect (row, x, y) for the points worth + // sampling (non-null, non-empty Point), reprojected into the raster CRS. + // Null/empty points are left out and stay NULL in the output. + let mut selection: Vec<(usize, f64, f64)> = Vec::with_capacity(n); + with_global_proj_engine(|engine| { + for i in 0..n { + let (wkb_opt, point_crs) = geom.get(i)?; + let Some(point_wkb) = wkb_opt else { + continue; + }; + let header = WkbHeader::try_new(point_wkb) + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; + if header + .geometry_type_id() + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))? + != GeometryTypeId::Point + { + return exec_err!("RS_Value expects a Point geometry"); + } + if header + .is_empty() + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))? + { + continue; + } + let (x, y) = + match reproject_point(point_wkb, point_crs, raster_crs.as_deref(), engine)? { + Some(reprojected) => WkbHeader::try_new(&reprojected) + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))? + .first_xy(), + None => header.first_xy(), + }; + selection.push((i, x, y)); + } + Ok(()) + })?; + + // Phase 2 — sample each selected point from the hoisted band buffer. + let mut out: Vec> = vec![None; n]; + for (i, x, y) in selection { + let (raster_x, raster_y) = affine + .inv_transform(x, y) + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; + // Saturating `f64 -> i64` cast (never panics); the bounds check in + // `read_pixel` rejects NaN/±∞ indices as NULL. + let (col, row) = (raster_x.floor() as i64, raster_y.floor() as i64); + out[i] = read_pixel(&buffer, nodata, col, row)?; + } + + executor.finish(Arc::new(Float64Array::from(out))) + } +} + /// Materialise an integer argument as an owned `Int32` [`ArrayRef`] for the /// batch. Callers keep the returned `ArrayRef` alive and borrow a typed /// `&Int32Array` view from it (via `as_int32_array`) rather than cloning the @@ -256,6 +362,17 @@ fn sample_pixel( let buffer = band .nd_buffer() .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; + let nodata = band + .nodata_as_f64() + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; + read_pixel(&buffer, nodata, col, row) +} + +/// Read pixel `(col, row)` from an already-resolved band buffer and nodata +/// value. Returns `None` for an out-of-bounds pixel or one that equals nodata. +/// Hoisting the band resolution out of this function lets callers sample many +/// points from one buffer without re-resolving per point. +fn read_pixel(buffer: &NdBuffer, nodata: Option, col: i64, row: i64) -> Result> { let (height, width) = (buffer.shape[0], buffer.shape[1]); if row < 0 || row >= height || col < 0 || col >= width { return Ok(None); @@ -290,10 +407,7 @@ fn sample_pixel( let value = nodata_bytes_to_f64_lossless(bytes, &buffer.data_type) .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; - if let Some(nodata) = band - .nodata_as_f64() - .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))? - { + if let Some(nodata) = nodata { if value == nodata || (value.is_nan() && nodata.is_nan()) { return Ok(None); } @@ -530,4 +644,37 @@ mod tests { let arr = result.as_any().downcast_ref::().unwrap(); assert_eq!(arr.value(0), 1.0); } + + #[test] + fn scalar_raster_samples_via_fast_path() { + // A constant (scalar) raster with the default band takes the optimized + // hoisted path; verify it samples the right pixel and rejects + // out-of-bounds, matching the general (array-raster) path. + let udf: ScalarUDF = rs_value_udf().into(); + let geom_type = SedonaType::Wkb(Edges::Planar, lnglat()); + let tester = ScalarUdfTester::new(udf, vec![RASTER, geom_type.clone()]); + + // North-up 2x2 raster, origin (0, 10), 1x1 pixels; row-major [1,2 / 3,4]. + let raster = RasterSpec::d2(2, 2) + .band_values(&[1u8, 2, 3, 4]) + .transform([0.0, 1.0, 0.0, 10.0, 0.0, -1.0]) + .build(); + + let sample = |wkt: &str| -> Option { + let geoms = create_geom_array(&[Some(wkt)], &geom_type); + let result = tester + .invoke(vec![ + ColumnarValue::Scalar(ScalarValue::Struct(Arc::new(raster.clone()))), + ColumnarValue::Array(geoms), + ]) + .unwrap(); + let arr = result.into_array(1).unwrap(); + let arr = arr.as_any().downcast_ref::().unwrap(); + (!arr.is_null(0)).then(|| arr.value(0)) + }; + + assert_eq!(sample("POINT (0.5 9.5)"), Some(1.0)); // pixel (0, 0) + assert_eq!(sample("POINT (1.5 8.5)"), Some(4.0)); // pixel (1, 1) + assert_eq!(sample("POINT (100 100)"), None); // outside the footprint + } } From 450d9dbb7c052af8d152cf18f2e8790aca8ae62a Mon Sep 17 00:00:00 2001 From: jameswillis Date: Tue, 23 Jun 2026 15:25:28 -0700 Subject: [PATCH 11/18] perf(rust/sedona-raster-functions): hoist RS_Value CRS-transform decision MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolve once whether a column-level point CRS needs reprojecting into the raster CRS (column_reproject_decision), then skip the per-point reproject_point call — and its crs_equals, whose to_authority_code() allocates a String every point. Profiling put that at ~15 ns/point (~37% of the equal-CRS sample path), uniform across CRS types (lng/lat, EPSG:4326, EPSG:3857). A unit test verifies the equal-CRS short-circuit actually fires for the lng/lat case. --- rust/sedona-raster-functions/src/rs_value.rs | 85 +++++++++++++++++++- 1 file changed, 83 insertions(+), 2 deletions(-) diff --git a/rust/sedona-raster-functions/src/rs_value.rs b/rust/sedona-raster-functions/src/rs_value.rs index 60f76225d..48aff711b 100644 --- a/rust/sedona-raster-functions/src/rs_value.rs +++ b/rust/sedona-raster-functions/src/rs_value.rs @@ -224,6 +224,10 @@ impl RsValuePoint { // Affine transform and raster CRS, resolved once for all points. let affine = AffineMatrix::from_metadata(&raster.metadata()); let raster_crs = resolve_crs(raster.crs())?; + // Decide reprojection once when the point CRS is column-level (the common + // case). This skips a per-point `crs_equals`, which allocates a String — + // ~15 ns/point, uniform across CRS types. + let needs_reproject = column_reproject_decision(&arg_types[1], raster_crs.as_deref())?; let mut geom = executor.make_geom_wkb_crs_accessor(1)?; @@ -252,13 +256,18 @@ impl RsValuePoint { { continue; } - let (x, y) = + // Skip the per-point reproject (and its crs_equals) when the + // column-level CRSes already match; otherwise reproject per point. + let (x, y) = if needs_reproject == Some(false) { + header.first_xy() + } else { match reproject_point(point_wkb, point_crs, raster_crs.as_deref(), engine)? { Some(reprojected) => WkbHeader::try_new(&reprojected) .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))? .first_xy(), None => header.first_xy(), - }; + } + }; selection.push((i, x, y)); } Ok(()) @@ -336,6 +345,32 @@ fn reproject_point( } } +/// For a **column-level** point CRS, decide once whether sampling needs a +/// reprojection into the raster CRS: +/// - `Some(true)` — CRSes differ, reproject each point; +/// - `Some(false)` — CRSes match (or both absent), sample original coordinates; +/// - `None` — the point CRS is carried per row, so decide per point. +/// +/// Errors when exactly one side carries a CRS. Hoisting this out of the per-point +/// loop avoids a per-point `crs_equals`, whose `to_authority_code()` allocates a +/// `String` every call (~15 ns/point, uniform across CRS types). +fn column_reproject_decision( + point_type: &SedonaType, + raster_crs: CrsRef<'_>, +) -> Result> { + let point_crs = match point_type { + SedonaType::Wkb(_, c) | SedonaType::WkbView(_, c) => c.as_deref(), + // A per-item CRS varies by row; the caller decides per point. + _ => return Ok(None), + }; + match (point_crs, raster_crs) { + (Some(p), Some(r)) => Ok(Some(!p.crs_equals(r))), + (None, None) => Ok(Some(false)), + (Some(_), None) => exec_err!("RS_Value: point has a CRS but the raster does not"), + (None, Some(_)) => exec_err!("RS_Value: raster has a CRS but the point does not"), + } +} + /// Sample band `band_num` (1-based) at 0-based pixel `(col, row)` as `f64`. /// /// Returns `None` when the pixel is out of bounds or equals the band's nodata. @@ -677,4 +712,50 @@ mod tests { assert_eq!(sample("POINT (1.5 8.5)"), Some(4.0)); // pixel (1, 1) assert_eq!(sample("POINT (100 100)"), None); // outside the footprint } + + #[test] + fn crs_decision_equal_crs_skips_reproject() { + // The common case: a lng/lat point CRS and a lng/lat raster are detected + // as equal, so the per-point reproject (and its crs_equals) is skipped. + // This is what the B1 hoist relies on — if it returned Some(true) the + // optimization would silently no-op. + let raster = RasterSpec::d2(2, 2).band(BandDataType::UInt8).build(); // default lng/lat + let rasters = RasterStructArray::try_new(&raster).unwrap(); + let raster_crs = resolve_crs(rasters.get(0).unwrap().crs()).unwrap(); + let point_type = SedonaType::Wkb(Edges::Planar, lnglat()); + assert_eq!( + column_reproject_decision(&point_type, raster_crs.as_deref()).unwrap(), + Some(false), + "lng/lat point + lng/lat raster must be detected as equal (skip reproject)" + ); + } + + #[test] + fn crs_decision_differing_crs_reprojects() { + use sedona_schema::crs::deserialize_crs; + let raster = RasterSpec::d2(2, 2) + .crs(Some("EPSG:4326")) + .band(BandDataType::UInt8) + .build(); + let rasters = RasterStructArray::try_new(&raster).unwrap(); + let raster_crs = resolve_crs(rasters.get(0).unwrap().crs()).unwrap(); + let point_type = SedonaType::Wkb(Edges::Planar, deserialize_crs("EPSG:3857").unwrap()); + assert_eq!( + column_reproject_decision(&point_type, raster_crs.as_deref()).unwrap(), + Some(true), + "EPSG:3857 point + EPSG:4326 raster must require reprojection" + ); + } + + #[test] + fn crs_decision_one_sided_crs_errors() { + let raster = RasterSpec::d2(2, 2) + .crs(None) + .band(BandDataType::UInt8) + .build(); + let rasters = RasterStructArray::try_new(&raster).unwrap(); + let raster_crs = resolve_crs(rasters.get(0).unwrap().crs()).unwrap(); + let point_type = SedonaType::Wkb(Edges::Planar, lnglat()); + assert!(column_reproject_decision(&point_type, raster_crs.as_deref()).is_err()); + } } From 4ea333ab4975d9a92353fb9dbf8f329b23f51c6b Mon Sep 17 00:00:00 2001 From: jameswillis Date: Tue, 23 Jun 2026 16:09:39 -0700 Subject: [PATCH 12/18] perf(rust/sedona-raster-functions): parse Point coords with a fixed-offset reader in RS_Value Add read_point_xy to sedona-geometry: a Point-only WKB reader that pulls the leading (x, y) from the fixed coordinate offset instead of running the general header parser. RS_Value now uses it on both the scalar fast path and the general batch path, preserving non-Point -> error and POINT EMPTY -> NULL semantics. The WKB parse was the dominant per-point cost after band/affine/CRS hoisting; the scalar (one-raster, many-points) benchmark improves ~56%, the array cases ~12%. --- rust/sedona-geometry/src/wkb_header.rs | 169 +++++++++++++++++++ rust/sedona-raster-functions/src/rs_value.rs | 64 +++---- 2 files changed, 191 insertions(+), 42 deletions(-) diff --git a/rust/sedona-geometry/src/wkb_header.rs b/rust/sedona-geometry/src/wkb_header.rs index b1cdfcac0..792b47f85 100644 --- a/rust/sedona-geometry/src/wkb_header.rs +++ b/rust/sedona-geometry/src/wkb_header.rs @@ -152,6 +152,77 @@ impl WkbHeader { } } +/// Read the `(x, y)` of a WKB Point without parsing the geometry generally. +/// +/// This is a fast path for the common case of sampling/indexing by a single +/// point: a Point WKB has a fixed layout (`byte order`, `type code`, optional +/// SRID, then the coordinate), so the coordinate is at a known offset and can +/// be read with a couple of fixed-offset loads. It deliberately handles only +/// Points — any other geometry is an error. +/// +/// Returns `Ok(None)` for `POINT EMPTY` (encoded as `NaN`/`NaN`), `Ok(Some((x, y)))` +/// for a finite point, and an error if the bytes are not a Point or are truncated. +/// Accepts both ISO (with Z/M dimension flags) and EWKB (with Z/M/SRID flags) +/// encodings; only the first two ordinates are read. +pub fn read_point_xy(buf: &[u8]) -> Result, SedonaGeometryError> { + let little_endian = match buf.first() { + Some(1) => true, + Some(0) => false, + _ => { + return Err(SedonaGeometryError::Invalid( + "WKB: missing or invalid byte order".to_string(), + )) + } + }; + + let read_u32 = |offset: usize| -> Result { + let bytes: [u8; 4] = buf + .get(offset..offset + 4) + .ok_or_else(|| SedonaGeometryError::Invalid("WKB: truncated header".to_string()))? + .try_into() + .unwrap(); + Ok(if little_endian { + u32::from_le_bytes(bytes) + } else { + u32::from_be_bytes(bytes) + }) + }; + + let read_f64 = |offset: usize| -> Result { + let bytes: [u8; 8] = buf + .get(offset..offset + 8) + .ok_or_else(|| SedonaGeometryError::Invalid("WKB: truncated coordinate".to_string()))? + .try_into() + .unwrap(); + Ok(if little_endian { + f64::from_le_bytes(bytes) + } else { + f64::from_be_bytes(bytes) + }) + }; + + let type_code = read_u32(1)?; + // The low three bits of the base type identify a Point (1) regardless of the + // ISO dimension digits or EWKB high-bit flags. + if type_code & 0x7 != 1 { + return Err(SedonaGeometryError::Invalid(format!( + "WKB: expected a Point (type code {type_code})" + ))); + } + + // EWKB carries a 4-byte SRID between the type code and the coordinate. + let has_srid = type_code & 0x2000_0000 != 0; + let coord_offset = if has_srid { 9 } else { 5 }; + + let x = read_f64(coord_offset)?; + let y = read_f64(coord_offset + 8)?; + if x.is_nan() && y.is_nan() { + Ok(None) + } else { + Ok(Some((x, y))) + } +} + // A helper struct for calculating the WKBHeader struct WkbBuffer<'a> { buf: &'a [u8], @@ -1059,4 +1130,102 @@ mod tests { let header = WkbHeader::try_new(&wkb).unwrap(); assert!(header.is_empty().unwrap()); } + + #[test] + fn read_point_xy_iso_little_endian() { + // 2-D / 3-D / 4-D ISO points all expose the same leading (x, y), and the + // result must agree with the general parser's `first_xy`. + for wkt_value in [ + "POINT (1 2)", + "POINT Z (1 2 3)", + "POINT M (1 2 3)", + "POINT ZM (1 2 3 4)", + ] { + let wkb = make_wkb(wkt_value); + assert_eq!( + read_point_xy(&wkb).unwrap(), + Some((1.0, 2.0)), + "{wkt_value}" + ); + assert_eq!( + read_point_xy(&wkb).unwrap(), + Some(WkbHeader::try_new(&wkb).unwrap().first_xy()), + "disagrees with first_xy for {wkt_value}" + ); + } + } + + #[test] + fn read_point_xy_big_endian() { + // Byte order 0x00, type 1 (BE), x = 1.0, y = 2.0 — all big-endian. + let wkb: [u8; 21] = [ + 0x00, // big-endian + 0x00, 0x00, 0x00, 0x01, // type code 1 (Point) + 0x3F, 0xF0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // x = 1.0 + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // y = 2.0 + ]; + assert_eq!(read_point_xy(&wkb).unwrap(), Some((1.0, 2.0))); + } + + #[test] + fn read_point_xy_ewkb_with_srid() { + use sedona_testing::fixtures::*; + + // The SRID sits between the type code and the coordinate; the reader must + // skip it for every dimensionality. + for wkb in [ + &POINT_WITH_SRID_4326_EWKB[..], + &POINT_Z_WITH_SRID_3857_EWKB[..], + &POINT_M_WITH_SRID_4326_EWKB[..], + &POINT_ZM_WITH_SRID_4326_EWKB[..], + ] { + assert_eq!(read_point_xy(wkb).unwrap(), Some((1.0, 2.0))); + assert_eq!( + read_point_xy(wkb).unwrap(), + Some(WkbHeader::try_new(wkb).unwrap().first_xy()) + ); + } + } + + #[test] + fn read_point_xy_empty_is_none() { + // POINT EMPTY is encoded as NaN/NaN and reads back as "no coordinate". + assert_eq!(read_point_xy(&make_wkb("POINT EMPTY")).unwrap(), None); + assert_eq!(read_point_xy(&make_wkb("POINT Z EMPTY")).unwrap(), None); + + use sedona_testing::fixtures::*; + assert_eq!( + read_point_xy(&POINT_EMPTY_WITH_SRID_4326_EWKB).unwrap(), + None + ); + } + + #[test] + fn read_point_xy_rejects_non_point() { + for wkt_value in [ + "LINESTRING (1 2, 3 4)", + "POLYGON ((0 0, 0 1, 1 0, 0 0))", + "MULTIPOINT ((1 2))", + "GEOMETRYCOLLECTION (POINT (1 2))", + ] { + let err = read_point_xy(&make_wkb(wkt_value)).unwrap_err().to_string(); + assert!(err.contains("expected a Point"), "{wkt_value}: {err}"); + } + } + + #[test] + fn read_point_xy_rejects_truncated() { + let wkb = make_wkb("POINT (1 2)"); + // Empty input has no byte order; everything shorter than a full 2-D point + // header + coordinate must error rather than read past the end. + assert!(read_point_xy(&[]).is_err()); + for i in 1..wkb.len() { + assert!( + read_point_xy(&wkb[0..i]).is_err(), + "0..{i} unexpectedly succeeded" + ); + } + // Invalid byte-order flag. + assert!(read_point_xy(&[0x02, 0, 0, 0, 1]).is_err()); + } } diff --git a/rust/sedona-raster-functions/src/rs_value.rs b/rust/sedona-raster-functions/src/rs_value.rs index 48aff711b..074a2e941 100644 --- a/rust/sedona-raster-functions/src/rs_value.rs +++ b/rust/sedona-raster-functions/src/rs_value.rs @@ -40,8 +40,7 @@ use datafusion_common::cast::as_int32_array; use datafusion_common::{exec_datafusion_err, exec_err, Result, ScalarValue}; use datafusion_expr::{ColumnarValue, Volatility}; use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}; -use sedona_geometry::types::GeometryTypeId; -use sedona_geometry::wkb_header::WkbHeader; +use sedona_geometry::wkb_header::read_point_xy; use sedona_proj::transform::with_global_proj_engine; use sedona_raster::affine_transformation::AffineMatrix; use sedona_raster::array::RasterStructArray; @@ -131,25 +130,15 @@ impl SedonaScalarKernel for RsValuePoint { } }; - // Header-only parse: reject non-points up front, and treat + // Point-only fast parse: reject non-points up front, and treat // POINT EMPTY (NaN coords) as "no location to sample" -> NULL, // before doing any reprojection work. - let header = WkbHeader::try_new(point_wkb) - .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; - if header - .geometry_type_id() - .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))? - != GeometryTypeId::Point - { - return exec_err!("RS_Value expects a Point geometry"); - } - if header - .is_empty() - .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))? - { + let Some((px, py)) = + read_point_xy(point_wkb).map_err(|e| exec_datafusion_err!("RS_Value: {e}"))? + else { builder.append_null(); return Ok(()); - } + }; // Bring the point into the raster's CRS. A reprojection only // happens when both sides carry a (differing) CRS; otherwise @@ -157,10 +146,12 @@ impl SedonaScalarKernel for RsValuePoint { let raster_crs = resolve_crs(raster.crs())?; let (x, y) = match reproject_point(point_wkb, point_crs, raster_crs.as_deref(), engine)? { - Some(reprojected) => WkbHeader::try_new(&reprojected) + Some(reprojected) => read_point_xy(&reprojected) .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))? - .first_xy(), - None => header.first_xy(), + .ok_or_else(|| { + exec_datafusion_err!("RS_Value: reprojected point is empty") + })?, + None => (px, py), }; // Floor (not truncate toward zero) so a point just outside the // top/left edge maps to a negative index and is rejected as out @@ -241,31 +232,23 @@ impl RsValuePoint { let Some(point_wkb) = wkb_opt else { continue; }; - let header = WkbHeader::try_new(point_wkb) - .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; - if header - .geometry_type_id() - .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))? - != GeometryTypeId::Point - { - return exec_err!("RS_Value expects a Point geometry"); - } - if header - .is_empty() - .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))? - { + let Some((px, py)) = + read_point_xy(point_wkb).map_err(|e| exec_datafusion_err!("RS_Value: {e}"))? + else { continue; - } + }; // Skip the per-point reproject (and its crs_equals) when the // column-level CRSes already match; otherwise reproject per point. let (x, y) = if needs_reproject == Some(false) { - header.first_xy() + (px, py) } else { match reproject_point(point_wkb, point_crs, raster_crs.as_deref(), engine)? { - Some(reprojected) => WkbHeader::try_new(&reprojected) + Some(reprojected) => read_point_xy(&reprojected) .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))? - .first_xy(), - None => header.first_xy(), + .ok_or_else(|| { + exec_datafusion_err!("RS_Value: reprojected point is empty") + })?, + None => (px, py), } }; selection.push((i, x, y)); @@ -623,10 +606,7 @@ mod tests { .invoke_arrays(vec![Arc::new(rasters), geoms]) .unwrap_err() .to_string(); - assert!( - err.contains("expects a Point geometry"), - "unexpected error: {err}" - ); + assert!(err.contains("expected a Point"), "unexpected error: {err}"); } #[test] From e251419bbe2d543880fef47872dcfaac183e72c0 Mon Sep 17 00:00:00 2001 From: jameswillis Date: Tue, 23 Jun 2026 16:58:24 -0700 Subject: [PATCH 13/18] refactor(rust/sedona-raster-functions): dedup RS_Value point sampling, fix edge cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review feedback on the RS_Value fast path: - Extract resolve_point_xy / xy_to_pixel shared by the scalar fast path and the general batch path, so the point->pixel logic is identical by construction instead of by hand. - Defer band/2-D validation in the scalar path until at least one point needs sampling, so an all-null/all-empty batch returns NULL rather than erroring on an unsupported raster — matching the general path. - Treat a non-finite point ordinate (NaN/Inf) as no location to sample (NULL), rather than letting a NaN index saturate to pixel column 0. - Reuse SRID_FLAG_BIT in read_point_xy instead of a magic literal. - Move geo-traits back to dev-dependencies (only used under #[cfg(test)]). --- rust/sedona-geometry/src/wkb_header.rs | 2 +- rust/sedona-raster-functions/Cargo.toml | 2 +- rust/sedona-raster-functions/src/rs_value.rs | 241 +++++++++++++------ 3 files changed, 166 insertions(+), 79 deletions(-) diff --git a/rust/sedona-geometry/src/wkb_header.rs b/rust/sedona-geometry/src/wkb_header.rs index 792b47f85..24b36730f 100644 --- a/rust/sedona-geometry/src/wkb_header.rs +++ b/rust/sedona-geometry/src/wkb_header.rs @@ -211,7 +211,7 @@ pub fn read_point_xy(buf: &[u8]) -> Result, SedonaGeometryErr } // EWKB carries a 4-byte SRID between the type code and the coordinate. - let has_srid = type_code & 0x2000_0000 != 0; + let has_srid = type_code & SRID_FLAG_BIT != 0; let coord_offset = if has_srid { 9 } else { 5 }; let x = read_f64(coord_offset)?; diff --git a/rust/sedona-raster-functions/Cargo.toml b/rust/sedona-raster-functions/Cargo.toml index a8ebccf3e..7d65c7a03 100644 --- a/rust/sedona-raster-functions/Cargo.toml +++ b/rust/sedona-raster-functions/Cargo.toml @@ -37,7 +37,6 @@ arrow-buffer = { workspace = true } async-trait = { workspace = true } datafusion-common = { workspace = true } datafusion-expr = { workspace = true } -geo-traits = { workspace = true } sedona-common = { workspace = true } sedona-expr = { workspace = true } sedona-geometry = { workspace = true } @@ -50,6 +49,7 @@ wkb = { workspace = true } [dev-dependencies] criterion = { workspace = true } +geo-traits = { workspace = true } sedona-testing = { workspace = true, features = ["criterion"] } sedona-proj = { workspace = true, features = ["proj-sys"] } rstest = { workspace = true } diff --git a/rust/sedona-raster-functions/src/rs_value.rs b/rust/sedona-raster-functions/src/rs_value.rs index 074a2e941..b0f2c7db9 100644 --- a/rust/sedona-raster-functions/src/rs_value.rs +++ b/rust/sedona-raster-functions/src/rs_value.rs @@ -130,42 +130,22 @@ impl SedonaScalarKernel for RsValuePoint { } }; - // Point-only fast parse: reject non-points up front, and treat - // POINT EMPTY (NaN coords) as "no location to sample" -> NULL, - // before doing any reprojection work. - let Some((px, py)) = - read_point_xy(point_wkb).map_err(|e| exec_datafusion_err!("RS_Value: {e}"))? + // Parse the point and bring it into the raster's CRS. Null/empty + // points (and non-finite coordinates) have no location to sample. + let raster_crs = resolve_crs(raster.crs())?; + let Some((x, y)) = + resolve_point_xy(point_wkb, point_crs, raster_crs.as_deref(), false, engine)? else { builder.append_null(); return Ok(()); }; - // Bring the point into the raster's CRS. A reprojection only - // happens when both sides carry a (differing) CRS; otherwise - // the original coordinates are sampled directly. - let raster_crs = resolve_crs(raster.crs())?; - let (x, y) = - match reproject_point(point_wkb, point_crs, raster_crs.as_deref(), engine)? { - Some(reprojected) => read_point_xy(&reprojected) - .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))? - .ok_or_else(|| { - exec_datafusion_err!("RS_Value: reprojected point is empty") - })?, - None => (px, py), - }; - // Floor (not truncate toward zero) so a point just outside the - // top/left edge maps to a negative index and is rejected as out - // of bounds, rather than truncating to 0 and sampling an edge pixel. - let (raster_x, raster_y) = AffineMatrix::from_metadata(&raster.metadata()) - .inv_transform(x, y) - .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; - // `f64 -> i64` is a saturating cast (it never panics): a NaN - // index becomes 0 and ±∞ saturate to the i64 bounds, all of - // which the bounds check in `sample_pixel` rejects as NULL. - let (col, row) = (raster_x.floor() as i64, raster_y.floor() as i64); - - match sample_pixel(raster, col, row, band_num)? { - Some(value) => builder.append_value(value), + let affine = AffineMatrix::from_metadata(&raster.metadata()); + match xy_to_pixel(&affine, x, y)? { + Some((col, row)) => match sample_pixel(raster, col, row, band_num)? { + Some(value) => builder.append_value(value), + None => builder.append_null(), + }, None => builder.append_null(), } Ok(()) @@ -178,9 +158,14 @@ impl SedonaScalarKernel for RsValuePoint { impl RsValuePoint { /// Optimized path for a constant (scalar) raster sampled with the default - /// band: the band buffer, affine transform, and raster CRS are resolved - /// once for the whole batch, then a selection vector drives a tight sample - /// loop. Behaviour matches the general path exactly. + /// band: the affine transform, raster CRS, and band buffer are resolved once + /// for the whole batch, then a selection vector drives a tight sample loop. + /// + /// Sampling behaviour matches the general path: it uses the same + /// [`resolve_point_xy`]/[`xy_to_pixel`] helpers, so null/empty/non-finite + /// points yield NULL identically. Band resolution (and its 2-D check) is + /// deferred until at least one point needs sampling, so an all-null/all-empty + /// batch returns NULL without touching the band — as the general path does. fn invoke_scalar_default_band( &self, arg_types: &[SedonaType], @@ -197,21 +182,6 @@ impl RsValuePoint { } let raster = rasters.get(0)?; - // Resolve the default band (1) and its buffer/nodata once. - let band = raster - .bands() - .band(1) - .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; - if !band.is_spatial_2d() { - return exec_err!("RS_Value supports 2-D rasters only; band is not a 2-D (y, x) grid"); - } - let buffer = band - .nd_buffer() - .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; - let nodata = band - .nodata_as_f64() - .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; - // Affine transform and raster CRS, resolved once for all points. let affine = AffineMatrix::from_metadata(&raster.metadata()); let raster_crs = resolve_crs(raster.crs())?; @@ -219,6 +189,7 @@ impl RsValuePoint { // case). This skips a per-point `crs_equals`, which allocates a String — // ~15 ns/point, uniform across CRS types. let needs_reproject = column_reproject_decision(&arg_types[1], raster_crs.as_deref())?; + let skip_reproject = needs_reproject == Some(false); let mut geom = executor.make_geom_wkb_crs_accessor(1)?; @@ -232,40 +203,45 @@ impl RsValuePoint { let Some(point_wkb) = wkb_opt else { continue; }; - let Some((px, py)) = - read_point_xy(point_wkb).map_err(|e| exec_datafusion_err!("RS_Value: {e}"))? - else { - continue; - }; - // Skip the per-point reproject (and its crs_equals) when the - // column-level CRSes already match; otherwise reproject per point. - let (x, y) = if needs_reproject == Some(false) { - (px, py) - } else { - match reproject_point(point_wkb, point_crs, raster_crs.as_deref(), engine)? { - Some(reprojected) => read_point_xy(&reprojected) - .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))? - .ok_or_else(|| { - exec_datafusion_err!("RS_Value: reprojected point is empty") - })?, - None => (px, py), - } - }; - selection.push((i, x, y)); + if let Some((x, y)) = resolve_point_xy( + point_wkb, + point_crs, + raster_crs.as_deref(), + skip_reproject, + engine, + )? { + selection.push((i, x, y)); + } } Ok(()) })?; - // Phase 2 — sample each selected point from the hoisted band buffer. let mut out: Vec> = vec![None; n]; + if selection.is_empty() { + return executor.finish(Arc::new(Float64Array::from(out))); + } + + // Resolve the default band (1) and its buffer/nodata once, now that we + // know at least one point needs sampling. + let band = raster + .bands() + .band(1) + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; + if !band.is_spatial_2d() { + return exec_err!("RS_Value supports 2-D rasters only; band is not a 2-D (y, x) grid"); + } + let buffer = band + .nd_buffer() + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; + let nodata = band + .nodata_as_f64() + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; + + // Phase 2 — sample each selected point from the hoisted band buffer. for (i, x, y) in selection { - let (raster_x, raster_y) = affine - .inv_transform(x, y) - .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; - // Saturating `f64 -> i64` cast (never panics); the bounds check in - // `read_pixel` rejects NaN/±∞ indices as NULL. - let (col, row) = (raster_x.floor() as i64, raster_y.floor() as i64); - out[i] = read_pixel(&buffer, nodata, col, row)?; + if let Some((col, row)) = xy_to_pixel(&affine, x, y)? { + out[i] = read_pixel(&buffer, nodata, col, row)?; + } } executor.finish(Arc::new(Float64Array::from(out))) @@ -354,6 +330,62 @@ fn column_reproject_decision( } } +/// Parse a Point WKB and return its `(x, y)` in the raster's CRS, or `None` when +/// there is nothing to sample (the point is empty — both ordinates NaN). +/// +/// `skip_reproject` is the hoisted column-level decision: when `true` the +/// original coordinates are returned without consulting [`reproject_point`] (and +/// its per-point `crs_equals`); when `false` reprojection is decided per point. +fn resolve_point_xy( + point_wkb: &[u8], + point_crs: CrsRef<'_>, + raster_crs: CrsRef<'_>, + skip_reproject: bool, + engine: &dyn sedona_geometry::transform::CrsEngine, +) -> Result> { + let Some((px, py)) = + read_point_xy(point_wkb).map_err(|e| exec_datafusion_err!("RS_Value: {e}"))? + else { + return Ok(None); + }; + if skip_reproject { + return Ok(Some((px, py))); + } + // A reprojection only happens when both sides carry a (differing) CRS; + // otherwise the original coordinates are sampled directly. + match reproject_point(point_wkb, point_crs, raster_crs, engine)? { + Some(reprojected) => { + let xy = read_point_xy(&reprojected) + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))? + .ok_or_else(|| exec_datafusion_err!("RS_Value: reprojected point is empty"))?; + Ok(Some(xy)) + } + None => Ok(Some((px, py))), + } +} + +/// Map a coordinate in the raster's CRS to a 0-based `(col, row)` pixel index, +/// or `None` when the point has no location to sample. +/// +/// A non-finite coordinate (a Point with a NaN/Inf ordinate, e.g. `POINT(NaN 5)`) +/// returns `None`: without this guard a NaN would survive `inv_transform` and the +/// saturating `f64 -> i64` cast would turn it into 0 (in bounds), silently +/// sampling pixel column 0 rather than yielding NULL. +fn xy_to_pixel(affine: &AffineMatrix, x: f64, y: f64) -> Result> { + if !x.is_finite() || !y.is_finite() { + return Ok(None); + } + let (raster_x, raster_y) = affine + .inv_transform(x, y) + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; + // Floor (not truncate toward zero) so a point just outside the top/left edge + // maps to a negative index and is rejected as out of bounds, rather than + // truncating to 0 and sampling an edge pixel. The `f64 -> i64` cast saturates + // (never panics); the bounds check in `read_pixel`/`sample_pixel` rejects an + // out-of-range index as NULL. + Ok(Some((raster_x.floor() as i64, raster_y.floor() as i64))) +} + /// Sample band `band_num` (1-based) at 0-based pixel `(col, row)` as `f64`. /// /// Returns `None` when the pixel is out of bounds or equals the band's nodata. @@ -693,6 +725,61 @@ mod tests { assert_eq!(sample("POINT (100 100)"), None); // outside the footprint } + #[test] + fn non_finite_point_ordinate_is_none() { + // A point with a NaN/Inf ordinate (e.g. POINT(NaN 5)) has no location to + // sample. Without the finite guard a NaN survives `inv_transform` and the + // saturating cast turns it into pixel column 0, silently sampling a value. + let raster = RasterSpec::d2(2, 2) + .band_values(&[1u8, 2, 3, 4]) + .transform([0.0, 1.0, 0.0, 10.0, 0.0, -1.0]) + .build(); + let rasters = RasterStructArray::try_new(&raster).unwrap(); + let affine = AffineMatrix::from_metadata(&rasters.get(0).unwrap().metadata()); + + assert_eq!(xy_to_pixel(&affine, f64::NAN, 5.0).unwrap(), None); + assert_eq!(xy_to_pixel(&affine, 5.0, f64::NAN).unwrap(), None); + assert_eq!(xy_to_pixel(&affine, f64::INFINITY, 5.0).unwrap(), None); + // A finite in-bounds point still maps to a real pixel. + assert_eq!(xy_to_pixel(&affine, 0.5, 9.5).unwrap(), Some((0, 0))); + } + + #[test] + fn scalar_all_null_points_defer_band_resolution() { + // The scalar fast path defers band/2-D validation until a point needs + // sampling: an all-null point column over an unsupported (non-2-D) raster + // returns all-NULL rather than erroring, matching the general path. A + // valid point over the same raster still surfaces the 2-D error. + let udf: ScalarUDF = rs_value_udf().into(); + let geom_type = SedonaType::Wkb(Edges::Planar, None); + let tester = ScalarUdfTester::new(udf, vec![RASTER, geom_type.clone()]); + let raster = RasterSpec::nd(&["time", "y", "x"], &[2, 2, 1]) + .band(BandDataType::UInt8) + .crs(None) + .build(); + + let invoke = |wkt: Option<&str>| { + let geoms = create_geom_array(&[wkt], &geom_type); + tester.invoke(vec![ + ColumnarValue::Scalar(ScalarValue::Struct(Arc::new(raster.clone()))), + ColumnarValue::Array(geoms), + ]) + }; + + // All-null point -> NULL, no band resolution, no error. + let result = invoke(None).unwrap(); + let arr = result.into_array(1).unwrap(); + let arr = arr.as_any().downcast_ref::().unwrap(); + assert!( + arr.is_null(0), + "all-null point over a non-2-D raster should be NULL" + ); + + // A real point forces band resolution, which rejects the non-2-D raster. + let err = invoke(Some("POINT (0 0)")).unwrap_err().to_string(); + assert!(err.contains("2-D"), "unexpected error: {err}"); + } + #[test] fn crs_decision_equal_crs_skips_reproject() { // The common case: a lng/lat point CRS and a lng/lat raster are detected From b9af45f2f74b79395f2b1d949650787e1df9fe75 Mon Sep 17 00:00:00 2001 From: jameswillis Date: Tue, 23 Jun 2026 17:12:52 -0700 Subject: [PATCH 14/18] perf(rust/sedona-raster-functions): extend RS_Value scalar fast path to the band argument A constant (scalar) raster now takes the hoisted fast path regardless of the band argument, not just the 2-arg default-band case: - no band argument or a constant (scalar) band -> the band buffer is hoisted once, same as band 1; - a band column -> the band buffer is resolved per row (its NdBuffer borrows from the band and can't be cached across distinct bands), but the affine/CRS/reproject work is still hoisted out of the per-point loop. A NULL scalar band yields an all-NULL result. Selecting a different band no longer drops RS_Value(scalar_raster, point_col, band) onto the per-row path. --- rust/sedona-raster-functions/src/rs_value.rs | 234 +++++++++++++++---- 1 file changed, 195 insertions(+), 39 deletions(-) diff --git a/rust/sedona-raster-functions/src/rs_value.rs b/rust/sedona-raster-functions/src/rs_value.rs index b0f2c7db9..a265339d5 100644 --- a/rust/sedona-raster-functions/src/rs_value.rs +++ b/rust/sedona-raster-functions/src/rs_value.rs @@ -34,7 +34,7 @@ use std::sync::Arc; -use arrow_array::{builder::Float64Builder, ArrayRef, Float64Array, StructArray}; +use arrow_array::{builder::Float64Builder, Array, ArrayRef, Float64Array, StructArray}; use arrow_schema::DataType; use datafusion_common::cast::as_int32_array; use datafusion_common::{exec_datafusion_err, exec_err, Result, ScalarValue}; @@ -91,14 +91,13 @@ impl SedonaScalarKernel for RsValuePoint { arg_types: &[SedonaType], args: &[ColumnarValue], ) -> Result { - // Fast path: a constant (scalar) raster sampled with the default band - // lets us resolve the band buffer, affine transform, and CRS once for - // the whole batch instead of per point. This is the common - // RS_Value(raster_expr, point_column) shape. - if !self.with_band { - if let ColumnarValue::Scalar(ScalarValue::Struct(raster_struct)) = &args[0] { - return self.invoke_scalar_default_band(arg_types, args, raster_struct.as_ref()); - } + // Fast path: a constant (scalar) raster lets us resolve the affine + // transform and CRS once for the whole batch instead of per point — the + // common RS_Value(raster_expr, point_column[, band]) shape. The band + // argument does not change this: a constant band also hoists its buffer, + // and only a band *column* falls back to per-row band resolution. + if let ColumnarValue::Scalar(ScalarValue::Struct(raster_struct)) = &args[0] { + return self.invoke_scalar_raster(arg_types, args, raster_struct.as_ref()); } let executor = RasterExecutor::new(arg_types, args); @@ -157,16 +156,20 @@ impl SedonaScalarKernel for RsValuePoint { } impl RsValuePoint { - /// Optimized path for a constant (scalar) raster sampled with the default - /// band: the affine transform, raster CRS, and band buffer are resolved once - /// for the whole batch, then a selection vector drives a tight sample loop. + /// Optimized path for a constant (scalar) raster: the affine transform and + /// raster CRS are resolved once for the whole batch, then a selection vector + /// drives a tight sample loop. This serves every band shape: + /// - no band argument or a constant band → the band buffer is hoisted too; + /// - a band *column* → the band buffer is resolved per row (its `NdBuffer` + /// borrows from the band, which can't be cached across distinct bands), + /// but the affine/CRS/reproject work is still hoisted. /// /// Sampling behaviour matches the general path: it uses the same /// [`resolve_point_xy`]/[`xy_to_pixel`] helpers, so null/empty/non-finite /// points yield NULL identically. Band resolution (and its 2-D check) is /// deferred until at least one point needs sampling, so an all-null/all-empty /// batch returns NULL without touching the band — as the general path does. - fn invoke_scalar_default_band( + fn invoke_scalar_raster( &self, arg_types: &[SedonaType], args: &[ColumnarValue], @@ -182,6 +185,35 @@ impl RsValuePoint { } let raster = rasters.get(0)?; + // Band selection: a missing band argument (default 1) or a scalar band is + // constant for the batch and lets us hoist the band buffer; a band column + // is resolved per row. A NULL scalar band makes every output NULL. + let mut const_band: Option = None; + let mut band_values: Option = None; + if self.with_band { + match &args[2] { + ColumnarValue::Scalar(scalar) => { + let arr = ColumnarValue::Scalar(scalar.clone()) + .cast_to(&DataType::Int32, None)? + .into_array(1)?; + let arr = as_int32_array(&arr)?; + if arr.is_null(0) { + return executor.finish(Arc::new(Float64Array::from(vec![None; n]))); + } + // Match `next_band`: clamp to 0 so band 0/negative surface as a + // not-1-based error from `Bands::band` rather than being coerced. + const_band = Some(arr.value(0).max(0) as usize); + } + other => band_values = Some(int32_array_arg(other, n)?), + } + } else { + const_band = Some(1); + } + let band_array = band_values + .as_ref() + .map(|a| as_int32_array(a)) + .transpose()?; + // Affine transform and raster CRS, resolved once for all points. let affine = AffineMatrix::from_metadata(&raster.metadata()); let raster_crs = resolve_crs(raster.crs())?; @@ -193,12 +225,22 @@ impl RsValuePoint { let mut geom = executor.make_geom_wkb_crs_accessor(1)?; - // Phase 1 — selection vector: collect (row, x, y) for the points worth - // sampling (non-null, non-empty Point), reprojected into the raster CRS. - // Null/empty points are left out and stay NULL in the output. - let mut selection: Vec<(usize, f64, f64)> = Vec::with_capacity(n); + // Phase 1 — selection vector: collect (row, x, y, band) for the points + // worth sampling (non-null, non-empty Point with a non-null band), + // reprojected into the raster CRS. Skipped rows stay NULL in the output. + let mut selection: Vec<(usize, f64, f64, usize)> = Vec::with_capacity(n); + let mut band_iter = band_array.map(|a| a.iter()); with_global_proj_engine(|engine| { for i in 0..n { + // Advance the band column in lockstep with the row index; a NULL + // band element leaves the row NULL (matching the general path). + let band_num = match const_band { + Some(b) => b, + None => match next_band(&mut band_iter) { + Some(b) => b, + None => continue, + }, + }; let (wkb_opt, point_crs) = geom.get(i)?; let Some(point_wkb) = wkb_opt else { continue; @@ -210,7 +252,7 @@ impl RsValuePoint { skip_reproject, engine, )? { - selection.push((i, x, y)); + selection.push((i, x, y, band_num)); } } Ok(()) @@ -221,26 +263,37 @@ impl RsValuePoint { return executor.finish(Arc::new(Float64Array::from(out))); } - // Resolve the default band (1) and its buffer/nodata once, now that we - // know at least one point needs sampling. - let band = raster - .bands() - .band(1) - .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; - if !band.is_spatial_2d() { - return exec_err!("RS_Value supports 2-D rasters only; band is not a 2-D (y, x) grid"); - } - let buffer = band - .nd_buffer() - .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; - let nodata = band - .nodata_as_f64() - .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; - - // Phase 2 — sample each selected point from the hoisted band buffer. - for (i, x, y) in selection { - if let Some((col, row)) = xy_to_pixel(&affine, x, y)? { - out[i] = read_pixel(&buffer, nodata, col, row)?; + // Phase 2 — sample. A constant band resolves its buffer/nodata once (now + // that we know a point needs sampling); a band column resolves per row. + match const_band { + Some(band_num) => { + let band = raster + .bands() + .band(band_num) + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; + if !band.is_spatial_2d() { + return exec_err!( + "RS_Value supports 2-D rasters only; band is not a 2-D (y, x) grid" + ); + } + let buffer = band + .nd_buffer() + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; + let nodata = band + .nodata_as_f64() + .map_err(|e| exec_datafusion_err!("RS_Value: {e}"))?; + for (i, x, y, _band) in selection { + if let Some((col, row)) = xy_to_pixel(&affine, x, y)? { + out[i] = read_pixel(&buffer, nodata, col, row)?; + } + } + } + None => { + for (i, x, y, band_num) in selection { + if let Some((col, row)) = xy_to_pixel(&affine, x, y)? { + out[i] = sample_pixel(&raster, col, row, band_num)?; + } + } } } @@ -469,7 +522,7 @@ fn read_pixel(buffer: &NdBuffer, nodata: Option, col: i64, row: i64) -> Res #[cfg(test)] mod tests { use super::*; - use arrow_array::{Array, Float64Array}; + use arrow_array::{Array, Float64Array, Int32Array}; use datafusion_expr::ScalarUDF; use sedona_raster::array::RasterStructArray; use sedona_schema::crs::lnglat; @@ -725,6 +778,109 @@ mod tests { assert_eq!(sample("POINT (100 100)"), None); // outside the footprint } + #[test] + fn scalar_raster_constant_band_uses_fast_path() { + // A scalar raster with a constant (scalar) band argument takes the same + // hoisted fast path as the 2-arg default-band case, just on that band. + let udf: ScalarUDF = rs_value_udf().into(); + let geom_type = SedonaType::Wkb(Edges::Planar, lnglat()); + let tester = ScalarUdfTester::new( + udf, + vec![ + RASTER, + geom_type.clone(), + SedonaType::Arrow(DataType::Int32), + ], + ); + // North-up 2x2, two bands: band 1 [1,2,3,4], band 2 [10,20,30,40]. + let raster = RasterSpec::d2(2, 2) + .band_values(&[1u8, 2, 3, 4]) + .band_values(&[10u8, 20, 30, 40]) + .transform([0.0, 1.0, 0.0, 10.0, 0.0, -1.0]) + .build(); + let geoms = create_geom_array(&[Some("POINT (0.5 9.5)")], &geom_type); // pixel (0, 0) + let result = tester + .invoke(vec![ + ColumnarValue::Scalar(ScalarValue::Struct(Arc::new(raster))), + ColumnarValue::Array(geoms), + ColumnarValue::Scalar(ScalarValue::Int32(Some(2))), + ]) + .unwrap(); + let arr = result.into_array(1).unwrap(); + let arr = arr.as_any().downcast_ref::().unwrap(); + assert_eq!(arr.value(0), 10.0); // band 2, pixel (0, 0) + } + + #[test] + fn scalar_raster_band_column_resolves_per_row() { + // A scalar raster with a band *column* still hoists affine/CRS/reproject, + // but resolves the band buffer per row. A NULL band element -> NULL. + let udf: ScalarUDF = rs_value_udf().into(); + let geom_type = SedonaType::Wkb(Edges::Planar, lnglat()); + let tester = ScalarUdfTester::new( + udf, + vec![ + RASTER, + geom_type.clone(), + SedonaType::Arrow(DataType::Int32), + ], + ); + let raster = RasterSpec::d2(2, 2) + .band_values(&[1u8, 2, 3, 4]) + .band_values(&[10u8, 20, 30, 40]) + .transform([0.0, 1.0, 0.0, 10.0, 0.0, -1.0]) + .build(); + // Three points at pixel (0, 0), sampling band 1, band 2, then a NULL band. + let geoms = create_geom_array( + &[ + Some("POINT (0.5 9.5)"), + Some("POINT (0.5 9.5)"), + Some("POINT (0.5 9.5)"), + ], + &geom_type, + ); + let bands = Arc::new(Int32Array::from(vec![Some(1), Some(2), None])); + let result = tester + .invoke(vec![ + ColumnarValue::Scalar(ScalarValue::Struct(Arc::new(raster))), + ColumnarValue::Array(geoms), + ColumnarValue::Array(bands), + ]) + .unwrap(); + let arr = result.into_array(3).unwrap(); + let arr = arr.as_any().downcast_ref::().unwrap(); + assert_eq!(arr.value(0), 1.0); // band 1 + assert_eq!(arr.value(1), 10.0); // band 2 + assert!(arr.is_null(2), "NULL band element should be NULL"); + } + + #[test] + fn scalar_raster_null_scalar_band_is_all_null() { + // A NULL scalar band makes every output NULL without touching the band. + let udf: ScalarUDF = rs_value_udf().into(); + let geom_type = SedonaType::Wkb(Edges::Planar, lnglat()); + let tester = ScalarUdfTester::new( + udf, + vec![ + RASTER, + geom_type.clone(), + SedonaType::Arrow(DataType::Int32), + ], + ); + let raster = RasterSpec::d2(2, 2).band_values(&[1u8, 2, 3, 4]).build(); + let geoms = create_geom_array(&[Some("POINT (0.5 0.5)")], &geom_type); + let result = tester + .invoke(vec![ + ColumnarValue::Scalar(ScalarValue::Struct(Arc::new(raster))), + ColumnarValue::Array(geoms), + ColumnarValue::Scalar(ScalarValue::Int32(None)), + ]) + .unwrap(); + let arr = result.into_array(1).unwrap(); + let arr = arr.as_any().downcast_ref::().unwrap(); + assert!(arr.is_null(0), "NULL scalar band should sample to NULL"); + } + #[test] fn non_finite_point_ordinate_is_none() { // A point with a NaN/Inf ordinate (e.g. POINT(NaN 5)) has no location to From 9f6c94cad174f36c1fe31f716b746242215f4ed8 Mon Sep 17 00:00:00 2001 From: jameswillis Date: Tue, 23 Jun 2026 17:18:03 -0700 Subject: [PATCH 15/18] docs(rust/sedona-raster-functions): explain why RS_Value errors instead of a WGS84 pivot Unlike the spatial predicates, RS_Value propagates a failed point->raster-CRS transform as an error rather than falling back to a WGS84 pivot: sampling must land the point in the raster's own CRS (the only space its affine/pixel grid is defined in), so there is no neutral CRS to fall back to. --- rust/sedona-raster-functions/src/rs_value.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/rust/sedona-raster-functions/src/rs_value.rs b/rust/sedona-raster-functions/src/rs_value.rs index a265339d5..b28b2a59a 100644 --- a/rust/sedona-raster-functions/src/rs_value.rs +++ b/rust/sedona-raster-functions/src/rs_value.rs @@ -331,6 +331,14 @@ fn next_band( /// /// Errors if exactly one of the point / raster carries a CRS: sampling across a /// known and an unknown CRS would silently mislocate the point. +/// +/// Unlike the spatial predicates (`RS_Intersects` et al.), which fall back to a +/// WGS84 pivot when a direct transform between two CRSes fails, a failed +/// transform here is propagated as an error. Sampling has to land the point in +/// the raster's own CRS — that is the only space its affine/pixel grid is +/// defined in — so there is no neutral CRS to fall back to: a WGS84 pivot would +/// silently sample the wrong pixel rather than compare geometries in a shared +/// space. fn reproject_point( point_wkb: &[u8], point_crs: CrsRef<'_>, From 67af522d235068153fe39682aa8f54958779ecb1 Mon Sep 17 00:00:00 2001 From: jameswillis Date: Wed, 24 Jun 2026 11:04:50 -0700 Subject: [PATCH 16/18] test(rust/sedona-geometry): hoist fixtures import to the test module The wkb_header tests repeated `use sedona_testing::fixtures::*;` in eight functions; lift it to the `mod tests` block once. --- rust/sedona-geometry/src/wkb_header.rs | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/rust/sedona-geometry/src/wkb_header.rs b/rust/sedona-geometry/src/wkb_header.rs index 24b36730f..35a1c3028 100644 --- a/rust/sedona-geometry/src/wkb_header.rs +++ b/rust/sedona-geometry/src/wkb_header.rs @@ -448,6 +448,7 @@ fn calc_dimensions(code: u32) -> Result { #[cfg(test)] mod tests { use super::*; + use sedona_testing::fixtures::*; use std::str::FromStr; use wkb::writer::{write_geometry, WriteOptions}; use wkt::Wkt; @@ -585,8 +586,6 @@ mod tests { #[test] fn ewkb() { - use sedona_testing::fixtures::*; - // Test POINT with SRID 4326 let header = WkbHeader::try_new(&POINT_WITH_SRID_4326_EWKB).unwrap(); assert_eq!(header.srid(), 4326); @@ -669,8 +668,6 @@ mod tests { #[test] fn srid_linestring() { - use sedona_testing::fixtures::*; - let header = WkbHeader::try_new(&LINESTRING_WITH_SRID_4326_EWKB).unwrap(); assert_eq!(header.srid(), 4326); assert_eq!( @@ -684,8 +681,6 @@ mod tests { #[test] fn srid_polygon() { - use sedona_testing::fixtures::*; - let header = WkbHeader::try_new(&POLYGON_WITH_SRID_4326_EWKB).unwrap(); assert_eq!(header.srid(), 4326); assert_eq!(header.geometry_type_id().unwrap(), GeometryTypeId::Polygon); @@ -696,8 +691,6 @@ mod tests { #[test] fn multipoint_with_srid() { - use sedona_testing::fixtures::*; - let header = WkbHeader::try_new(&MULTIPOINT_WITH_SRID_4326_EWKB).unwrap(); assert_eq!(header.srid(), 4326); assert_eq!( @@ -711,8 +704,6 @@ mod tests { #[test] fn srid_empty_geometries_with_srid() { - use sedona_testing::fixtures::*; - // Test POINT EMPTY with SRID let header = WkbHeader::try_new(&POINT_EMPTY_WITH_SRID_4326_EWKB).unwrap(); assert_eq!(header.srid(), 4326); @@ -1014,9 +1005,7 @@ mod tests { } #[test] - fn incomplete_ewkb_buffers() { - use sedona_testing::fixtures::*; - // Test incomplete EWKB buffers + fn incomplete_ewkb_buffers() { // Test incomplete EWKB buffers // 1 byte_order + 4 geometry type + 4 srid + 8 x + 8 y let wkb = POINT_WITH_SRID_4326_EWKB; @@ -1169,8 +1158,6 @@ mod tests { #[test] fn read_point_xy_ewkb_with_srid() { - use sedona_testing::fixtures::*; - // The SRID sits between the type code and the coordinate; the reader must // skip it for every dimensionality. for wkb in [ @@ -1192,8 +1179,6 @@ mod tests { // POINT EMPTY is encoded as NaN/NaN and reads back as "no coordinate". assert_eq!(read_point_xy(&make_wkb("POINT EMPTY")).unwrap(), None); assert_eq!(read_point_xy(&make_wkb("POINT Z EMPTY")).unwrap(), None); - - use sedona_testing::fixtures::*; assert_eq!( read_point_xy(&POINT_EMPTY_WITH_SRID_4326_EWKB).unwrap(), None From 526e9de4b6080a63b6a03c6ffb1a2d4f8a582b35 Mon Sep 17 00:00:00 2001 From: jameswillis Date: Wed, 24 Jun 2026 11:08:42 -0700 Subject: [PATCH 17/18] test(python/sedonadb): cross-check RS_Value against rasterio Build an in-memory raster from a random numpy array (via Raster.from_numpy) with a known geotransform and no CRS, then sample pixel centers, off-center positions, and random interior points; assert RS_Value matches what rasterio reads at the same world coordinates. Adds rasterio to the test extras; the test skips if it is not installed. --- python/sedonadb/pyproject.toml | 1 + .../tests/functions/test_raster_functions.py | 87 +++++++++++++++++++ 2 files changed, 88 insertions(+) diff --git a/python/sedonadb/pyproject.toml b/python/sedonadb/pyproject.toml index a610d1641..b0f83c6a8 100644 --- a/python/sedonadb/pyproject.toml +++ b/python/sedonadb/pyproject.toml @@ -42,6 +42,7 @@ test = [ "polars", "pytest", "pyyaml", + "rasterio", ] geopandas = [ "adbc-driver-manager[dbapi]", diff --git a/python/sedonadb/tests/functions/test_raster_functions.py b/python/sedonadb/tests/functions/test_raster_functions.py index 5a594cf92..37640bcf8 100644 --- a/python/sedonadb/tests/functions/test_raster_functions.py +++ b/python/sedonadb/tests/functions/test_raster_functions.py @@ -135,3 +135,90 @@ def test_rs_ensureloaded(con, sedona_testing): ) def test_rs_value_point(expr, expected): SedonaDB().assert_query_result(f"SELECT {expr}", expected) + + +def test_rs_value_matches_rasterio(con): + """Cross-check RS_Value against rasterio on a random raster. + + Builds an in-memory raster from a random numpy array with a known + geotransform and no CRS (so neither engine reprojects), then samples a dense + set of points and asserts RS_Value returns exactly what rasterio reads at the + same world coordinates. Points cover every pixel center plus four off-center + positions per pixel (toward the corners, kept inside the pixel to avoid floor + ambiguity at exact boundaries) and a batch of random interior points. + """ + import numpy as np + import pandas as pd + + pytest.importorskip("rasterio") + from rasterio.io import MemoryFile + from rasterio.transform import Affine + + from sedonadb.raster import Raster + + rng = np.random.default_rng(42) + height, width = 7, 5 + data = rng.random((height, width)) * 1000.0 + + # GDAL-order geotransform: origin (100, 500), 2-wide pixels, -3 tall + # (north-up), no skew. Shared verbatim by both engines. + gdal_transform = (100.0, 2.0, 0.0, 500.0, 0.0, -3.0) + affine = Affine.from_gdal(*gdal_transform) + + # Sample points in pixel space (col_frac, row_frac). + pixel_points = [] + for row in range(height): + for col in range(width): + for du, dv in [ + (0.5, 0.5), + (0.25, 0.25), + (0.75, 0.75), + (0.25, 0.75), + (0.75, 0.25), + ]: + pixel_points.append((col + du, row + dv)) + n_random = 150 + rand_cols = rng.integers(0, width, n_random) + rand_rows = rng.integers(0, height, n_random) + pixel_points.extend( + zip( + rand_cols + rng.uniform(0.1, 0.9, n_random), + rand_rows + rng.uniform(0.1, 0.9, n_random), + ) + ) + + # Map pixel-space positions to world coordinates via the shared affine. + xs, ys = zip(*(affine * (u, v) for u, v in pixel_points)) + + # rasterio reference: a real GDAL read of the same array (no CRS). + with MemoryFile() as mem: + with mem.open( + driver="GTiff", + height=height, + width=width, + count=1, + dtype="float64", + transform=affine, + ) as dst: + dst.write(data, 1) + with mem.open() as src: + expected = [vals[0] for vals in src.sample(list(zip(xs, ys)))] + + # sedonadb: sample the same points via RS_Value over a scalar raster. + raster = Raster.from_numpy(data, transform=gdal_transform) + pts = con.create_data_frame(pd.DataFrame({"idx": range(len(xs)), "x": xs, "y": ys})) + view = "test_rs_value_matches_rasterio_pts" + pts.to_view(view) + try: + got = ( + con.sql( + f"SELECT RS_Value($1, ST_Point(x, y)) AS v FROM {view} ORDER BY idx", + params=(raster,), + ) + .to_arrow_table()["v"] + .to_pylist() + ) + finally: + con.drop_view(view) + + assert got == pytest.approx(expected) From 7effb0818111ec5c962294070f20ce40e6b09f86 Mon Sep 17 00:00:00 2001 From: jameswillis Date: Wed, 24 Jun 2026 11:22:19 -0700 Subject: [PATCH 18/18] style(rust/sedona-geometry): fix rustfmt in wkb_header test The fixtures-import hoist left a trailing comment glued to the function's opening brace; split it onto its own line. --- rust/sedona-geometry/src/wkb_header.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rust/sedona-geometry/src/wkb_header.rs b/rust/sedona-geometry/src/wkb_header.rs index 35a1c3028..2e1b1c8e0 100644 --- a/rust/sedona-geometry/src/wkb_header.rs +++ b/rust/sedona-geometry/src/wkb_header.rs @@ -1005,7 +1005,8 @@ mod tests { } #[test] - fn incomplete_ewkb_buffers() { // Test incomplete EWKB buffers + fn incomplete_ewkb_buffers() { + // Test incomplete EWKB buffers // 1 byte_order + 4 geometry type + 4 srid + 8 x + 8 y let wkb = POINT_WITH_SRID_4326_EWKB;