Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions c/sedona-gdal/src/dyn_load.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ fn load_all_symbols(lib: &Library, api: &mut SedonaGdalApi) -> Result<(), GdalIn
load_fn!(lib, api, VSIFileFromMemBuffer);
load_fn!(lib, api, VSIFCloseL);
load_fn!(lib, api, VSIUnlink);
load_fn!(lib, api, VSIOpenDir);
load_fn!(lib, api, VSIGetNextDirEntry);
load_fn!(lib, api, VSICloseDir);
load_fn!(lib, api, VSIGetMemFileBuffer);
load_fn!(lib, api, VSIFree);
load_fn!(lib, api, VSIMalloc);
Expand Down
12 changes: 12 additions & 0 deletions c/sedona-gdal/src/gdal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
//! at every call site.

use crate::config;
use crate::cpl::CslStringList;
use crate::dataset::Dataset;
use crate::driver::{Driver, DriverManager};
use crate::errors::Result;
Expand Down Expand Up @@ -171,6 +172,17 @@ impl Gdal {
vsi::get_vsi_mem_file_bytes_owned(self.api, file_name)
}

/// Open a VSI directory for iteration.
/// See also [`vsi::open_dir`].
pub fn open_vsi_dir(
&self,
path: &str,
recurse_depth: i32,
options: Option<&CslStringList>,
) -> Result<crate::vsi::VsiDir> {
crate::vsi::open_dir(self.api, path, recurse_depth, options)
}

// -- Raster operations ---------------------------------------------------

/// Create a bare in-memory MEM dataset with GDAL-owned bands.
Expand Down
27 changes: 27 additions & 0 deletions c/sedona-gdal/src/gdal_dyn_bindgen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,24 @@ pub type OGRLayerH = *mut c_void;
pub type OGRFeatureH = *mut c_void;
pub type OGRFieldDefnH = *mut c_void;
pub type VSILFILE = *mut c_void;
/// Opaque pointee type for GDAL directory handles.
///
/// Unlike the neighbouring `*mut c_void` handle aliases, this alias names the
/// pointee itself; the function pointers in the API table spell the handle as
/// `*mut VSIDIR`.
pub type VSIDIR = c_void;

/// Mirror of GDAL's `VSIDIREntry` (declared in `cpl_vsi.h`), as returned by
/// `VSIGetNextDirEntry`.
///
/// The field order and types must match the C declaration exactly because
/// instances are only ever read through a pointer handed out by GDAL. In the C
/// header the three "known" flags are single `char`s placed *after* `nMTime`,
/// followed by the `papszExtra` string list; declaring them as interleaved
/// `int`s shifts every field after `nMode` and makes the flags read garbage.
#[repr(C)]
pub struct VSIDIREntry {
    /// NUL-terminated entry name, owned by GDAL.
    pub pszName: *const c_char,
    /// File mode bits; only meaningful when `bModeKnown` is non-zero.
    pub nMode: c_int,
    /// File size; only meaningful when `bSizeKnown` is non-zero.
    pub nSize: vsi_l_offset,
    /// Last modification time; only meaningful when `bMTimeKnown` is non-zero.
    pub nMTime: GIntBig,
    /// Non-zero when `nMode` is valid.
    pub bModeKnown: c_char,
    /// Non-zero when `nSize` is valid.
    pub bSizeKnown: c_char,
    /// Non-zero when `nMTime` is valid.
    pub bMTimeKnown: c_char,
    /// NULL-terminated list of extra properties, owned by GDAL (may be null).
    pub papszExtra: *mut *mut c_char,
}

/// Unsigned 64-bit integer type used for the `nSize` field of [`VSIDIREntry`].
pub type vsi_l_offset = u64;
/// Signed 64-bit integer type used for the `nMTime` field of [`VSIDIREntry`].
pub type GIntBig = i64;

// NOTE(review): the two constants below presumably mirror GDAL's `VSI_S_IFMT` /
// `VSI_S_IFREG` (same octal values as POSIX `S_IFMT` / `S_IFREG`), i.e. the
// file-type mask for `nMode` and the "regular file" type — confirm against
// `cpl_vsi.h`.
pub const VSI_S_IFMT: c_int = 0o170000;
pub const VSI_S_IFREG: c_int = 0o100000;

// --- Enum types ---

Expand Down Expand Up @@ -459,6 +477,15 @@ pub(crate) struct SedonaGdalApi {
>,
pub VSIFCloseL: Option<unsafe extern "C" fn(fp: VSILFILE) -> c_int>,
pub VSIUnlink: Option<unsafe extern "C" fn(pszFilename: *const c_char) -> c_int>,
pub VSIOpenDir: Option<
unsafe extern "C" fn(
pszPath: *const c_char,
nRecurseDepth: c_int,
papszOptions: *const *const c_char,
) -> *mut VSIDIR,
>,
pub VSIGetNextDirEntry: Option<unsafe extern "C" fn(dir: *mut VSIDIR) -> *const VSIDIREntry>,
pub VSICloseDir: Option<unsafe extern "C" fn(dir: *mut VSIDIR)>,
pub VSIGetMemFileBuffer: Option<
unsafe extern "C" fn(
pszFilename: *const c_char,
Expand Down
125 changes: 125 additions & 0 deletions c/sedona-gdal/src/vsi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
use std::ffi::CString;
use std::ops::Deref;

use crate::cpl::CslStringList;
use crate::errors::{GdalError, Result};
use crate::gdal_api::{call_gdal_api, GdalApi};

Expand Down Expand Up @@ -147,6 +148,36 @@ pub fn unlink_mem_file(api: &'static GdalApi, file_name: &str) -> Result<()> {
Ok(())
}

/// Pick the directory separator to use when extending `path`.
///
/// * `http://` and `https://` URLs always use `/`.
/// * On non-Windows targets every other path uses `/` as well.
/// * On Windows targets a path shaped like `{drive_letter}:/{rest}` uses `/`;
///   everything else uses `\`.
///
/// This is a re-implementation of GDAL's `VSIGetDirectorySeparator` (GDAL 3.9+)
/// so that older GDAL versions, which lack that symbol, remain supported.
///
/// NOTE(review): GDAL appears to resolve the separator per filesystem handler,
/// so virtual paths such as `/vsis3/...` or `/vsimem/...` would yield `/` even
/// on Windows, whereas this function returns `\` for them there — confirm
/// whether that divergence is intended.
pub fn directory_separator_for_path(path: &str) -> &'static str {
    const FORWARD: &str = "/";
    const BACKWARD: &str = "\\";

    let is_http_url = ["http://", "https://"]
        .iter()
        .any(|scheme| path.starts_with(scheme));
    if is_http_url {
        return FORWARD;
    }

    if cfg!(windows) {
        // Byte-wise check: any first byte, then ':' and '/'.
        match path.as_bytes() {
            [_, b':', b'/', ..] => FORWARD,
            _ => BACKWARD,
        }
    } else {
        FORWARD
    }
}

#[cfg(test)]
pub(crate) fn with_memfile<T>(
api: &'static GdalApi,
Expand Down Expand Up @@ -208,6 +239,80 @@ pub fn get_vsi_mem_file_bytes_owned(api: &'static GdalApi, file_name: &str) -> R
Ok(buffer.as_ref().to_vec())
}

/// Owned snapshot of one directory entry produced while iterating a [`VsiDir`].
///
/// Every optional field is `None` when GDAL flags the corresponding value as
/// unknown for this entry.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VsiDirEntry {
    /// Entry name, converted lossily to UTF-8; empty when GDAL reports no name.
    pub name: String,
    /// Raw file mode bits, when known.
    pub mode: Option<i32>,
    /// File size, when known.
    pub size: Option<crate::gdal_dyn_bindgen::vsi_l_offset>,
    /// Last modification time as reported by GDAL, when known.
    pub mtime: Option<crate::gdal_dyn_bindgen::GIntBig>,
}

pub struct VsiDir {
api: &'static crate::gdal_api::GdalApi,
handle: *mut crate::gdal_dyn_bindgen::VSIDIR,
}

impl VsiDir {
    /// Fetch the next directory entry as an owned [`VsiDirEntry`].
    ///
    /// Returns `None` when the `VSIGetNextDirEntry` symbol is not available or
    /// when GDAL returns a null entry.
    pub fn next_entry(&mut self) -> Option<VsiDirEntry> {
        let get_next = self.api.inner.VSIGetNextDirEntry?;

        // SAFETY: `handle` is the pointer stored at construction time and is
        // only closed in `Drop`, so it is still open here.
        let raw = unsafe { get_next(self.handle) };

        // SAFETY: a non-null result points at a GDAL-owned entry; every field
        // is copied out below, so the borrow does not outlive this call.
        let raw_entry = unsafe { raw.as_ref() }?;

        let name = match raw_entry.pszName.is_null() {
            true => String::new(),
            // SAFETY: non-null `pszName` is a NUL-terminated C string from GDAL.
            false => unsafe { std::ffi::CStr::from_ptr(raw_entry.pszName) }
                .to_string_lossy()
                .into_owned(),
        };

        // Each value is only exposed when its "known" flag is set.
        let mode = if raw_entry.bModeKnown != 0 {
            Some(raw_entry.nMode)
        } else {
            None
        };
        let size = if raw_entry.bSizeKnown != 0 {
            Some(raw_entry.nSize)
        } else {
            None
        };
        let mtime = if raw_entry.bMTimeKnown != 0 {
            Some(raw_entry.nMTime)
        } else {
            None
        };

        Some(VsiDirEntry {
            name,
            mode,
            size,
            mtime,
        })
    }
}

impl Iterator for VsiDir {
type Item = VsiDirEntry;

fn next(&mut self) -> Option<Self::Item> {
self.next_entry()
}
}

/// Closes the underlying directory handle via `VSICloseDir` (when that symbol
/// is loaded) and clears the stored pointer.
impl Drop for VsiDir {
    fn drop(&mut self) {
        if self.handle.is_null() {
            return;
        }
        // Clear the field first so the handle can never be closed twice.
        let open_handle = std::mem::replace(&mut self.handle, std::ptr::null_mut());
        if let Some(close_dir) = self.api.inner.VSICloseDir {
            // SAFETY: `open_handle` is non-null (checked above) and this is the
            // only place in this block where it is handed to `VSICloseDir`.
            unsafe { close_dir(open_handle) };
        }
    }
}

pub fn open_dir(
api: &'static crate::gdal_api::GdalApi,
path: &str,
recurse_depth: i32,
options: Option<&CslStringList>,
) -> crate::errors::Result<VsiDir> {
let c_path = std::ffi::CString::new(path)?;
let options_ptr: *const *const std::os::raw::c_char = options
.map(|opts| opts.as_ptr() as *const *const std::os::raw::c_char)
.unwrap_or(std::ptr::null());
let handle =
unsafe { call_gdal_api!(api, VSIOpenDir, c_path.as_ptr(), recurse_depth, options_ptr) };
if handle.is_null() {
return Err(api.last_null_pointer_err("VSIOpenDir"));
}
Ok(VsiDir { api, handle })
}

#[cfg(all(test, feature = "gdal-sys"))]
mod tests {
use super::*;
Expand Down Expand Up @@ -300,4 +405,24 @@ mod tests {
})
.unwrap();
}

/// Checks the separator chosen for a handful of representative paths on both
/// Windows and non-Windows targets.
#[test]
fn test_directory_separator_for_path() {
    // (input path, expected on Windows, expected everywhere else)
    let cases: [(&str, &str, &str); 5] = [
        ("/vsis3/bucket/prefix", r"\", "/"),
        ("https://host/data.tif", "/", "/"),
        (r"C:\data\dir", r"\", "/"),
        (r"C:/data/dir", "/", "/"),
        ("/tmp/data", r"\", "/"),
    ];

    for &(path, on_windows, elsewhere) in cases.iter() {
        let expected = if cfg!(windows) { on_windows } else { elsewhere };
        // Pair the path with the result so a failure names the offending input.
        assert_eq!(
            (path, directory_separator_for_path(path)),
            (path, expected)
        );
    }
}
}
51 changes: 51 additions & 0 deletions docs/reference/sql/rs_geotiff_tiles.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
---
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

title: rs_geotiff_tiles
description: Reads a GeoTIFF file or directory of GeoTIFF files as tile rows.
kernels:
- returns: table
args:
- name: path
type: string
- returns: table
args:
- name: path
type: string
- name: recursive
type: bool
---

## Description

`rs_geotiff_tiles()` reads a GeoTIFF file, or a directory of GeoTIFF files, and
returns one row per internal GDAL block. Each row contains the source `path`,
the zero-based tile indices `x` and `y`, and an out-db `rast` value pointing
back to the source file.

## Examples

```sql
SELECT path, x, y
FROM rs_geotiff_tiles('../../../submodules/sedona-testing/data/raster/test4.tiff');
```

```sql
SELECT RS_MetaData(rast)
FROM rs_geotiff_tiles('../../../submodules/sedona-testing/data/raster/test4.tiff');
```
Comment on lines +43 to +51

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you ensure these are user-runnable examples using a URL? If we end up getting transient failures we could do something fancy with the doc renderer that transforms sedona-testing urls into submodule paths.

1 change: 1 addition & 0 deletions rust/sedona-geoparquet/src/statistics_accumulator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ impl GeoStatsAccumulator for GeographyGeoStatsAccumulator {
#[cfg(test)]
mod test {
use super::*;
#[cfg(feature = "s2geography")]
use parquet::geospatial::bounding_box::BoundingBox;
use sedona_schema::datatypes::{WKB_GEOGRAPHY, WKB_VIEW_GEOGRAPHY};
use sedona_testing::create::create_scalar;
Expand Down
8 changes: 8 additions & 0 deletions rust/sedona-raster-gdal/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,11 @@ arrow-array = { workspace = true }
arrow-buffer = { workspace = true }
arrow-schema = { workspace = true }
async-trait = { workspace = true }
datafusion = { workspace = true, default-features = false }

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we can avoid the full datafusion crate as a dependency that would be helpful for compilation order

datafusion-common = { workspace = true }
datafusion-common-runtime = { workspace = true }
datafusion-expr = { workspace = true }
futures = { workspace = true }
lru = { workspace = true }
sedona-common = { workspace = true }
sedona-expr = { workspace = true }
Expand All @@ -63,3 +66,8 @@ path = "benches/rs_frompath.rs"
harness = false
name = "rs_metadata"
path = "benches/rs_metadata.rs"

[[bench]]
harness = false
name = "rs_geotiff_tiles"
path = "benches/rs_geotiff_tiles.rs"
Loading
Loading