diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 03b6b5f79..d79d7a23a 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -128,7 +128,7 @@ jobs: - name: Install dependencies shell: bash run: | - sudo apt-get update && sudo apt-get install -y libgeos-dev + sudo apt-get update && sudo apt-get install -y libgeos-dev libgdal-dev - name: Check if: matrix.name == 'check' diff --git a/Cargo.lock b/Cargo.lock index 6cbb4d3e7..68eea1273 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2736,6 +2736,16 @@ dependencies = [ "slab", ] +[[package]] +name = "gdal-sys" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cceef1cc08a1f031c5717cb645bb361a3114470cc142cc96bc5e62b79695632e" +dependencies = [ + "pkg-config", + "semver", +] + [[package]] name = "generational-arena" version = "0.2.9" @@ -5127,6 +5137,7 @@ dependencies = [ "sedona-datasource", "sedona-expr", "sedona-functions", + "sedona-gdal", "sedona-geo", "sedona-geometry", "sedona-geoparquet", @@ -5281,6 +5292,16 @@ dependencies = [ "wkt 0.14.0", ] +[[package]] +name = "sedona-gdal" +version = "0.3.0" +dependencies = [ + "gdal-sys", + "libloading 0.9.0", + "sedona-testing", + "thiserror 2.0.17", +] + [[package]] name = "sedona-geo" version = "0.3.0" @@ -5707,6 +5728,7 @@ dependencies = [ "sedona-adbc", "sedona-datasource", "sedona-expr", + "sedona-gdal", "sedona-geometry", "sedona-geoparquet", "sedona-proj", diff --git a/Cargo.toml b/Cargo.toml index df7f87571..134275fdc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ members = [ "c/sedona-geoarrow-c", "c/sedona-geos", "c/sedona-libgpuspatial", + "c/sedona-gdal", "c/sedona-proj", "c/sedona-s2geography", "c/sedona-tg", @@ -150,6 +151,7 @@ sedona-testing = { version = "0.3.0", path = "rust/sedona-testing" } # C wrapper crates sedona-geoarrow-c = { version = "0.3.0", path = "c/sedona-geoarrow-c" } sedona-geos = { version = "0.3.0", path = "c/sedona-geos" } +sedona-gdal = 
{ version = "0.3.0", path = "c/sedona-gdal", default-features = false } sedona-proj = { version = "0.3.0", path = "c/sedona-proj", default-features = false } sedona-s2geography = { version = "0.3.0", path = "c/sedona-s2geography" } sedona-tg = { version = "0.3.0", path = "c/sedona-tg" } diff --git a/c/sedona-gdal/Cargo.toml b/c/sedona-gdal/Cargo.toml new file mode 100644 index 000000000..115bdab60 --- /dev/null +++ b/c/sedona-gdal/Cargo.toml @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +[package] +name = "sedona-gdal" +version.workspace = true +license.workspace = true +keywords.workspace = true +categories.workspace = true +authors.workspace = true +homepage.workspace = true +repository.workspace = true +description.workspace = true +readme.workspace = true +edition.workspace = true +rust-version.workspace = true + +[dependencies] +gdal-sys = { version = "0.12.0", optional = true } +libloading = { workspace = true } +thiserror = { workspace = true } + +[features] +default = ["gdal-sys"] +gdal-sys = ["dep:gdal-sys"] + +[dev-dependencies] +sedona-testing = { workspace = true } diff --git a/c/sedona-gdal/src/dyn_load.rs b/c/sedona-gdal/src/dyn_load.rs new file mode 100644 index 000000000..f85f05b01 --- /dev/null +++ b/c/sedona-gdal/src/dyn_load.rs @@ -0,0 +1,315 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::path::Path; + +use libloading::Library; + +use crate::errors::GdalInitLibraryError; +use crate::gdal_dyn_bindgen::SedonaGdalApi; + +/// Load a single symbol from the library and write it into the given field. +/// +/// We load as a raw `*const ()` pointer and transmute to the target function pointer +/// type. 
+/// This is the standard pattern for dynamic symbol loading where the loaded
+/// symbol's signature is known but cannot be expressed as a generic parameter to
+/// `Library::get` (because each field has a different signature).
+///
+/// On failure returns a `GdalInitLibraryError` with the symbol name and the
+/// underlying OS error message.
+///
+/// The expansion contains a `?`, so a failed lookup returns early from the
+/// function the macro is invoked in; that function's error type must accept a
+/// `GdalInitLibraryError`.
+///
+/// Nothing in the expansion checks that the exported symbol actually has the
+/// signature of the `$name` field; the transmute relies on that being true.
+macro_rules! load_fn {
+    ($lib:expr, $api:expr, $name:ident) => {
+        // The target types here are too verbose to annotate for each call site
+        #[allow(clippy::missing_transmute_annotations)]
+        {
+            $api.$name = Some(unsafe {
+                // The symbol name looked up is the field identifier itself,
+                // with a trailing NUL appended at compile time.
+                let sym = $lib
+                    .get::<*const ()>(concat!(stringify!($name), "\0").as_bytes())
+                    .map_err(|e| {
+                        GdalInitLibraryError::LibraryError(format!(
+                            "Failed to load symbol {}: {}",
+                            stringify!($name),
+                            e
+                        ))
+                    })?;
+                // The transmute target is inferred from the type of `$api.$name`.
+                std::mem::transmute(sym.into_raw().into_raw())
+            });
+        }
+    };
+}
+
+/// Try to load a symbol under one of several names (e.g. for C++ mangled symbols).
+/// Writes the first successful match into `$api.$field`. Returns an error only if
+/// *none* of the names resolve.
+///
+/// Names are tried in the order given. The errors of individual failed lookups
+/// are discarded; the final error only names the field. Like `load_fn!`, the
+/// error path uses `return Err(..)` and therefore exits the enclosing function.
+macro_rules! load_fn_any {
+    ($lib:expr, $api:expr, $field:ident, [$($name:expr),+ $(,)?]) => {{
+        let mut found = false;
+        $(
+            // Skip the remaining candidates once one of them has resolved.
+            if !found {
+                if let Ok(sym) = unsafe { $lib.get::<*const ()>($name) } {
+                    // The target types here are too verbose to annotate for each call site
+                    #[allow(clippy::missing_transmute_annotations)]
+                    {
+                        $api.$field = Some(unsafe { std::mem::transmute(sym.into_raw().into_raw()) });
+                    }
+                    found = true;
+                }
+            }
+        )+
+        if !found {
+            return Err(GdalInitLibraryError::LibraryError(format!(
+                "Failed to resolve {} under any known mangled name",
+                stringify!($field),
+            )));
+        }
+    }};
+}
+
+/// Populate all function-pointer fields of [`SedonaGdalApi`] from the given
+/// [`Library`] handle.
+///
+/// Symbols are resolved in the order listed below. Every entry is required:
+/// the first one that cannot be resolved aborts loading and is reported in the
+/// returned `LibraryError`; fields loaded before that point stay populated in
+/// `api`.
+fn load_all_symbols(lib: &Library, api: &mut SedonaGdalApi) -> Result<(), GdalInitLibraryError> {
+    // --- Dataset ---
+    load_fn!(lib, api, GDALOpenEx);
+    load_fn!(lib, api, GDALClose);
+    load_fn!(lib, api, GDALGetRasterXSize);
+    load_fn!(lib, api, GDALGetRasterYSize);
+    load_fn!(lib, api, GDALGetRasterCount);
+    load_fn!(lib, api, GDALGetRasterBand);
+    load_fn!(lib, api, GDALGetGeoTransform);
+    load_fn!(lib, api, GDALSetGeoTransform);
+    load_fn!(lib, api, GDALGetProjectionRef);
+    load_fn!(lib, api, GDALSetProjection);
+    load_fn!(lib, api, GDALGetSpatialRef);
+    load_fn!(lib, api, GDALSetSpatialRef);
+    load_fn!(lib, api, GDALCreateCopy);
+    load_fn!(lib, api, GDALDatasetCreateLayer);
+
+    // --- Driver ---
+    load_fn!(lib, api, GDALAllRegister);
+    load_fn!(lib, api, GDALGetDriverByName);
+    load_fn!(lib, api, GDALCreate);
+
+    // --- Band ---
+    load_fn!(lib, api, GDALAddBand);
+    load_fn!(lib, api, GDALRasterIO);
+    load_fn!(lib, api, GDALRasterIOEx);
+    load_fn!(lib, api, GDALGetRasterDataType);
+    load_fn!(lib, api, GDALGetRasterBandXSize);
+    load_fn!(lib, api, GDALGetRasterBandYSize);
+    load_fn!(lib, api, GDALGetBlockSize);
+    load_fn!(lib, api, GDALGetRasterNoDataValue);
+    load_fn!(lib, api, GDALSetRasterNoDataValue);
+    load_fn!(lib, api, GDALDeleteRasterNoDataValue);
+    load_fn!(lib, api, GDALSetRasterNoDataValueAsUInt64);
+    load_fn!(lib, api, GDALSetRasterNoDataValueAsInt64);
+
+    // --- SpatialRef ---
+    load_fn!(lib, api, OSRNewSpatialReference);
+    load_fn!(lib, api, OSRDestroySpatialReference);
+    load_fn!(lib, api, OSRExportToPROJJSON);
+    load_fn!(lib, api, OSRClone);
+    load_fn!(lib, api, OSRRelease);
+
+    // --- Geometry ---
+    load_fn!(lib, api, OGR_G_CreateFromWkb);
+    load_fn!(lib, api, OGR_G_CreateFromWkt);
+    load_fn!(lib, api, OGR_G_ExportToIsoWkb);
+    load_fn!(lib, api, OGR_G_WkbSize);
+    load_fn!(lib, api, OGR_G_GetEnvelope);
+    load_fn!(lib, api, OGR_G_DestroyGeometry);
+
+    // --- Vector / Layer ---
+    load_fn!(lib, api, OGR_L_ResetReading);
+    load_fn!(lib, api, OGR_L_GetNextFeature);
+    load_fn!(lib, api, OGR_L_CreateField);
+    load_fn!(lib, api, OGR_L_GetFeatureCount);
+    load_fn!(lib, api, OGR_F_GetGeometryRef);
+    load_fn!(lib, api, OGR_F_GetFieldIndex);
+    load_fn!(lib, api, OGR_F_GetFieldAsDouble);
+    load_fn!(lib, api, OGR_F_GetFieldAsInteger);
+    load_fn!(lib, api, OGR_F_IsFieldSetAndNotNull);
+    load_fn!(lib, api, OGR_F_Destroy);
+    load_fn!(lib, api, OGR_Fld_Create);
+    load_fn!(lib, api, OGR_Fld_Destroy);
+
+    // --- VSI ---
+    load_fn!(lib, api, VSIFileFromMemBuffer);
+    load_fn!(lib, api, VSIFCloseL);
+    load_fn!(lib, api, VSIUnlink);
+    load_fn!(lib, api, VSIGetMemFileBuffer);
+    load_fn!(lib, api, VSIFree);
+    load_fn!(lib, api, VSIMalloc);
+
+    // --- VRT ---
+    load_fn!(lib, api, VRTCreate);
+    load_fn!(lib, api, VRTAddSimpleSource);
+
+    // --- Rasterize / Polygonize ---
+    load_fn!(lib, api, GDALRasterizeGeometries);
+    load_fn!(lib, api, GDALFPolygonize);
+    load_fn!(lib, api, GDALPolygonize);
+
+    // --- Version ---
+    load_fn!(lib, api, GDALVersionInfo);
+
+    // --- Config ---
+    load_fn!(lib, api, CPLSetThreadLocalConfigOption);
+
+    // --- Error ---
+    load_fn!(lib, api, CPLGetLastErrorNo);
+    load_fn!(lib, api, CPLGetLastErrorMsg);
+    load_fn!(lib, api, CPLErrorReset);
+
+    // --- Data Type ---
+    load_fn!(lib, api, GDALGetDataTypeSizeBytes);
+
+    // --- C++ API: MEMDataset::Create (resolved via mangled symbol names) ---
+    // The symbol is mangled differently on Linux, macOS, and MSVC, and the
+    // `char**` vs `const char**` parameter also affects the mangling.
+    // NOTE(review): both names listed below encode a non-const `char**` last
+    // parameter (`PPc` / `PEAPEAD`); no `const char**` variant is tried even
+    // though the comment above mentions one. Confirm whether more names are
+    // needed for the GDAL versions this crate targets.
+    load_fn_any!(
+        lib,
+        api,
+        MEMDatasetCreate,
+        [
+            // Linux and macOS
+            b"_ZN10MEMDataset6CreateEPKciii12GDALDataTypePPc\0",
+            // MSVC
+            b"?Create@MEMDataset@@SAPEAV1@PEBDHHHW4GDALDataType@@PEAPEAD@Z\0",
+        ]
+    );
+
+    Ok(())
+}
+
+/// Load a GDAL shared library from `path` and populate a [`SedonaGdalApi`] struct.
+///
+/// Returns the `(Library, SedonaGdalApi)` pair. The caller is responsible for
+/// keeping the `Library` alive for the lifetime of the function pointers.
+pub(crate) fn load_gdal_from_path( + path: &Path, +) -> Result<(Library, SedonaGdalApi), GdalInitLibraryError> { + let lib = unsafe { Library::new(path.as_os_str()) }.map_err(|e| { + GdalInitLibraryError::LibraryError(format!( + "Failed to load GDAL library from {}: {}", + path.display(), + e + )) + })?; + + let mut api = SedonaGdalApi::default(); + load_all_symbols(&lib, &mut api)?; + Ok((lib, api)) +} + +/// Load GDAL symbols from the current process image (equivalent to `dlopen(NULL)`). +/// +/// Returns the `(Library, SedonaGdalApi)` pair. The caller is responsible for +/// keeping the `Library` alive for the lifetime of the function pointers. +pub(crate) fn load_gdal_from_current_process( +) -> Result<(Library, SedonaGdalApi), GdalInitLibraryError> { + let lib = current_process_library()?; + let mut api = SedonaGdalApi::default(); + load_all_symbols(&lib, &mut api)?; + Ok((lib, api)) +} + +/// Open a handle to the current process image. +#[cfg(unix)] +fn current_process_library() -> Result { + Ok(libloading::os::unix::Library::this().into()) +} + +#[cfg(windows)] +fn current_process_library() -> Result { + // Safety: loading symbols from the current process is safe. + Ok(unsafe { libloading::os::windows::Library::this() } + .map_err(|e| { + GdalInitLibraryError::LibraryError(format!( + "Failed to open current process handle: {}", + e + )) + })? + .into()) +} + +#[cfg(not(any(unix, windows)))] +fn current_process_library() -> Result { + Err(GdalInitLibraryError::Invalid( + "current_process_library() is not implemented for this platform. \ + Only Unix and Windows are supported.", + )) +} + +#[cfg(test)] +mod test { + use super::*; + + /// Loading from an invalid path should return a `LibraryError`. 
+    #[test]
+    fn test_shared_library_error() {
+        let err = load_gdal_from_path(Path::new("/not/a/valid/gdal/library.so")).unwrap_err();
+        assert!(
+            matches!(err, GdalInitLibraryError::LibraryError(_)),
+            "Expected LibraryError, got: {err:?}"
+        );
+        assert!(
+            !err.to_string().is_empty(),
+            "Error message should not be empty"
+        );
+    }
+
+    /// Verify that loading from the current process succeeds when GDAL symbols
+    /// are linked in (i.e. the `gdal-sys` feature is enabled).
+    #[cfg(feature = "gdal-sys")]
+    #[test]
+    fn test_load_from_current_process() {
+        let (_lib, api) = load_gdal_from_current_process()
+            .expect("load_gdal_from_current_process should succeed when gdal-sys is linked");
+
+        // Spot-check that key function pointers were resolved.
+        assert!(
+            api.GDALAllRegister.is_some(),
+            "GDALAllRegister should be loaded"
+        );
+        assert!(api.GDALOpenEx.is_some(), "GDALOpenEx should be loaded");
+        assert!(api.GDALClose.is_some(), "GDALClose should be loaded");
+    }
+
+    /// Test loading from an explicit shared library path, gated behind an
+    /// environment variable. Skips gracefully when the variable is unset.
+    #[test]
+    fn test_load_from_shared_library() {
+        // An unset variable and an empty value are both treated as "skip".
+        if let Ok(gdal_library) = std::env::var("SEDONA_GDAL_TEST_SHARED_LIBRARY") {
+            if !gdal_library.is_empty() {
+                let (_lib, api) = load_gdal_from_path(Path::new(&gdal_library))
+                    .expect("Should load GDAL from SEDONA_GDAL_TEST_SHARED_LIBRARY");
+
+                assert!(
+                    api.GDALAllRegister.is_some(),
+                    "GDALAllRegister should be loaded"
+                );
+                assert!(api.GDALOpenEx.is_some(), "GDALOpenEx should be loaded");
+                return;
+            }
+        }
+
+        println!(
+            "Skipping test_load_from_shared_library - \
+             SEDONA_GDAL_TEST_SHARED_LIBRARY environment variable not set"
+        );
+    }
+}
diff --git a/c/sedona-gdal/src/errors.rs b/c/sedona-gdal/src/errors.rs
new file mode 100644
index 000000000..166e04236
--- /dev/null
+++ b/c/sedona-gdal/src/errors.rs
@@ -0,0 +1,42 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Ported (and contains copied code) from georust/gdal:
+//! <https://github.com/georust/gdal>.
+//! Original code is licensed under MIT.
+
+use thiserror::Error;
+
+/// Error type for the sedona-gdal crate initialization and library loading.
+#[derive(Error, Debug)]
+pub enum GdalInitLibraryError {
+    /// The requested way of initializing GDAL is not valid; the payload is the
+    /// message shown to the user.
+    #[error("{0}")]
+    Invalid(String),
+    /// The GDAL library could not be opened, or a symbol could not be resolved
+    /// from it; the payload is the full message.
+    #[error("{0}")]
+    LibraryError(String),
+}
+
+/// Error type compatible with the georust/gdal error variants used in this codebase.
+#[derive(Clone, Debug, Error)]
+pub enum GdalError {
+    /// An error reported through GDAL's CPL error facility.
+    #[error("CPL error class: '{class:?}', error number: '{number}', error msg: '{msg}'")]
+    CplError {
+        /// Error class, as supplied by the code that raised this error.
+        class: u32,
+        /// CPL error number.
+        number: i32,
+        /// CPL error message text.
+        msg: String,
+    },
+}
diff --git a/c/sedona-gdal/src/gdal_api.rs b/c/sedona-gdal/src/gdal_api.rs
new file mode 100644
index 000000000..d0c07f6f6
--- /dev/null
+++ b/c/sedona-gdal/src/gdal_api.rs
@@ -0,0 +1,116 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::ffi::CStr;
+use std::path::PathBuf;
+
+use libloading::Library;
+
+use crate::dyn_load;
+use crate::errors::{GdalError, GdalInitLibraryError};
+use crate::gdal_dyn_bindgen::SedonaGdalApi;
+
+/// Invoke a function pointer from the `SedonaGdalApi` struct.
+///
+/// # Panics
+///
+/// Panics if the function pointer is `None`.
This is unreachable in correct usage +/// because all function pointers are guaranteed to be `Some` after successful +/// initialization of [`GdalApi`] via [`GdalApi::try_from_shared_library`] or +/// [`GdalApi::try_from_current_process`], and you cannot obtain a `&GdalApi` +/// without successful initialization. +macro_rules! call_gdal_api { + ($api:expr, $func:ident $(, $arg:expr)*) => { + if let Some(func) = $api.inner.$func { + func($($arg),*) + } else { + panic!("{} function not available", stringify!($func)) + } + }; +} + +#[derive(Debug)] +pub struct GdalApi { + pub(crate) inner: SedonaGdalApi, + /// The dynamically loaded GDAL library. Kept alive for the lifetime of the + /// function pointers in `inner`. This is never dropped because the `GdalApi` + /// lives in a `static OnceLock` (see `global.rs`). + _lib: Library, + name: String, +} + +impl GdalApi { + pub fn try_from_shared_library(shared_library: PathBuf) -> Result { + let (lib, inner) = dyn_load::load_gdal_from_path(&shared_library)?; + Ok(Self { + inner, + _lib: lib, + name: shared_library.to_string_lossy().into_owned(), + }) + } + + pub fn try_from_current_process() -> Result { + let (lib, inner) = dyn_load::load_gdal_from_current_process()?; + Ok(Self { + inner, + _lib: lib, + name: "current_process".to_string(), + }) + } + + pub fn name(&self) -> &str { + &self.name + } + + /// Query GDAL version information. + /// + /// `request` is one of the standard `GDALVersionInfo` keys: + /// - `"RELEASE_NAME"` — e.g. `"3.8.4"` + /// - `"VERSION_NUM"` — e.g. 
`"3080400"` + /// - `"BUILD_INFO"` — multi-line build details + pub fn version_info(&self, request: &str) -> String { + let c_request = std::ffi::CString::new(request).unwrap(); + let ptr = unsafe { call_gdal_api!(self, GDALVersionInfo, c_request.as_ptr()) }; + if ptr.is_null() { + String::new() + } else { + unsafe { CStr::from_ptr(ptr) } + .to_string_lossy() + .into_owned() + } + } + + /// Check the last CPL error and return a `GdalError`, it always returns an error struct + /// (even when the error number is 0). + pub fn last_cpl_err(&self, default_err_class: u32) -> GdalError { + let err_no = unsafe { call_gdal_api!(self, CPLGetLastErrorNo) }; + let err_msg = unsafe { + let msg_ptr = call_gdal_api!(self, CPLGetLastErrorMsg); + if msg_ptr.is_null() { + String::new() + } else { + CStr::from_ptr(msg_ptr).to_string_lossy().into_owned() + } + }; + unsafe { call_gdal_api!(self, CPLErrorReset) }; + GdalError::CplError { + class: default_err_class, + number: err_no, + msg: err_msg, + } + } +} diff --git a/c/sedona-gdal/src/gdal_dyn_bindgen.rs b/c/sedona-gdal/src/gdal_dyn_bindgen.rs new file mode 100644 index 000000000..1b05a2c18 --- /dev/null +++ b/c/sedona-gdal/src/gdal_dyn_bindgen.rs @@ -0,0 +1,533 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] +#![allow(non_upper_case_globals)] +#![allow(dead_code)] +#![allow(clippy::type_complexity)] + +use std::os::raw::{c_char, c_double, c_int, c_uchar, c_uint, c_void}; + +// --- Scalar type aliases --- + +pub type GSpacing = i64; +pub type CPLErr = c_int; +pub type OGRErr = c_int; +pub type GDALRWFlag = c_int; +pub type OGRwkbByteOrder = c_int; +pub type GDALOpenFlags = c_uint; +pub type GDALRIOResampleAlg = c_int; + +// --- Opaque handle types --- + +pub type GDALDatasetH = *mut c_void; +pub type GDALDriverH = *mut c_void; +pub type GDALRasterBandH = *mut c_void; +pub type OGRSpatialReferenceH = *mut c_void; +pub type OGRGeometryH = *mut c_void; +pub type OGRLayerH = *mut c_void; +pub type OGRFeatureH = *mut c_void; +pub type OGRFieldDefnH = *mut c_void; +pub type VSILFILE = *mut c_void; + +// --- Enum types --- + +#[repr(C)] +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum GDALDataType { + GDT_Unknown = 0, + GDT_Byte = 1, + GDT_Int8 = 14, + GDT_UInt16 = 2, + GDT_Int16 = 3, + GDT_UInt32 = 4, + GDT_Int32 = 5, + GDT_UInt64 = 12, + GDT_Int64 = 13, + GDT_Float16 = 15, + GDT_Float32 = 6, + GDT_Float64 = 7, + GDT_CInt16 = 8, + GDT_CInt32 = 9, + GDT_CFloat16 = 16, + GDT_CFloat32 = 10, + GDT_CFloat64 = 11, +} + +impl GDALDataType { + #[allow(clippy::result_unit_err)] + pub fn try_from_ordinal(value: i32) -> Result { + match value { + 0 => Ok(GDALDataType::GDT_Unknown), + 1 => Ok(GDALDataType::GDT_Byte), + 2 => Ok(GDALDataType::GDT_UInt16), + 3 => Ok(GDALDataType::GDT_Int16), + 4 => Ok(GDALDataType::GDT_UInt32), + 5 => Ok(GDALDataType::GDT_Int32), + 6 => Ok(GDALDataType::GDT_Float32), + 7 => Ok(GDALDataType::GDT_Float64), + 8 => Ok(GDALDataType::GDT_CInt16), + 9 => Ok(GDALDataType::GDT_CInt32), + 10 => Ok(GDALDataType::GDT_CFloat32), + 11 => Ok(GDALDataType::GDT_CFloat64), + 12 => 
Ok(GDALDataType::GDT_UInt64), + 13 => Ok(GDALDataType::GDT_Int64), + 14 => Ok(GDALDataType::GDT_Int8), + 15 => Ok(GDALDataType::GDT_Float16), + 16 => Ok(GDALDataType::GDT_CFloat16), + _ => Err(()), + } + } +} + +#[repr(C)] +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum OGRwkbGeometryType { + wkbUnknown = 0, + wkbPoint = 1, + wkbLineString = 2, + wkbPolygon = 3, + wkbMultiPoint = 4, + wkbMultiLineString = 5, + wkbMultiPolygon = 6, + wkbGeometryCollection = 7, +} + +#[repr(C)] +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum OGRFieldType { + OFTInteger = 0, + OFTIntegerList = 1, + OFTReal = 2, + OFTRealList = 3, + OFTString = 4, + OFTStringList = 5, + OFTWideString = 6, + OFTWideStringList = 7, + OFTBinary = 8, + OFTDate = 9, + OFTTime = 10, + OFTDateTime = 11, + OFTInteger64 = 12, + OFTInteger64List = 13, +} + +// --- Function pointer type aliases --- + +/// Type alias for the GDAL transformer callback (`GDALTransformerFunc`). +/// +/// Signature: `(pTransformerArg, bDstToSrc, nPointCount, x, y, z, panSuccess) -> c_int` +pub type GDALTransformerFunc = unsafe extern "C" fn( + pTransformerArg: *mut c_void, + bDstToSrc: c_int, + nPointCount: c_int, + x: *mut c_double, + y: *mut c_double, + z: *mut c_double, + panSuccess: *mut c_int, +) -> c_int; + +// --- Structs --- + +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct OGREnvelope { + pub MinX: c_double, + pub MaxX: c_double, + pub MinY: c_double, + pub MaxY: c_double, +} + +/// GDAL progress callback type. +pub type GDALProgressFunc = Option< + unsafe extern "C" fn( + dfComplete: c_double, + pszMessage: *const c_char, + pProgressArg: *mut c_void, + ) -> c_int, +>; + +/// Extra arguments for `GDALRasterIOEx`. 
+#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct GDALRasterIOExtraArg { + pub nVersion: c_int, + pub eResampleAlg: GDALRIOResampleAlg, + pub pfnProgress: GDALProgressFunc, + pub pProgressData: *mut c_void, + pub bFloatingPointWindowValidity: c_int, + pub dfXOff: c_double, + pub dfYOff: c_double, + pub dfXSize: c_double, + pub dfYSize: c_double, +} + +impl Default for GDALRasterIOExtraArg { + fn default() -> Self { + Self { + nVersion: 1, + eResampleAlg: GRIORA_NearestNeighbour, + pfnProgress: None, + pProgressData: std::ptr::null_mut(), + bFloatingPointWindowValidity: 0, + dfXOff: 0.0, + dfYOff: 0.0, + dfXSize: 0.0, + dfYSize: 0.0, + } + } +} + +// --- GDALRIOResampleAlg constants --- + +pub const GRIORA_NearestNeighbour: GDALRIOResampleAlg = 0; +pub const GRIORA_Bilinear: GDALRIOResampleAlg = 1; +pub const GRIORA_Cubic: GDALRIOResampleAlg = 2; +pub const GRIORA_CubicSpline: GDALRIOResampleAlg = 3; +pub const GRIORA_Lanczos: GDALRIOResampleAlg = 4; +pub const GRIORA_Average: GDALRIOResampleAlg = 5; +pub const GRIORA_Mode: GDALRIOResampleAlg = 6; +pub const GRIORA_Gauss: GDALRIOResampleAlg = 7; + +// --- GDAL open flags constants --- + +pub const GDAL_OF_READONLY: GDALOpenFlags = 0x00; +pub const GDAL_OF_UPDATE: GDALOpenFlags = 0x01; +pub const GDAL_OF_RASTER: GDALOpenFlags = 0x02; +pub const GDAL_OF_VECTOR: GDALOpenFlags = 0x04; +pub const GDAL_OF_VERBOSE_ERROR: GDALOpenFlags = 0x40; + +// --- GDALRWFlag constants --- + +pub const GF_Read: GDALRWFlag = 0; +pub const GF_Write: GDALRWFlag = 1; + +// --- CPLErr constants --- + +pub const CE_None: CPLErr = 0; +pub const CE_Debug: CPLErr = 1; +pub const CE_Warning: CPLErr = 2; +pub const CE_Failure: CPLErr = 3; +pub const CE_Fatal: CPLErr = 4; + +// --- OGRErr constants --- + +pub const OGRERR_NONE: OGRErr = 0; + +// --- OGRwkbByteOrder constants --- + +pub const wkbXDR: OGRwkbByteOrder = 0; // Big endian +pub const wkbNDR: OGRwkbByteOrder = 1; // Little endian + +// --- The main API struct mirroring C 
SedonaGdalApi --- + +#[repr(C)] +#[derive(Debug, Copy, Clone, Default)] +pub(crate) struct SedonaGdalApi { + // --- Dataset --- + pub GDALOpenEx: Option< + unsafe extern "C" fn( + pszFilename: *const c_char, + nOpenFlags: c_uint, + papszAllowedDrivers: *const *const c_char, + papszOpenOptions: *const *const c_char, + papszSiblingFiles: *const *const c_char, + ) -> GDALDatasetH, + >, + pub GDALClose: Option, + pub GDALGetRasterXSize: Option c_int>, + pub GDALGetRasterYSize: Option c_int>, + pub GDALGetRasterCount: Option c_int>, + pub GDALGetRasterBand: + Option GDALRasterBandH>, + pub GDALGetGeoTransform: + Option CPLErr>, + pub GDALSetGeoTransform: + Option CPLErr>, + pub GDALGetProjectionRef: Option *const c_char>, + pub GDALSetProjection: + Option CPLErr>, + pub GDALGetSpatialRef: Option OGRSpatialReferenceH>, + pub GDALSetSpatialRef: + Option CPLErr>, + pub GDALCreateCopy: Option< + unsafe extern "C" fn( + hDriver: GDALDriverH, + pszFilename: *const c_char, + hSrcDS: GDALDatasetH, + bStrict: c_int, + papszOptions: *mut *mut c_char, + pfnProgress: *mut c_void, + pProgressData: *mut c_void, + ) -> GDALDatasetH, + >, + pub GDALDatasetCreateLayer: Option< + unsafe extern "C" fn( + hDS: GDALDatasetH, + pszName: *const c_char, + hSpatialRef: OGRSpatialReferenceH, + eGType: OGRwkbGeometryType, + papszOptions: *mut *mut c_char, + ) -> OGRLayerH, + >, + + // --- Driver --- + pub GDALAllRegister: Option, + pub GDALGetDriverByName: Option GDALDriverH>, + pub GDALCreate: Option< + unsafe extern "C" fn( + hDriver: GDALDriverH, + pszFilename: *const c_char, + nXSize: c_int, + nYSize: c_int, + nBands: c_int, + eType: GDALDataType, + papszOptions: *mut *mut c_char, + ) -> GDALDatasetH, + >, + + // --- Band --- + pub GDALAddBand: Option< + unsafe extern "C" fn( + hDS: GDALDatasetH, + eType: GDALDataType, + papszOptions: *mut *mut c_char, + ) -> CPLErr, + >, + pub GDALRasterIO: Option< + unsafe extern "C" fn( + hRBand: GDALRasterBandH, + eRWFlag: GDALRWFlag, + nDSXOff: c_int, + 
nDSYOff: c_int, + nDSXSize: c_int, + nDSYSize: c_int, + pBuffer: *mut c_void, + nBXSize: c_int, + nBYSize: c_int, + eBDataType: GDALDataType, + nPixelSpace: GSpacing, + nLineSpace: GSpacing, + ) -> CPLErr, + >, + pub GDALRasterIOEx: Option< + unsafe extern "C" fn( + hRBand: GDALRasterBandH, + eRWFlag: GDALRWFlag, + nDSXOff: c_int, + nDSYOff: c_int, + nDSXSize: c_int, + nDSYSize: c_int, + pBuffer: *mut c_void, + nBXSize: c_int, + nBYSize: c_int, + eBDataType: GDALDataType, + nPixelSpace: GSpacing, + nLineSpace: GSpacing, + psExtraArg: *mut GDALRasterIOExtraArg, + ) -> CPLErr, + >, + pub GDALGetRasterDataType: Option GDALDataType>, + pub GDALGetRasterBandXSize: Option c_int>, + pub GDALGetRasterBandYSize: Option c_int>, + pub GDALGetBlockSize: Option< + unsafe extern "C" fn(hBand: GDALRasterBandH, pnXSize: *mut c_int, pnYSize: *mut c_int), + >, + pub GDALGetRasterNoDataValue: + Option c_double>, + pub GDALSetRasterNoDataValue: + Option CPLErr>, + pub GDALDeleteRasterNoDataValue: Option CPLErr>, + pub GDALSetRasterNoDataValueAsUInt64: + Option CPLErr>, + pub GDALSetRasterNoDataValueAsInt64: + Option CPLErr>, + + // --- SpatialRef --- + pub OSRNewSpatialReference: + Option OGRSpatialReferenceH>, + pub OSRDestroySpatialReference: Option, + pub OSRExportToPROJJSON: Option< + unsafe extern "C" fn( + hSRS: OGRSpatialReferenceH, + ppszResult: *mut *mut c_char, + papszOptions: *const *const c_char, + ) -> OGRErr, + >, + pub OSRClone: Option OGRSpatialReferenceH>, + pub OSRRelease: Option, + + // --- Geometry --- + pub OGR_G_CreateFromWkb: Option< + unsafe extern "C" fn( + pabyData: *const c_void, + hSRS: OGRSpatialReferenceH, + phGeometry: *mut OGRGeometryH, + nBytes: c_int, + ) -> OGRErr, + >, + pub OGR_G_CreateFromWkt: Option< + unsafe extern "C" fn( + ppszData: *mut *mut c_char, + hSRS: OGRSpatialReferenceH, + phGeometry: *mut OGRGeometryH, + ) -> OGRErr, + >, + pub OGR_G_ExportToIsoWkb: Option< + unsafe extern "C" fn( + hGeom: OGRGeometryH, + eOrder: OGRwkbByteOrder, + 
pabyData: *mut c_uchar, + ) -> OGRErr, + >, + pub OGR_G_WkbSize: Option c_int>, + pub OGR_G_GetEnvelope: + Option, + pub OGR_G_DestroyGeometry: Option, + + // --- Vector / Layer --- + pub OGR_L_ResetReading: Option, + pub OGR_L_GetNextFeature: Option OGRFeatureH>, + pub OGR_L_CreateField: Option< + unsafe extern "C" fn( + hLayer: OGRLayerH, + hFieldDefn: OGRFieldDefnH, + bApproxOK: c_int, + ) -> OGRErr, + >, + pub OGR_L_GetFeatureCount: + Option i64>, + pub OGR_F_GetGeometryRef: Option OGRGeometryH>, + pub OGR_F_GetFieldIndex: + Option c_int>, + pub OGR_F_GetFieldAsDouble: + Option c_double>, + pub OGR_F_GetFieldAsInteger: + Option c_int>, + pub OGR_F_IsFieldSetAndNotNull: + Option c_int>, + pub OGR_F_Destroy: Option, + pub OGR_Fld_Create: + Option OGRFieldDefnH>, + pub OGR_Fld_Destroy: Option, + + // --- VSI --- + pub VSIFileFromMemBuffer: Option< + unsafe extern "C" fn( + pszFilename: *const c_char, + pabyData: *mut c_uchar, + nDataLength: i64, + bTakeOwnership: c_int, + ) -> VSILFILE, + >, + pub VSIFCloseL: Option c_int>, + pub VSIUnlink: Option c_int>, + pub VSIGetMemFileBuffer: Option< + unsafe extern "C" fn( + pszFilename: *const c_char, + pnDataLength: *mut i64, + bUnlinkAndSeize: c_int, + ) -> *mut c_uchar, + >, + pub VSIFree: Option, + pub VSIMalloc: Option *mut c_void>, + + // --- VRT --- + pub VRTCreate: Option GDALDatasetH>, + pub VRTAddSimpleSource: Option< + unsafe extern "C" fn( + hVRTBand: GDALRasterBandH, + hSrcBand: GDALRasterBandH, + nSrcXOff: c_int, + nSrcYOff: c_int, + nSrcXSize: c_int, + nSrcYSize: c_int, + nDstXOff: c_int, + nDstYOff: c_int, + nDstXSize: c_int, + nDstYSize: c_int, + pszResampling: *const c_char, + dfNoDataValue: c_double, + ) -> CPLErr, + >, + + // --- Rasterize / Polygonize --- + pub GDALRasterizeGeometries: Option< + unsafe extern "C" fn( + hDS: GDALDatasetH, + nBandCount: c_int, + panBandList: *const c_int, + nGeomCount: c_int, + pahGeometries: *const OGRGeometryH, + pfnTransformer: *mut c_void, + pTransformArg: *mut 
c_void, + padfGeomBurnValues: *const c_double, + papszOptions: *mut *mut c_char, + pfnProgress: *mut c_void, + pProgressData: *mut c_void, + ) -> CPLErr, + >, + pub GDALFPolygonize: Option< + unsafe extern "C" fn( + hSrcBand: GDALRasterBandH, + hMaskBand: GDALRasterBandH, + hOutLayer: OGRLayerH, + iPixValField: c_int, + papszOptions: *mut *mut c_char, + pfnProgress: *mut c_void, + pProgressData: *mut c_void, + ) -> CPLErr, + >, + pub GDALPolygonize: Option< + unsafe extern "C" fn( + hSrcBand: GDALRasterBandH, + hMaskBand: GDALRasterBandH, + hOutLayer: OGRLayerH, + iPixValField: c_int, + papszOptions: *mut *mut c_char, + pfnProgress: *mut c_void, + pProgressData: *mut c_void, + ) -> CPLErr, + >, + + // --- Version --- + pub GDALVersionInfo: Option *const c_char>, + + // --- Config --- + pub CPLSetThreadLocalConfigOption: + Option, + + // --- Error --- + pub CPLGetLastErrorNo: Option c_int>, + pub CPLGetLastErrorMsg: Option *const c_char>, + pub CPLErrorReset: Option, + + // --- Data Type --- + pub GDALGetDataTypeSizeBytes: Option c_int>, + + // --- C++ API --- + pub MEMDatasetCreate: Option< + unsafe extern "C" fn( + pszFilename: *const c_char, + nXSize: c_int, + nYSize: c_int, + nBandsIn: c_int, + eType: GDALDataType, + papszOptions: *mut *mut c_char, + ) -> GDALDatasetH, + >, +} diff --git a/c/sedona-gdal/src/global.rs b/c/sedona-gdal/src/global.rs new file mode 100644 index 000000000..9365ef89a --- /dev/null +++ b/c/sedona-gdal/src/global.rs @@ -0,0 +1,313 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::errors::GdalInitLibraryError; +use crate::gdal_api::GdalApi; +use std::path::PathBuf; +use std::sync::{Mutex, OnceLock}; + +/// Minimum GDAL version required by sedona-gdal. +const MIN_GDAL_VERSION_MAJOR: i32 = 3; +const MIN_GDAL_VERSION_MINOR: i32 = 5; +const MIN_GDAL_VERSION_NUM: i32 = + MIN_GDAL_VERSION_MAJOR * 1_000_000 + MIN_GDAL_VERSION_MINOR * 10_000; + +/// Builder for the global [`GdalApi`]. +/// +/// Provides a way to configure how GDAL is loaded before the first use. +/// Use [`configure_global_gdal_api`] to install a builder, then the actual +/// loading happens lazily on the first call to [`get_global_gdal_api`]. +/// +/// # Examples +/// +/// ```no_run +/// use sedona_gdal::global::{GdalApiBuilder, configure_global_gdal_api}; +/// +/// // Configure with a specific shared library path +/// let builder = GdalApiBuilder::default() +/// .with_shared_library("/usr/lib/libgdal.so".into()); +/// configure_global_gdal_api(builder).unwrap(); +/// ``` +#[derive(Default)] +pub struct GdalApiBuilder { + shared_library: Option, +} + +impl GdalApiBuilder { + /// Set the path to the GDAL shared library. + /// + /// If unset, GDAL symbols will be resolved from the current process image + /// (equivalent to `dlopen(NULL)`), which requires GDAL to already be linked + /// into the process (e.g. via `gdal-sys`). + /// + /// Note that the path is passed directly to `dlopen()`/`LoadLibrary()`, + /// which takes into account the working directory. As a security measure, + /// applications may wish to verify that the path is absolute. 
This should + /// not be specified from untrusted input. + pub fn with_shared_library(self, path: PathBuf) -> Self { + Self { + shared_library: Some(path), + } + } + + /// Build a [`GdalApi`] with the configured options. + /// + /// When `shared_library` is set, loads GDAL from that path. Otherwise, + /// resolves symbols from the current process (with an optional + /// compile-time version check when the `gdal-sys` feature is enabled). + pub fn build(&self) -> Result { + let api = if let Some(shared_library) = &self.shared_library { + GdalApi::try_from_shared_library(shared_library.clone())? + } else { + GdalApi::try_from_current_process()? + }; + + #[cfg(feature = "gdal-sys")] + let get_gdal_version_info = |arg: &str| unsafe { + // Calling into `gdal-sys` also forces the linker to include GDAL + // symbols, so that `try_from_current_process` (which resolves function pointers + // via `dlsym` on the current process) can find them at runtime. + let c_arg = std::ffi::CString::new(arg).unwrap(); + let c_version = gdal_sys::GDALVersionInfo(c_arg.as_ptr()); + std::ffi::CStr::from_ptr(c_version) + .to_string_lossy() + .into_owned() + }; + + #[cfg(not(feature = "gdal-sys"))] + let get_gdal_version_info = |arg: &str| api.version_info(arg); + + check_gdal_version(get_gdal_version_info)?; + Ok(api) + } +} + +/// Global builder configuration, protected by a [`Mutex`]. +/// +/// Set via [`configure_global_gdal_api`] before the first call to +/// [`get_global_gdal_api`]. Multiple calls to `configure_global_gdal_api` +/// are allowed as long as the API has not been initialized yet. +/// The same mutex also serves as the initialization guard for +/// [`get_global_gdal_api`], eliminating the need for a separate lock. +static GDAL_API_BUILDER: Mutex> = Mutex::new(None); + +static GDAL_API: OnceLock = OnceLock::new(); + +/// Get a reference to the global GDAL API, initializing it if not already done. 
+/// +/// On first call, reads the builder set by [`configure_global_gdal_api`] (or uses +/// [`GdalApiBuilder::default()`] if none was configured) and calls its `build()` +/// method to create the [`GdalApi`]. The result is stored in a process-global +/// `OnceLock` and reused for all subsequent calls. +fn get_global_gdal_api() -> Result<&'static GdalApi, GdalInitLibraryError> { + if let Some(api) = GDAL_API.get() { + return Ok(api); + } + + let guard = GDAL_API_BUILDER + .lock() + .map_err(|_| GdalInitLibraryError::Invalid("GDAL API builder lock poisoned".to_string()))?; + + if let Some(api) = GDAL_API.get() { + return Ok(api); + } + + let api = guard + .as_ref() + .unwrap_or(&GdalApiBuilder::default()) + .build()?; + + // Register all GDAL drivers once, immediately after loading symbols. + // This mirrors georust/gdal's `_register_drivers()` pattern where + // `GDALAllRegister` is called via `std::sync::Once` before any driver + // lookup or dataset open. Here the `OnceLock` + `Mutex` already + // guarantees this runs exactly once. + unsafe { + let Some(gdal_all_register) = api.inner.GDALAllRegister else { + return Err(GdalInitLibraryError::LibraryError( + "GDALAllRegister symbol not loaded".to_string(), + )); + }; + gdal_all_register(); + } + + let _ = GDAL_API.set(api); + Ok(GDAL_API.get().expect("GDAL API should be set")) +} + +/// Configure the global GDAL API. +/// +/// Stores the given [`GdalApiBuilder`] for use when the global [`GdalApi`] is +/// first initialized (lazily, on the first call to [`get_global_gdal_api`]). +/// +/// This can be called multiple times before the API is initialized — each call +/// replaces the previous builder. However, once [`get_global_gdal_api`] has been +/// called and the API has been successfully initialized, subsequent configurations +/// are accepted but will have no effect (the `OnceLock` ensures the API is created +/// only once). +/// +/// # Typical usage +/// +/// 1. The application (e.g. 
sedona-db) calls `configure_global_gdal_api` with its +/// default builder early in startup. +/// 2. User code may call `configure_global_gdal_api` again to override the +/// configuration before the first query that uses GDAL. +/// 3. On the first actual GDAL operation, [`get_global_gdal_api`] reads the +/// builder and initializes the API. +pub fn configure_global_gdal_api(builder: GdalApiBuilder) -> Result<(), GdalInitLibraryError> { + let mut global_builder = GDAL_API_BUILDER.lock().map_err(|_| { + GdalInitLibraryError::Invalid( + "Failed to acquire lock for global GDAL configuration".to_string(), + ) + })?; + global_builder.replace(builder); + Ok(()) +} + +/// Return whether the global [`GdalApi`] has been initialized. +/// +/// This returns `true` only after [`get_global_gdal_api`] (directly or via +/// [`with_global_gdal_api`]) has successfully initialized and stored the API. +/// It does not indicate whether a builder was previously set through +/// [`configure_global_gdal_api`]. +pub fn is_gdal_api_configured() -> bool { + GDAL_API.get().is_some() +} + +/// Execute a closure with the process-global [`GdalApi`]. +/// +/// This helper ensures the global API is initialized (lazily) and then passes a +/// shared `'static` reference to the provided closure. +/// +/// If initialization succeeds, the closure's result is returned unchanged; otherwise +/// returns an error from the initialization attempt. +pub fn with_global_gdal_api(func: F) -> Result +where + F: FnOnce(&'static GdalApi) -> R, +{ + let api = get_global_gdal_api()?; + Ok(func(api)) +} + +/// Verify that the GDAL library meets the minimum version requirement. +/// +/// We use `GDALVersionInfo("VERSION_NUM")` instead of `GDALCheckVersion` because +/// the latter performs an **exact** major.minor match and rejects newer versions +/// (e.g. GDAL 3.12 fails a check for 3.4), whereas we need a **minimum** version +/// check (>=). 
+fn check_gdal_version( + mut gdal_version_info: impl FnMut(&str) -> String, +) -> Result<(), GdalInitLibraryError> { + let version_str = gdal_version_info("VERSION_NUM"); + let version_num: i32 = version_str.trim().parse().map_err(|e| { + GdalInitLibraryError::LibraryError(format!( + "Failed to parse GDAL version number {:?}: {e}", + version_str + )) + })?; + + if version_num < MIN_GDAL_VERSION_NUM { + // Get the human-readable release name for the error message. + let release_name = gdal_version_info("RELEASE_NAME"); + return Err(GdalInitLibraryError::LibraryError(format!( + "GDAL >= {MIN_GDAL_VERSION_MAJOR}.{MIN_GDAL_VERSION_MINOR} required \ + for sedona-gdal (found {release_name})" + ))); + } + + Ok(()) +} + +#[cfg(test)] +mod test { + use super::*; + + /// Building with default options (no shared library) should succeed when + /// `gdal-sys` is linked, loading symbols from the current process. + #[test] + fn test_builder_default() { + let api = GdalApiBuilder::default() + .build() + .expect("GdalApiBuilder::default().build() should succeed with gdal-sys"); + assert_eq!(api.name(), "current_process"); + } + + /// Building with an explicit shared library path should use that path. + /// Gated behind an environment variable; skips gracefully if unset. + #[test] + fn test_builder_with_shared_library() { + if let Ok(gdal_library) = std::env::var("SEDONA_GDAL_TEST_SHARED_LIBRARY") { + if !gdal_library.is_empty() { + let api = GdalApiBuilder::default() + .with_shared_library(gdal_library.clone().into()) + .build() + .expect("Should build GdalApi from SEDONA_GDAL_TEST_SHARED_LIBRARY"); + assert_eq!(api.name(), gdal_library); + return; + } + } + + println!( + "Skipping test_builder_with_shared_library - \ + SEDONA_GDAL_TEST_SHARED_LIBRARY environment variable not set" + ); + } + + /// Building with an invalid shared library path should return an error. 
+ #[test] + fn test_builder_invalid_path() { + let err = GdalApiBuilder::default() + .with_shared_library("/nonexistent/libgdal.so".into()) + .build() + .unwrap_err(); + assert!( + matches!(err, GdalInitLibraryError::LibraryError(_)), + "Expected LibraryError, got: {err:?}" + ); + } + + /// `get_global_gdal_api` should succeed and return a valid API reference. + /// + /// Note: this test touches the process-global `OnceLock` and cannot be + /// "undone", so it effectively tests the first-call initialization path. + /// Subsequent tests in the same process will hit the fast path. + #[test] + fn test_get_global_gdal_api() { + let api = get_global_gdal_api().expect("get_global_gdal_api should succeed"); + assert!(!api.name().is_empty(), "API name should not be empty"); + } + + /// After `get_global_gdal_api` succeeds, `is_gdal_api_configured` should + /// return true. + #[test] + fn test_is_gdal_api_configured() { + // Ensure the API is initialized. + let _ = get_global_gdal_api().expect("get_global_gdal_api should succeed"); + assert!( + is_gdal_api_configured(), + "is_gdal_api_configured should return true after initialization" + ); + } + + /// `with_global_gdal_api` should pass a valid reference to the closure. + #[test] + fn test_with_global_gdal_api() { + let name = with_global_gdal_api(|api| api.name().to_string()) + .expect("with_global_gdal_api should succeed"); + assert!(!name.is_empty(), "API name should not be empty"); + } +} diff --git a/c/sedona-gdal/src/lib.rs b/c/sedona-gdal/src/lib.rs new file mode 100644 index 000000000..e05a3bd02 --- /dev/null +++ b/c/sedona-gdal/src/lib.rs @@ -0,0 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// --- FFI layer --- +pub(crate) mod dyn_load; +pub mod gdal_dyn_bindgen; + +// --- Error types --- +pub mod errors; + +// --- Core API --- +pub mod gdal_api; +pub mod global; diff --git a/docs/contributors-guide.md b/docs/contributors-guide.md index 1af286abb..f215fc80f 100644 --- a/docs/contributors-guide.md +++ b/docs/contributors-guide.md @@ -66,12 +66,16 @@ Your first step is to create a personal copy of the repository and connect it to upstream https://github.com/apache/sedona-db.git (fetch) upstream https://github.com/apache/sedona-db.git (push) ``` + ## System dependencies -Some crates in the workspace wrap native libraries and require system dependencies (GEOS, PROJ, Abseil, OpenSSL, CMake, etc.). We recommend using: +Some crates in the workspace wrap native libraries and require system dependencies (GEOS, GDAL, PROJ, Abseil, OpenSSL, CMake, etc.). We recommend using: ### macOS: Homebrew -``` bash brew install abseil openssl cmake geos proj ``` + +```bash +brew install abseil openssl cmake geos gdal proj +``` Ensure Homebrew-installed tools are on your PATH (Homebrew usually does this automatically). 
@@ -110,7 +114,7 @@ cd C:\dev\vcpkg Next, install the required libraries with vcpkg: ```powershell -C:\dev\vcpkg\vcpkg.exe install geos proj abseil openssl +C:\dev\vcpkg\vcpkg.exe install geos gdal proj abseil openssl ``` Configure environment variables (PowerShell example — update paths as needed): @@ -153,7 +157,7 @@ Linux users may install system dependencies from a system package manager. Note Ubuntu/Debian (Ubuntu 24.04 LTS is too old; however, later versions have the required version of Abseil) ```shell -sudo apt-get install -y build-essential cmake libssl-dev libproj-dev libgeos-dev python3-dev libabsl-dev +sudo apt-get install -y build-essential cmake libssl-dev libproj-dev libgeos-dev libgdal-dev python3-dev libabsl-dev ``` ## Rust diff --git a/python/sedonadb/Cargo.toml b/python/sedonadb/Cargo.toml index 177e96cee..c12fd3687 100644 --- a/python/sedonadb/Cargo.toml +++ b/python/sedonadb/Cargo.toml @@ -48,6 +48,7 @@ sedona-geometry = { workspace = true } sedona-expr = { workspace = true } sedona-geoparquet = { workspace = true } sedona-schema = { workspace = true } +sedona-gdal = { workspace = true } sedona-proj = { workspace = true } sedona-tg = { workspace = true } serde_json = { workspace = true } diff --git a/python/sedonadb/python/sedonadb/__init__.py b/python/sedonadb/python/sedonadb/__init__.py index 2b75c4859..10c11ccb7 100644 --- a/python/sedonadb/python/sedonadb/__init__.py +++ b/python/sedonadb/python/sedonadb/__init__.py @@ -15,16 +15,18 @@ # specific language governing permissions and limitations # under the License. from sedonadb import _lib -from sedonadb.context import connect, configure_proj +from sedonadb.context import connect, configure_proj, configure_gdal, gdal_version __version__ = _lib.sedona_python_version() __features__ = _lib.sedona_python_features() -__all__ = ["connect", "options"] +__all__ = ["connect", "options", "gdal_version"] -# Attempt to configure PROJ on import. 
This will warn if PROJ -# can't be configured but should never error. The auto-configured -# value can be overridden as long as the call to configure_proj() -# occurs before actually creating a transform. +# Attempt to configure PROJ and GDAL on import. This will warn if PROJ +# or GDAL can't be configured but should never error. The auto-configured +# values can be overridden as long as configure_proj() is called before +# creating a transform and configure_gdal() is called before any +# GDAL-backed operation (e.g., raster I/O). configure_proj("auto") +configure_gdal(preset="auto") diff --git a/python/sedonadb/python/sedonadb/context.py b/python/sedonadb/python/sedonadb/context.py index 5ad0a5f56..09efea055 100644 --- a/python/sedonadb/python/sedonadb/context.py +++ b/python/sedonadb/python/sedonadb/context.py @@ -22,7 +22,12 @@ from pathlib import Path from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple, Union -from sedonadb._lib import InternalContext, configure_proj_shared +from sedonadb._lib import ( + InternalContext, + configure_gdal_shared, + configure_proj_shared, + gdal_version as _gdal_version, +) from sedonadb._options import Options from sedonadb.dataframe import DataFrame, _create_data_frame from sedonadb.functions import Functions @@ -585,13 +590,18 @@ def _configure_proj_pyproj(): ) -def _configure_proj_system(): +def _proj_lib_name() -> str: + if sys.platform == "darwin": + return "libproj.dylib" + if sys.platform.startswith("linux"): + return "libproj.so" if sys.platform == "win32": - configure_proj(shared_library="proj.dll") - elif sys.platform == "darwin": - configure_proj(shared_library="libproj.dylib") - else: - configure_proj(shared_library="libproj.so") + return "proj.dll" + raise ValueError(f"Unsupported platform: {sys.platform}") + + +def _configure_proj_system(): + configure_proj(shared_library=_proj_lib_name()) def _configure_proj_prefix(prefix: str): @@ -599,8 +609,214 @@ def _configure_proj_prefix(prefix: str): if 
not prefix.exists(): raise ValueError(f"Can't configure PROJ from prefix '{prefix}': does not exist") + if sys.platform == "win32": + shared_library = prefix / "Library" / "bin" / _proj_lib_name() + else: + shared_library = prefix / "lib" / _proj_lib_name() + configure_proj( - shared_library=Path(prefix) / "lib" / "libproj.dylib", - database_path=Path(prefix) / "share" / "proj" / "proj.db", - search_path=Path(prefix) / "share" / "proj", + shared_library=shared_library, + database_path=prefix / "share" / "proj" / "proj.db", + search_path=prefix / "share" / "proj", ) + + +def configure_gdal( + preset: Optional[ + Literal["auto", "rasterio", "pyogrio", "conda", "homebrew", "system"] + ] = None, + *, + shared_library: Optional[Union[str, Path]] = None, + verbose: bool = False, +) -> None: + """Configure GDAL source + + SedonaDB loads GDAL dynamically at runtime. This is normally configured + on package load but may need additional configuration (particularly if the + automatic configuration fails). + + This function may be called at any time; however, once a GDAL-backed + operation has been performed, subsequent configuration has no effect. + + Args: + preset: One of: + - None: Use a custom `shared_library` path. + - auto: Try all presets in the order rasterio, pyogrio, conda, + homebrew, system and warn if none succeeded. + - pyogrio: Attempt to use the GDAL shared library bundled with + pyogrio. This aligns the GDAL version with the one used by + `read_pyogrio()` / `geopandas.read_file()`. + - rasterio: Attempt to use the GDAL shared library bundled with + rasterio. + - conda: Attempt to load libgdal installed via + `conda install libgdal`. + - homebrew: Attempt to load libgdal installed via + `brew install gdal`. + - system: Attempt to load libgdal from a directory already on + LD_LIBRARY_PATH (Linux), DYLD_LIBRARY_PATH (macOS), or PATH + (Windows). + + shared_library: Path to a GDAL shared library. 
+ verbose: If True, print information about the configuration process. + + Examples: + + >>> sedona.db.configure_gdal("auto") + """ + if preset is not None: + if preset == "pyogrio": + _configure_gdal_pyogrio() + return + elif preset == "rasterio": + _configure_gdal_rasterio() + return + elif preset == "conda": + _configure_gdal_conda() + return + elif preset == "homebrew": + prefix = os.environ.get("HOMEBREW_PREFIX", "/opt/homebrew") + shared_library = Path(prefix) / "lib" / _gdal_lib_name() + elif preset == "system": + shared_library = _gdal_lib_name() + elif preset == "auto": + tried = ["rasterio", "pyogrio", "conda", "homebrew", "system"] + errors = [] + for option in tried: + try: + configure_gdal(preset=option) + + if verbose: + print(f"Configured GDAL using '{option}'") + + return + except Exception as e: + if verbose: + print(f"Failed to configure GDAL using '{option}': {e}") + else: + errors.append(f"{option}: {e}") + + import warnings + + all_errors = "\n".join(errors) + warnings.warn( + "Failed to configure GDAL. Is rasterio, pyogrio, or a system install of GDAL available?" + f"\nDetails: tried {tried}\n{all_errors}" + ) + return + else: + raise ValueError(f"Unknown preset: {preset}") + + if shared_library is None: + raise ValueError("Must provide shared_library or preset") + + shared_library = Path(shared_library) + try: + import ctypes + + ctypes.CDLL(str(shared_library)) + except OSError as e: + raise ValueError(f"Can't load GDAL shared library '{shared_library}': {e}") + + configure_gdal_shared(str(shared_library)) + + +def _gdal_lib_name() -> str: + if sys.platform == "darwin": + return "libgdal.dylib" + if sys.platform.startswith("linux"): + return "libgdal.so" + if sys.platform == "win32": + return "gdal.dll" + raise ValueError(f"Unsupported platform: {sys.platform}") + + +def _find_gdal_in_package(pkg_name: str) -> Path: + """Locate the bundled GDAL shared library inside a pip-installed package. 
+ + Pip wheels on macOS place vendored dylibs in `{pkg_name}/.dylibs/`, + while on Linux `auditwheel` places them in `{pkg_name}.libs/` next + to the package directory. Windows wheels use the same `.libs` layout. + + Returns the path to the single matching GDAL library file. + + Raises: + ImportError: If the package cannot be imported. + ValueError: If the expected directory does not exist, or exactly + one GDAL library cannot be found. + """ + import importlib + + pkg = importlib.import_module(pkg_name) + pkg_dir = Path(pkg.__file__).parent + + if sys.platform == "darwin": + dylibs_dir = pkg_dir / ".dylibs" + if not dylibs_dir.exists(): + raise ValueError( + f"Expected GDAL dylib directory '{dylibs_dir}' does not exist" + ) + possible_files = list(dylibs_dir.glob("libgdal*.dylib*")) + else: + dylibs_dir = pkg_dir.parent / f"{pkg_name}.libs" + if not dylibs_dir.exists(): + raise ValueError( + f"Expected GDAL dll/so directory '{dylibs_dir}' does not exist" + ) + possible_files = list(dylibs_dir.glob("gdal*.dll")) + possible_files.extend(dylibs_dir.glob("libgdal*.so*")) + + if len(possible_files) != 1: + all_files = "\n".join(str(s) for s in dylibs_dir.iterdir()) + raise ValueError( + f"Can't find exactly one GDAL shared library in '{dylibs_dir}'. 
" + f"{len(possible_files)} possible matches:\n{all_files}" + ) + + return possible_files[0] + + +def _configure_gdal_pyogrio(): + configure_gdal(shared_library=_find_gdal_in_package("pyogrio")) + + +def _configure_gdal_rasterio(): + configure_gdal(shared_library=_find_gdal_in_package("rasterio")) + + +def _configure_gdal_conda(): + conda_prefix = os.environ.get("CONDA_PREFIX") + if not conda_prefix: + raise ValueError("CONDA_PREFIX environment variable is not set") + + prefix = Path(conda_prefix) + if not prefix.exists(): + raise ValueError( + f"Can't configure GDAL from CONDA_PREFIX '{prefix}': does not exist" + ) + + if sys.platform == "win32": + shared_library = prefix / "Library" / "bin" / "gdal.dll" + else: + shared_library = prefix / "lib" / _gdal_lib_name() + + configure_gdal(shared_library=shared_library) + + +def gdal_version() -> Optional[str]: + """Return the GDAL release version string, or ``None`` if GDAL is not loaded. + + This function triggers lazy GDAL initialization if ``configure_gdal()`` + was previously called but the library has not yet been loaded. If GDAL + cannot be loaded, ``None`` is returned instead of raising an error. + + Returns: + A version string such as ``"3.8.4"``, or ``None`` if GDAL is + not available. 
+ + Examples: + + >>> import sedonadb + >>> sedonadb.gdal_version() # doctest: +SKIP + '3.8.4' + """ + return _gdal_version() diff --git a/python/sedonadb/src/lib.rs b/python/sedonadb/src/lib.rs index 6a316964e..7187675db 100644 --- a/python/sedonadb/src/lib.rs +++ b/python/sedonadb/src/lib.rs @@ -17,6 +17,7 @@ use crate::{error::PySedonaError, udf::sedona_scalar_udf}; use pyo3::{ffi::Py_uintptr_t, prelude::*}; use sedona_adbc::AdbcSedonadbDriverInit; +use sedona_gdal::global::{configure_global_gdal_api, with_global_gdal_api, GdalApiBuilder}; use sedona_proj::register::{configure_global_proj_engine, ProjCrsEngineBuilder}; use std::ffi::c_void; @@ -95,12 +96,31 @@ fn configure_proj_shared( Ok(()) } +#[pyfunction] +fn configure_gdal_shared(shared_library_path: String) -> Result<(), PySedonaError> { + let builder = GdalApiBuilder::default().with_shared_library(shared_library_path.into()); + configure_global_gdal_api(builder).map_err(|e| { + PySedonaError::SedonaPython(format!("Failed to configure GDAL shared library: {e}")) + })?; + Ok(()) +} + +#[pyfunction] +fn gdal_version() -> Result, PySedonaError> { + match with_global_gdal_api(|api| api.version_info("RELEASE_NAME")) { + Ok(version) if !version.is_empty() => Ok(Some(version)), + _ => Ok(None), + } +} + #[pymodule] fn _lib(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { #[cfg(feature = "mimalloc")] configure_tg_allocator(); m.add_function(wrap_pyfunction!(configure_proj_shared, m)?)?; + m.add_function(wrap_pyfunction!(configure_gdal_shared, m)?)?; + m.add_function(wrap_pyfunction!(gdal_version, m)?)?; m.add_function(wrap_pyfunction!(sedona_adbc_driver_init, m)?)?; m.add_function(wrap_pyfunction!(sedona_python_version, m)?)?; m.add_function(wrap_pyfunction!(sedona_python_features, m)?)?; diff --git a/python/sedonadb/tests/test_gdal.py b/python/sedonadb/tests/test_gdal.py new file mode 100644 index 000000000..f71c5f15f --- /dev/null +++ b/python/sedonadb/tests/test_gdal.py @@ -0,0 +1,31 @@ +# Licensed to 
the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import sedonadb + + +def test_gdal_version(): + """Verify that GDAL was loaded successfully and reports a valid version.""" + version = sedonadb.gdal_version() + assert version is not None, "GDAL should be loaded (gdal_version() returned None)" + assert len(version) > 0, "GDAL version string should not be empty" + + # GDAL versions look like "3.8.4" or "3.12.0" + parts = version.split(".") + assert len(parts) >= 2, f"Expected dotted version string, got: {version}" + major = int(parts[0]) + assert major >= 3, f"Expected GDAL >= 3.x, got: {version}" diff --git a/rust/sedona/Cargo.toml b/rust/sedona/Cargo.toml index 0c6fb4c92..051c7841b 100644 --- a/rust/sedona/Cargo.toml +++ b/rust/sedona/Cargo.toml @@ -41,6 +41,7 @@ tg = ["dep:sedona-tg"] http = ["object_store/http"] pointcloud = ["dep:sedona-pointcloud"] proj = ["sedona-proj/proj-sys"] +gdal = ["sedona-gdal/gdal-sys"] spatial-join = ["dep:sedona-spatial-join"] s2geography = ["dep:sedona-s2geography"] @@ -77,6 +78,7 @@ sedona-geoparquet = { workspace = true } sedona-geos = { workspace = true, optional = true } sedona-pointcloud = { workspace = true, optional = true } sedona-proj = { workspace = true } +sedona-gdal = { workspace = true } 
sedona-raster-functions = { workspace = true } sedona-schema = { workspace = true } sedona-spatial-join = { workspace = true, optional = true }