Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
b939673
Add the catalog-driven Spark UDF generator
estebanzimanyi Jun 12, 2026
4876e7d
Drive the full JMEOS surface from the catalog, organized by doxygen g…
estebanzimanyi Jun 13, 2026
de62e78
Generate the portable bare-name dispatch from the contract families
estebanzimanyi Jun 13, 2026
1604174
Make the generated-dispatch surface build cleanly
estebanzimanyi Jun 14, 2026
942f2ce
Wire build-time generation of the UDF surface (generate-sources)
estebanzimanyi Jun 14, 2026
6b0ddd7
Add runtime verification of the generated UDF surface
estebanzimanyi Jun 14, 2026
825b762
Generate the *_in parse functions (const char* -> Java String)
estebanzimanyi Jun 14, 2026
c3810cc
Broaden generated-surface verification to cbuffer + npoint families
estebanzimanyi Jun 14, 2026
ae456d8
Generate uint64_t functions + default codegen output to target/
estebanzimanyi Jun 14, 2026
dc2d2c5
Generate the full portable bare-name operator dispatch surface
estebanzimanyi Jun 14, 2026
45da8d5
Generate the *_as_hexwkb family (swallow size_out, map unsigned char)
estebanzimanyi Jun 14, 2026
dbb61d1
Generate the @sqlfn canonical MobilityDB SQL surface with overload di…
estebanzimanyi Jun 14, 2026
5fe096c
Re-vendor the catalog from the consolidated MEOS-API at pin 14h
estebanzimanyi Jun 14, 2026
a2e71f8
Run the generated-dispatch surface on Java 21 / Spark 3.5 under CI
estebanzimanyi Jun 14, 2026
3ef2d9d
Fix the wrong-type-WKB crash + emit the SQL-faithful arity
estebanzimanyi Jun 14, 2026
d55341f
Bump the CI libmeos pin to ecosystem-pin-2026-06-14i
estebanzimanyi Jun 14, 2026
bd5f924
Generate the atTime/minusTime time-restrict dispatch via MEOS parsers
estebanzimanyi Jun 14, 2026
48155ae
Fail the build if any generated UDF lacks a per-thread MEOS-init guard
estebanzimanyi Jun 14, 2026
66edfd8
Drop the unused hand-registration helpers; the UDF surface is fully g…
estebanzimanyi Jun 14, 2026
044592a
Advance to ecosystem-pin-2026-06-14l (catalog + JMEOS jar + CI pin)
estebanzimanyi Jun 14, 2026
ce83904
Generate array-in + SETOF UDFs for the NxN tgeoarr kernels
estebanzimanyi Jun 15, 2026
332057d
Advance to ecosystem-pin-2026-06-14m (catalog + CI pin)
estebanzimanyi Jun 15, 2026
e53a289
Test the bench's LATERAL VIEW explode consumption of the NxN array UDFs
estebanzimanyi Jun 15, 2026
690093b
Coerce the NxN array UDF dist arg via Number (bare decimal literals)
estebanzimanyi Jun 15, 2026
43f0b83
Advance to ecosystem-pin-2026-06-15a (canonical operator dialect)
estebanzimanyi Jun 15, 2026
f33910a
Advance to ecosystem-pin-2026-06-15c (catalog + JMEOS jar + CI pin)
estebanzimanyi Jun 15, 2026
4a36c2c
Advance to ecosystem-pin-2026-06-15d (h3 @sqlfn fix → geoToH3IndexSet)
estebanzimanyi Jun 15, 2026
a02ece4
Advance pin to ecosystem-pin-2026-06-15e (npoint_test build fix)
estebanzimanyi Jun 15, 2026
c88199c
Fix the th3 spatial prefilter on Spark: EWKT/SRID geo parse, eEq set …
estebanzimanyi Jun 15, 2026
240ca6b
Advance to ecosystem-pin-2026-06-15f (thread-safe geo_from_text; geoT…
estebanzimanyi Jun 15, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions .github/workflows/maven.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
name: Maven CI

on:
push:
branches: ["**"]
pull_request:

jobs:
# ── Linux ────────────────────────────────────────────────────────────────────
linux:
name: Build and test — Linux (Java 21 / Spark 3.5)
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4

- name: Set up Java 21
uses: actions/setup-java@v4
with:
distribution: temurin
java-version: "21"
cache: maven

- name: Install MEOS build dependencies
run: |
sudo apt-get update -qq
sudo apt-get install -y \
cmake ninja-build \
libjson-c-dev libgeos-dev libproj-dev libgsl-dev libh3-dev

- name: Checkout MobilityDB source (for MEOS build)
uses: actions/checkout@v4
with:
# Ecosystem pin: the SAME commit the vendored catalog (tools/meos-idl.json)
# and the bundled JMEOS jar are generated against.
repository: estebanzimanyi/MobilityDB
ref: ecosystem-pin-2026-06-15f
path: MobilityDB-src

- name: Build and install libmeos.so
run: |
# The build dir lives inside MobilityDB-src so the vendored pgtypes headers
# ("../../meos/include/...") resolve against the source tree.
cmake -S MobilityDB-src -B MobilityDB-src/meos-build \
-G Ninja \
-DCMAKE_BUILD_TYPE=Release \
-DMEOS=ON \
-DCBUFFER=ON -DNPOINT=ON -DPOSE=ON -DRGEO=ON \
-DH3=ON \
-DH3_LIBRARY=/usr/lib/x86_64-linux-gnu/libh3.so \
-DH3_INCLUDE_DIR=/usr/include/h3
cmake --build MobilityDB-src/meos-build -j
sudo cmake --install MobilityDB-src/meos-build
echo "LD_LIBRARY_PATH=/usr/local/lib" >> "$GITHUB_ENV"

- name: Install the bundled JMEOS jar into the local repo
# The generator reads this jar's symbols at build time and the UDFs call it at
# runtime (org.jmeos:meos:1.0); it is not on Maven Central.
run: |
mvn -B install:install-file \
-Dfile=libs/JMEOS.jar \
-DgroupId=org.jmeos -DartifactId=meos -Dversion=1.0 -Dpackaging=jar

- name: Build + generate + unit tests
# generate-sources runs the catalog-driven UDF generator; test exercises the
# generated surface against libmeos through JMEOS.
run: mvn -B clean test
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,6 @@
# Maven
log/
target/
tools/__pycache__/
/target/
src/main/java/org/mobilitydb/spark/generated/
Binary file added libs/JMEOS.jar
Binary file not shown.
96 changes: 91 additions & 5 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,21 @@
<version>1.0-SNAPSHOT</version>

<properties>
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
<maven.compiler.source>21</maven.compiler.source>
<maven.compiler.target>21</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>

<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.13</artifactId>
<version>3.4.0</version>
<version>3.5.1</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.13</artifactId>
<version>3.4.0</version>
<version>3.5.1</version>
<scope>compile</scope>
</dependency>

Expand All @@ -35,7 +35,93 @@
<dependency>
<groupId>com.github.jnr</groupId>
<artifactId>jnr-ffi</artifactId>
<version>2.2.11</version>
<version>2.2.17</version>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<version>5.10.0</version>
<scope>test</scope>
</dependency>
</dependencies>

<build>
<plugins>
<!-- Generate the whole UDF surface from the vendored MEOS-API catalog at
build time (North Star: bindings are GENERATED from MEOS, never hand
written). The generator reads the org.jmeos:meos jar's actual symbols
so it never emits a call to an absent/mismatched JMEOS method. -->
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>3.1.0</version>
<executions>
<execution>
<id>generate-udfs</id>
<phase>generate-sources</phase>
<goals><goal>exec</goal></goals>
<configuration>
<executable>python3</executable>
<arguments>
<argument>${project.basedir}/tools/codegen_spark_udfs.py</argument>
<argument>--catalog</argument>
<argument>${project.basedir}/tools/meos-idl.json</argument>
<argument>--jar</argument>
<argument>${settings.localRepository}/org/jmeos/meos/1.0/meos-1.0.jar</argument>
<argument>--out</argument>
<argument>${project.build.directory}/generated-sources/spark/org/mobilitydb/spark/generated</argument>
</arguments>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<version>3.5.0</version>
<executions>
<execution>
<id>add-generated-source</id>
<phase>generate-sources</phase>
<goals><goal>add-source</goal></goals>
<configuration>
<sources>
<source>${project.build.directory}/generated-sources/spark</source>
</sources>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>3.1.2</version>
<configuration>
<useModulePath>false</useModulePath>
<!-- Fork per test class: MEOS global state cannot be re-initialized
in the same JVM after meos_finalize(). -->
<forkCount>1</forkCount>
<reuseForks>false</reuseForks>
<!-- Java 17+ module system: Spark internals access sun.* packages -->
<argLine>
--add-opens=java.base/java.lang=ALL-UNNAMED
--add-opens=java.base/java.lang.invoke=ALL-UNNAMED
--add-opens=java.base/java.lang.reflect=ALL-UNNAMED
--add-opens=java.base/java.io=ALL-UNNAMED
--add-opens=java.base/java.net=ALL-UNNAMED
--add-opens=java.base/java.nio=ALL-UNNAMED
--add-opens=java.base/java.util=ALL-UNNAMED
--add-opens=java.base/java.util.concurrent=ALL-UNNAMED
--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED
--add-opens=java.base/java.util.concurrent.locks=ALL-UNNAMED
--add-opens=java.base/java.util.zip=ALL-UNNAMED
--add-opens=java.base/sun.nio.ch=ALL-UNNAMED
--add-opens=java.base/sun.nio.cs=ALL-UNNAMED
--add-opens=java.base/sun.security.action=ALL-UNNAMED
--add-opens=java.base/sun.util.calendar=ALL-UNNAMED
</argLine>
</configuration>
</plugin>
</plugins>
</build>
</project>
84 changes: 84 additions & 0 deletions src/main/java/org/mobilitydb/spark/MeosMemory.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*****************************************************************************
*
* This MobilityDB code is provided under The PostgreSQL License.
* Copyright (c) 2020-2026, Université libre de Bruxelles and MobilityDB
* contributors
*
* Permission to use, copy, modify, and distribute this software and its
* documentation for any purpose, without fee, and without a written
* agreement is hereby granted, provided that the above copyright notice and
* this paragraph and the following two paragraphs appear in all copies.
*
* IN NO EVENT SHALL UNIVERSITE LIBRE DE BRUXELLES BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
* DOCUMENTATION, EVEN IF UNIVERSITE LIBRE DE BRUXELLES HAS BEEN ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*
* UNIVERSITE LIBRE DE BRUXELLES SPECIFICALLY DISCLAIMS ANY WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND UNIVERSITE LIBRE DE BRUXELLES HAS NO OBLIGATIONS
* TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
*****************************************************************************/

package org.mobilitydb.spark;

import jnr.ffi.Pointer;
import sun.misc.Unsafe;
import java.lang.reflect.Field;

/**
* Native memory management for MEOS objects returned by JNR-FFI calls.
*
* MEOS standalone mode allocates temporal objects with the system malloc
* (palloc/pfree map to malloc/free when not running inside PostgreSQL).
* JNR-FFI Pointer values returned from MEOS functions are raw native
* addresses — they are NOT tracked by the Java GC. Callers must free
* each Pointer explicitly after use, otherwise the native heap grows
* without bound (one leaked Temporal* per UDF call × millions of rows
* in cross-join queries like Q2/Q4/Q5/Q6).
*
* Implementation uses sun.misc.Unsafe.freeMemory() which calls the system
* free() underneath — safe for MEOS pointers since MEOS standalone mode
* uses the system allocator. This avoids JNR-FFI classloader boundary
* issues that arise when loading libc via LibraryLoader inside Spark.
*
* Usage:
* <pre>
* Pointer tptr = GeneratedFunctions.temporal_from_hexwkb(hex);
* try {
* // ... use tptr ...
* } finally {
* MeosMemory.free(tptr);
* }
* </pre>
*/
public final class MeosMemory {

    /**
     * The {@code sun.misc.Unsafe} singleton, read reflectively from its private
     * {@code theUnsafe} field because the class offers no public accessor
     * usable from application code.
     */
    private static final Unsafe UNSAFE = loadUnsafe();

    /** Static utility holder; never instantiated. */
    private MeosMemory() {}

    /**
     * Looks up the {@code Unsafe} singleton via reflection.
     *
     * @return the {@code Unsafe} instance stored in {@code Unsafe.theUnsafe}
     * @throws ExceptionInInitializerError if the field cannot be found or read;
     *         the reflective failure is preserved as the cause
     */
    private static Unsafe loadUnsafe() {
        try {
            Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe");
            theUnsafe.setAccessible(true);
            return (Unsafe) theUnsafe.get(null);
        } catch (ReflectiveOperationException e) {
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Frees a native pointer allocated by MEOS. A {@code null} argument is
     * ignored.
     *
     * <p>The pointer must not be used, or freed again, after this call.
     * NOTE(review): this hands the raw address to {@code Unsafe.freeMemory};
     * it presumably relies on the JVM forwarding that straight to the C
     * {@code free()} (e.g. HotSpot with Native Memory Tracking disabled) —
     * confirm for the JVMs this is deployed on.
     *
     * @param ptr the pointer to release, or {@code null}
     */
    public static void free(Pointer ptr) {
        if (ptr != null) {
            UNSAFE.freeMemory(ptr.address());
        }
    }

    /**
     * Frees several native pointers in one call. Both a {@code null} array and
     * {@code null} elements are ignored, so the method is safe to call from a
     * {@code finally} block regardless of how far allocation got.
     *
     * @param ptrs the pointers to release; may be {@code null} and may contain
     *        {@code null} elements
     */
    public static void free(Pointer... ptrs) {
        // An explicitly passed null array (free((Pointer[]) null)) used to
        // throw a NullPointerException despite the documented null-safety.
        if (ptrs == null) {
            return;
        }
        for (Pointer p : ptrs) {
            // Delegate so the null check and the release live in one place.
            free(p);
        }
    }
}
73 changes: 73 additions & 0 deletions src/main/java/org/mobilitydb/spark/MeosThread.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*****************************************************************************
*
* This MobilityDB code is provided under The PostgreSQL License.
* Copyright (c) 2020-2026, Université libre de Bruxelles and MobilityDB
* contributors
*
* Permission to use, copy, modify, and distribute this software and its
* documentation for any purpose, without fee, and without a written
* agreement is hereby granted, provided that the above copyright notice and
* this paragraph and the following two paragraphs appear in all copies.
*
* IN NO EVENT SHALL UNIVERSITE LIBRE DE BRUXELLES BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
* LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION,
* EVEN IF UNIVERSITE LIBRE DE BRUXELLES HAS BEEN ADVISED OF THE POSSIBILITY
* OF SUCH DAMAGE.
*
* UNIVERSITE LIBRE DE BRUXELLES SPECIFICALLY DISCLAIMS ANY WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON
* AN "AS IS" BASIS, AND UNIVERSITE LIBRE DE BRUXELLES HAS NO OBLIGATIONS TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
*****************************************************************************/

package org.mobilitydb.spark;

import functions.GeneratedFunctions;
import functions.error_handler_fn;

/**
* Per-thread MEOS initialisation for Spark executor threads.
*
* In Spark's multi-threaded executor model every task thread initialises
* MEOS independently. meos_initialize() sets up the per-thread MEOS state
* (session_timezone, timezone cache, GEOS context, PROJ context, GSL RNGs,
* errno). The ThreadLocal in MEOS_READY runs initialisation exactly once
* per native thread.
*
* The entire UDF surface is generated (GeneratedSpatioTemporalUDFs); every
* generated entry point calls {@link #ensureReady()} before its first MEOS
* call, so the executor thread running it is always initialised. There are no
* hand-registered UDFs, so this class exposes only the guard — no registration
* helpers.
*/
public final class MeosThread {

    /**
     * MEOS error handler that never terminates the process.
     *
     * <p>The default MEOS handler calls exit(EXIT_FAILURE) on an ERROR, which
     * inside a Spark task would take the whole JVM down. This replacement
     * simply returns. The failure is not lost: MEOS still sets meos_errno, and
     * the generated wrappers inspect it (MeosErrorHandler.checkError) and
     * rethrow it as a Java exception. The handler is kept in a static field so
     * that JNR keeps the native callback alive for the lifetime of the process.
     */
    public static final error_handler_fn NOEXIT_ERROR_HANDLER =
        (level, code, message) -> {
            // Intentionally empty: returning (rather than exiting) keeps the JVM alive.
        };

    /**
     * Per-thread initialisation latch. The first {@code get()} on a thread
     * runs {@link #initialiseCurrentThread()}; later calls on the same thread
     * return the cached value without touching MEOS again.
     */
    private static final ThreadLocal<Boolean> MEOS_READY =
        ThreadLocal.withInitial(MeosThread::initialiseCurrentThread);

    /** Static utility holder; never instantiated. */
    private MeosThread() {}

    /** Ensure MEOS is initialised for the calling thread. */
    public static void ensureReady() {
        MEOS_READY.get();
    }

    /**
     * Performs the MEOS set-up for the calling thread: initialises MEOS, sets
     * the timezone to UTC, then installs {@link #NOEXIT_ERROR_HANDLER}.
     *
     * @return {@link Boolean#TRUE}, stored in {@link #MEOS_READY} as the
     *         "already initialised" marker for this thread
     */
    private static Boolean initialiseCurrentThread() {
        GeneratedFunctions.meos_initialize();
        GeneratedFunctions.meos_initialize_timezone("UTC");
        GeneratedFunctions.meos_initialize_error_handler(NOEXIT_ERROR_HANDLER);
        return Boolean.TRUE;
    }
}
Loading
Loading