From 5beaa278f46ae1f7189b0288bea287149fa2ee0c Mon Sep 17 00:00:00 2001 From: Subham Sangwan Date: Sat, 22 Nov 2025 03:33:12 +0530 Subject: [PATCH 1/5] feat: add to_sedonadb() method to SedonaSpark DataFrame --- python/sedona/spark/__init__.py | 20 +++++++++ python/tests/test_to_sedonadb.py | 71 ++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 python/tests/test_to_sedonadb.py diff --git a/python/sedona/spark/__init__.py b/python/sedona/spark/__init__.py index 707aa24c95e..394b770e224 100644 --- a/python/sedona/spark/__init__.py +++ b/python/sedona/spark/__init__.py @@ -71,3 +71,23 @@ from sedona.spark.utils.adapter import Adapter from sedona.spark.utils.spatial_rdd_parser import GeoData from sedona.spark.utils.structured_adapter import StructuredAdapter + +from pyspark.sql import DataFrame + +def to_sedonadb(self, connection=None): + """ + Converts a SedonaSpark DataFrame to a SedonaDB DataFrame. + :param connection: Optional SedonaDB connection object. If None, a new connection will be created. + :return: SedonaDB DataFrame + """ + try: + import sedona.db + except ImportError: + raise ImportError("SedonaDB is not installed. Please install it using `pip install sedona-db`.") + + if connection is None: + connection = sedona.db.connect() + + return connection.create_data_frame(dataframe_to_arrow(self)) + +DataFrame.to_sedonadb = to_sedonadb diff --git a/python/tests/test_to_sedonadb.py b/python/tests/test_to_sedonadb.py new file mode 100644 index 00000000000..32d6c7df140 --- /dev/null +++ b/python/tests/test_to_sedonadb.py @@ -0,0 +1,71 @@ + +import unittest +import sys +from unittest.mock import MagicMock, patch +from pyspark.sql import SparkSession, DataFrame +from sedona.spark import * + +class TestToSedonaDB(unittest.TestCase): + + def setUp(self): + # Mock sedona.db to avoid ImportError + self.mock_sedona_db = MagicMock() + sys.modules["sedona.db"] = self.mock_sedona_db + import sedona + sedona.db = self.mock_sedona_db + self.spark = SparkSession.builder.getOrCreate() + + def tearDown(self): + if "sedona.db" in sys.modules: + del sys.modules["sedona.db"] + import sedona + if hasattr(sedona, "db"): + del sedona.db + + @patch('sedona.spark.dataframe_to_arrow') + def test_to_sedonadb_no_connection(self, mock_dataframe_to_arrow): + # Mock dependencies + mock_arrow_table = MagicMock() + mock_dataframe_to_arrow.return_value = mock_arrow_table + + mock_connection = MagicMock() + self.mock_sedona_db.connect.return_value = mock_connection + + mock_sedonadb_df = MagicMock() + mock_connection.create_data_frame.return_value = mock_sedonadb_df + + # Create a dummy Spark DataFrame + df = self.spark.range(1) + + # Call the method + result = df.to_sedonadb() + + # Verify calls + self.mock_sedona_db.connect.assert_called_once() + mock_dataframe_to_arrow.assert_called_once_with(df) + mock_connection.create_data_frame.assert_called_once_with(mock_arrow_table) + self.assertEqual(result, mock_sedonadb_df) + + @patch('sedona.spark.dataframe_to_arrow') + def test_to_sedonadb_with_connection(self, mock_dataframe_to_arrow): + # Mock dependencies + mock_arrow_table = MagicMock() + mock_dataframe_to_arrow.return_value = mock_arrow_table + + mock_connection = MagicMock() + mock_sedonadb_df = MagicMock() + mock_connection.create_data_frame.return_value = mock_sedonadb_df + + # Create a dummy Spark DataFrame + df = self.spark.range(1) + + # Call the method + result = df.to_sedonadb(connection=mock_connection) + + # Verify calls + mock_dataframe_to_arrow.assert_called_once_with(df) + mock_connection.create_data_frame.assert_called_once_with(mock_arrow_table) + self.assertEqual(result, mock_sedonadb_df) + +if __name__ == '__main__': + unittest.main() From 27e16141245739dca2c3afeb5c4dff9f85ec1805 Mon Sep 17 00:00:00 2001 From: Subham Sangwan Date: Sat, 22 Nov 2025 03:41:57 +0530 Subject: [PATCH 2/5] style: fix pre-commit errors (license header and formatting) --- python/sedona/spark/__init__.py | 6 +++++- python/tests/test_to_sedonadb.py | 36 +++++++++++++++++++++++++------- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/python/sedona/spark/__init__.py b/python/sedona/spark/__init__.py index 394b770e224..ac3c18b7176 100644 --- a/python/sedona/spark/__init__.py +++ b/python/sedona/spark/__init__.py @@ -74,6 +74,7 @@ from pyspark.sql import DataFrame + def to_sedonadb(self, connection=None): """ Converts a SedonaSpark DataFrame to a SedonaDB DataFrame. @@ -83,11 +84,14 @@ def to_sedonadb(self, connection=None): try: import sedona.db except ImportError: - raise ImportError("SedonaDB is not installed. Please install it using `pip install sedona-db`.") + raise ImportError( + "SedonaDB is not installed. Please install it using `pip install sedona-db`." + ) if connection is None: connection = sedona.db.connect() return connection.create_data_frame(dataframe_to_arrow(self)) + DataFrame.to_sedonadb = to_sedonadb diff --git a/python/tests/test_to_sedonadb.py b/python/tests/test_to_sedonadb.py index 32d6c7df140..862c74056eb 100644 --- a/python/tests/test_to_sedonadb.py +++ b/python/tests/test_to_sedonadb.py @@ -1,3 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. import unittest import sys @@ -5,6 +21,7 @@ from pyspark.sql import SparkSession, DataFrame from sedona.spark import * + class TestToSedonaDB(unittest.TestCase): def setUp(self): @@ -12,6 +29,7 @@ def setUp(self): self.mock_sedona_db = MagicMock() sys.modules["sedona.db"] = self.mock_sedona_db import sedona + sedona.db = self.mock_sedona_db self.spark = SparkSession.builder.getOrCreate() @@ -19,24 +37,25 @@ def tearDown(self): if "sedona.db" in sys.modules: del sys.modules["sedona.db"] import sedona + if hasattr(sedona, "db"): del sedona.db - @patch('sedona.spark.dataframe_to_arrow') + @patch("sedona.spark.dataframe_to_arrow") def test_to_sedonadb_no_connection(self, mock_dataframe_to_arrow): # Mock dependencies mock_arrow_table = MagicMock() mock_dataframe_to_arrow.return_value = mock_arrow_table - + mock_connection = MagicMock() self.mock_sedona_db.connect.return_value = mock_connection - + mock_sedonadb_df = MagicMock() mock_connection.create_data_frame.return_value = mock_sedonadb_df # Create a dummy Spark DataFrame df = self.spark.range(1) - + # Call the method result = df.to_sedonadb() @@ -46,19 +65,19 @@ def test_to_sedonadb_no_connection(self, mock_dataframe_to_arrow): mock_connection.create_data_frame.assert_called_once_with(mock_arrow_table) self.assertEqual(result, mock_sedonadb_df) - @patch('sedona.spark.dataframe_to_arrow') + @patch("sedona.spark.dataframe_to_arrow") def test_to_sedonadb_with_connection(self, mock_dataframe_to_arrow): # Mock dependencies mock_arrow_table = MagicMock() mock_dataframe_to_arrow.return_value = mock_arrow_table - + mock_connection = MagicMock() mock_sedonadb_df = MagicMock() mock_connection.create_data_frame.return_value = mock_sedonadb_df # Create a dummy Spark DataFrame df = self.spark.range(1) - + # Call the method result = df.to_sedonadb(connection=mock_connection) @@ -67,5 +86,6 @@ def test_to_sedonadb_with_connection(self, mock_dataframe_to_arrow): mock_connection.create_data_frame.assert_called_once_with(mock_arrow_table) self.assertEqual(result, mock_sedonadb_df) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() From 221a9cd99a144b35ea9bbd593beed99d8f4f0596 Mon Sep 17 00:00:00 2001 From: Subham Sangwan Date: Fri, 27 Mar 2026 11:48:32 +0530 Subject: [PATCH 3/5] fix: address Copilot AI review feedback for to_sedonadb() --- python/sedona/spark/__init__.py | 13 +++++----- python/tests/test_to_sedonadb.py | 42 +++++++++++++++++++++++++++++--- 2 files changed, 44 insertions(+), 11 deletions(-) diff --git a/python/sedona/spark/__init__.py b/python/sedona/spark/__init__.py index ac3c18b7176..943b4b89cec 100644 --- a/python/sedona/spark/__init__.py +++ b/python/sedona/spark/__init__.py @@ -81,14 +81,13 @@ def to_sedonadb(self, connection=None): :param connection: Optional SedonaDB connection object. If None, a new connection will be created. :return: SedonaDB DataFrame """ - try: - import sedona.db - except ImportError: - raise ImportError( - "SedonaDB is not installed. Please install it using `pip install sedona-db`." - ) - if connection is None: + try: + import sedona.db + except ImportError: + raise ImportError( + "SedonaDB is not installed. Please install it using `pip install sedona-db`." + ) connection = sedona.db.connect() return connection.create_data_frame(dataframe_to_arrow(self)) diff --git a/python/tests/test_to_sedonadb.py b/python/tests/test_to_sedonadb.py index 862c74056eb..ec3b56c1b9b 100644 --- a/python/tests/test_to_sedonadb.py +++ b/python/tests/test_to_sedonadb.py @@ -25,20 +25,33 @@ class TestToSedonaDB(unittest.TestCase): def setUp(self): + # Preserve any existing sedona.db module and sedona.db attribute + self._original_sedona_db_module = sys.modules.get("sedona.db") + import sedona + + self._had_sedona_db_attr = hasattr(sedona, "db") + self._original_sedona_db_attr = getattr(sedona, "db", None) + # Mock sedona.db to avoid ImportError self.mock_sedona_db = MagicMock() sys.modules["sedona.db"] = self.mock_sedona_db - import sedona - sedona.db = self.mock_sedona_db self.spark = SparkSession.builder.getOrCreate() def tearDown(self): - if "sedona.db" in sys.modules: + self.spark.stop() + # Restore prior sys.modules['sedona.db'] state + if self._original_sedona_db_module is not None: + sys.modules["sedona.db"] = self._original_sedona_db_module + elif "sedona.db" in sys.modules: del sys.modules["sedona.db"] + import sedona - if hasattr(sedona, "db"): + # Restore prior sedona.db attribute state + if self._had_sedona_db_attr: + sedona.db = self._original_sedona_db_attr + elif hasattr(sedona, "db"): del sedona.db @patch("sedona.spark.dataframe_to_arrow") @@ -86,6 +99,27 @@ def test_to_sedonadb_with_connection(self, mock_dataframe_to_arrow): mock_connection.create_data_frame.assert_called_once_with(mock_arrow_table) self.assertEqual(result, mock_sedonadb_df) + def test_to_sedonadb_import_error(self): + # Temporarily remove sedona.db from sys.modules and sedona + if "sedona.db" in sys.modules: + del sys.modules["sedona.db"] + import sedona + + if hasattr(sedona, "db"): + del sedona.db + + # Create a dummy Spark DataFrame + df = self.spark.range(1) + + # Call the method and expect ImportError + with self.assertRaises(ImportError) as cm: + df.to_sedonadb() + + self.assertEqual( + str(cm.exception), + "SedonaDB is not installed. Please install it using `pip install sedona-db`.", + ) + if __name__ == "__main__": unittest.main() From b8cf8c17aa709fa50f8961637c83b6567e8020d3 Mon Sep 17 00:00:00 2001 From: Subham Sangwan Date: Fri, 27 Mar 2026 12:18:42 +0530 Subject: [PATCH 4/5] fix: address CI failure by improving test robustness and session management --- python/tests/test_to_sedonadb.py | 48 ++++++++++++++++---------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/python/tests/test_to_sedonadb.py b/python/tests/test_to_sedonadb.py index ec3b56c1b9b..886a2dbe4fe 100644 --- a/python/tests/test_to_sedonadb.py +++ b/python/tests/test_to_sedonadb.py @@ -17,37 +17,32 @@ import unittest import sys +import sedona +import sedona.spark from unittest.mock import MagicMock, patch -from pyspark.sql import SparkSession, DataFrame -from sedona.spark import * +from pyspark.sql import SparkSession class TestToSedonaDB(unittest.TestCase): def setUp(self): - # Preserve any existing sedona.db module and sedona.db attribute + # Preserve original state self._original_sedona_db_module = sys.modules.get("sedona.db") - import sedona - self._had_sedona_db_attr = hasattr(sedona, "db") self._original_sedona_db_attr = getattr(sedona, "db", None) - - # Mock sedona.db to avoid ImportError - self.mock_sedona_db = MagicMock() - sys.modules["sedona.db"] = self.mock_sedona_db - sedona.db = self.mock_sedona_db + + # Use getOrCreate without stopping it in tearDown to avoid CI crashes/hangs + # when other tests might be sharing the same JVM/session. self.spark = SparkSession.builder.getOrCreate() def tearDown(self): - self.spark.stop() # Restore prior sys.modules['sedona.db'] state + if "sedona.db" in sys.modules: + del sys.modules["sedona.db"] if self._original_sedona_db_module is not None: sys.modules["sedona.db"] = self._original_sedona_db_module - elif "sedona.db" in sys.modules: - del sys.modules["sedona.db"] import sedona - # Restore prior sedona.db attribute state if self._had_sedona_db_attr: sedona.db = self._original_sedona_db_attr @@ -56,12 +51,16 @@ def tearDown(self): @patch("sedona.spark.dataframe_to_arrow") def test_to_sedonadb_no_connection(self, mock_dataframe_to_arrow): - # Mock dependencies + # Mock sedona.db + mock_sedona_db = MagicMock() + sys.modules["sedona.db"] = mock_sedona_db + sedona.db = mock_sedona_db + mock_arrow_table = MagicMock() mock_dataframe_to_arrow.return_value = mock_arrow_table mock_connection = MagicMock() - self.mock_sedona_db.connect.return_value = mock_connection + mock_sedona_db.connect.return_value = mock_connection mock_sedonadb_df = MagicMock() mock_connection.create_data_frame.return_value = mock_sedonadb_df @@ -73,14 +72,18 @@ def test_to_sedonadb_no_connection(self, mock_dataframe_to_arrow): result = df.to_sedonadb() # Verify calls - self.mock_sedona_db.connect.assert_called_once() + mock_sedona_db.connect.assert_called_once() mock_dataframe_to_arrow.assert_called_once_with(df) mock_connection.create_data_frame.assert_called_once_with(mock_arrow_table) self.assertEqual(result, mock_sedonadb_df) @patch("sedona.spark.dataframe_to_arrow") def test_to_sedonadb_with_connection(self, mock_dataframe_to_arrow): - # Mock dependencies + # Force sedona.db to be missing to ensure it's not required when connection is provided + sys.modules["sedona.db"] = None + if hasattr(sedona, "db"): + del sedona.db + mock_arrow_table = MagicMock() mock_dataframe_to_arrow.return_value = mock_arrow_table @@ -100,11 +103,8 @@ def test_to_sedonadb_with_connection(self, mock_dataframe_to_arrow): self.assertEqual(result, mock_sedonadb_df) def test_to_sedonadb_import_error(self): - # Temporarily remove sedona.db from sys.modules and sedona - if "sedona.db" in sys.modules: - del sys.modules["sedona.db"] - import sedona - + # Force ImportError by setting the module to None in sys.modules + sys.modules["sedona.db"] = None if hasattr(sedona, "db"): del sedona.db @@ -117,7 +117,7 @@ def test_to_sedonadb_import_error(self): self.assertEqual( str(cm.exception), - "SedonaDB is not installed. Please install it using `pip install sedona-db`.", + "SedonaDB is not installed. Please install it using `pip install sedona-db`." ) From 9cebd3e2b01380b068475b65e3550b4549069d6d Mon Sep 17 00:00:00 2001 From: Subham Sangwan Date: Fri, 27 Mar 2026 12:23:01 +0530 Subject: [PATCH 5/5] style: fix pre-commit errors in tests --- python/tests/test_to_sedonadb.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/tests/test_to_sedonadb.py b/python/tests/test_to_sedonadb.py index 886a2dbe4fe..46470e8925f 100644 --- a/python/tests/test_to_sedonadb.py +++ b/python/tests/test_to_sedonadb.py @@ -30,7 +30,7 @@ def setUp(self): self._original_sedona_db_module = sys.modules.get("sedona.db") self._had_sedona_db_attr = hasattr(sedona, "db") self._original_sedona_db_attr = getattr(sedona, "db", None) - + # Use getOrCreate without stopping it in tearDown to avoid CI crashes/hangs # when other tests might be sharing the same JVM/session. self.spark = SparkSession.builder.getOrCreate() @@ -43,6 +43,7 @@ def tearDown(self): sys.modules["sedona.db"] = self._original_sedona_db_module import sedona + # Restore prior sedona.db attribute state if self._had_sedona_db_attr: sedona.db = self._original_sedona_db_attr @@ -117,7 +118,7 @@ def test_to_sedonadb_import_error(self): self.assertEqual( str(cm.exception), - "SedonaDB is not installed. Please install it using `pip install sedona-db`." + "SedonaDB is not installed. Please install it using `pip install sedona-db`.", )