Skip to content

Commit aa780fb

Browse files
committed
Fix after merging upstream/main.
2 parents c3c5c0b + 4f5274a commit aa780fb

11 files changed

Lines changed: 234 additions & 65 deletions

File tree

.github/workflows/stale.yml

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,28 +17,28 @@
1717
# under the License.
1818
#
1919

20-
name: "Close Stale Issues"
20+
name: "Close Stale Issues and PRs"
2121
on:
2222
schedule:
2323
- cron: '0 0 * * *'
2424

2525
permissions:
2626
# All other permissions are set to none
2727
issues: write
28+
pull-requests: write
2829

2930
jobs:
3031
stale:
3132
if: github.repository_owner == 'apache'
32-
runs-on: ubuntu-22.04
33+
runs-on: ubuntu-24.04
3334
steps:
3435
- uses: actions/stale@v10.2.0
3536
with:
36-
stale-issue-label: 'stale'
37+
# stale issues
38+
stale-issue-label: 'stale,security'
3739
exempt-issue-labels: 'not-stale'
3840
days-before-issue-stale: 180
3941
days-before-issue-close: 14
40-
# Only close stale issues, leave PRs alone
41-
days-before-pr-stale: -1
4242
stale-issue-message: >
4343
This issue has been automatically marked as stale because it has been open for 180 days
4444
with no activity. It will be closed in the next 14 days if no further activity occurs. To
@@ -47,3 +47,12 @@ jobs:
4747
close-issue-message: >
4848
This issue has been closed because it has not received any activity in the last 14 days
4949
since being marked as 'stale'
50+
# stale PRs
51+
stale-pr-label: 'stale'
52+
exempt-pr-labels: 'not-stale,security'
53+
stale-pr-message: 'This pull request has been marked as stale due to 30 days of inactivity. It will be closed in 1 week if no further activity occurs. If you think that’s incorrect or this pull request requires a review, please simply write any comment. If closed, you can revive the PR at any time and @mention a reviewer or discuss it on the dev@iceberg.apache.org list. Thank you for your contributions.'
54+
close-pr-message: 'This pull request has been closed due to lack of activity. This is not a judgement on the merit of the PR in any way. It is just a way of keeping the PR queue manageable. If you think that is incorrect, or the pull request requires review, you can revive the PR at any time.'
55+
days-before-pr-stale: 30
56+
days-before-pr-close: 7
57+
ascending: true
58+
operations-per-run: 200

bindings/python/Makefile

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,15 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717

18-
19-
install:
20-
uv sync --group dev --no-install-project
18+
.PHONY: build install test
2119

2220
build:
2321
uv run maturin develop
2422

23+
# `uv sync` may remove the local editable pyiceberg-core install, so the `install` target rebuilds it afterwards.
24+
install:
25+
uv sync --group dev --no-install-project
26+
$(MAKE) build
27+
2528
test:
2629
uv run --no-sync pytest

bindings/python/pyproject.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,11 @@ include = [
5151
[tool.ruff.lint]
5252
ignore = ["F403", "F405"]
5353

54+
[tool.pytest.ini_options]
55+
filterwarnings = [
56+
"error",
57+
]
58+
5459
[dependency-groups]
5560
dev = [
5661
"maturin>=1.0,<2.0",

bindings/python/tests/conftest.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
import pytest
2727
from pydantic_core import to_json
28+
from pyiceberg.utils.config import Config
2829

2930
from pyiceberg.partitioning import PartitionField, PartitionSpec
3031
from pyiceberg.schema import Schema
@@ -36,6 +37,20 @@
3637
)
3738

3839

40+
@pytest.fixture(scope="session", autouse=True)
41+
def isolate_pyiceberg_config() -> Generator[None, None, None]:
42+
monkeypatch = pytest.MonkeyPatch()
43+
with TemporaryDirectory() as empty_home_dir:
44+
monkeypatch.setenv("HOME", empty_home_dir)
45+
monkeypatch.setenv("PYICEBERG_HOME", empty_home_dir)
46+
47+
import pyiceberg.catalog as catalog
48+
49+
monkeypatch.setattr(catalog, "_ENV_CONFIG", Config())
50+
yield
51+
monkeypatch.undo()
52+
53+
3954
@pytest.fixture(scope="session")
4055
def avro_schema_manifest_entry() -> Dict[str, Any]:
4156
return {

bindings/python/tests/test_datafusion_table_provider.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ def test_register_iceberg_table_provider(
106106
)
107107

108108
ctx = SessionContext()
109-
ctx.register_table_provider("test", iceberg_table_provider)
109+
ctx.register_table("test", iceberg_table_provider)
110110

111111
datafusion_table = ctx.table("test")
112112
assert datafusion_table is not None
@@ -154,7 +154,7 @@ def __datafusion_table_provider__(self):
154154
)
155155

156156
ctx = SessionContext()
157-
ctx.register_table_provider("test", iceberg_table)
157+
ctx.register_table("test", iceberg_table)
158158

159159
datafusion_table = ctx.table("test")
160160
assert datafusion_table is not None

crates/iceberg/src/arrow/caching_delete_file_loader.rs

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ impl CachingDeleteFileLoader {
235235
PosDelLoadAction::Load => Ok(DeleteFileContext::PosDels {
236236
file_path: task.file_path.clone(),
237237
stream: basic_delete_file_loader
238-
.parquet_to_batch_stream(&task.file_path)
238+
.parquet_to_batch_stream(&task.file_path, task.file_size_in_bytes)
239239
.await?,
240240
}),
241241
}
@@ -254,7 +254,7 @@ impl CachingDeleteFileLoader {
254254
let equality_ids_vec = task.equality_ids.clone().unwrap();
255255
let evolved_stream = BasicDeleteFileLoader::evolve_schema(
256256
basic_delete_file_loader
257-
.parquet_to_batch_stream(&task.file_path)
257+
.parquet_to_batch_stream(&task.file_path, task.file_size_in_bytes)
258258
.await?,
259259
schema,
260260
&equality_ids_vec,
@@ -614,7 +614,10 @@ mod tests {
614614

615615
let basic_delete_file_loader = BasicDeleteFileLoader::new(file_io.clone());
616616
let record_batch_stream = basic_delete_file_loader
617-
.parquet_to_batch_stream(&eq_delete_file_path)
617+
.parquet_to_batch_stream(
618+
&eq_delete_file_path,
619+
std::fs::metadata(&eq_delete_file_path).unwrap().len(),
620+
)
618621
.await
619622
.expect("could not get batch stream");
620623

@@ -811,7 +814,10 @@ mod tests {
811814
let basic_delete_file_loader = BasicDeleteFileLoader::new(file_io.clone());
812815

813816
let batch_stream = basic_delete_file_loader
814-
.parquet_to_batch_stream(&delete_file_path)
817+
.parquet_to_batch_stream(
818+
&delete_file_path,
819+
std::fs::metadata(&delete_file_path).unwrap().len(),
820+
)
815821
.await
816822
.unwrap();
817823

@@ -913,14 +919,16 @@ mod tests {
913919

914920
// Create FileScanTask with BOTH positional and equality deletes
915921
let pos_del = FileScanTaskDeleteFile {
916-
file_path: pos_del_path,
922+
file_path: pos_del_path.clone(),
923+
file_size_in_bytes: std::fs::metadata(&pos_del_path).unwrap().len(),
917924
file_type: DataContentType::PositionDeletes,
918925
partition_spec_id: 0,
919926
equality_ids: None,
920927
};
921928

922929
let eq_del = FileScanTaskDeleteFile {
923930
file_path: eq_delete_path.clone(),
931+
file_size_in_bytes: std::fs::metadata(&eq_delete_path).unwrap().len(),
924932
file_type: DataContentType::EqualityDeletes,
925933
partition_spec_id: 0,
926934
equality_ids: Some(vec![2, 3]), // Only use field IDs that exist in both schemas
@@ -994,7 +1002,7 @@ mod tests {
9941002

9951003
let basic_delete_file_loader = BasicDeleteFileLoader::new(file_io.clone());
9961004
let record_batch_stream = basic_delete_file_loader
997-
.parquet_to_batch_stream(&path)
1005+
.parquet_to_batch_stream(&path, std::fs::metadata(&path).unwrap().len())
9981006
.await
9991007
.expect("could not get batch stream");
10001008

crates/iceberg/src/arrow/delete_file_loader.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,20 +55,21 @@ impl BasicDeleteFileLoader {
5555
pub(crate) async fn parquet_to_batch_stream(
5656
&self,
5757
data_file_path: &str,
58+
file_size_in_bytes: u64,
5859
) -> Result<ArrowRecordBatchStream> {
5960
/*
6061
Essentially a super-cut-down ArrowReader. We can't use ArrowReader directly
6162
as that introduces a circular dependency.
6263
*/
6364
let parquet_metadata =
64-
ArrowReader::load_parquet_metadata(data_file_path, &self.file_io, false, None).await?;
65+
ArrowReader::load_parquet_metadata(data_file_path, &self.file_io, false, file_size_in_bytes, None).await?;
6566

6667
let record_batch_stream = ArrowReader::create_parquet_record_batch_stream_builder(
6768
data_file_path,
6869
self.file_io.clone(),
6970
false,
7071
ArrowReaderOptions::default(),
71-
None,
72+
file_size_in_bytes,
7273
parquet_metadata,
7374
)
7475
.await?
@@ -107,7 +108,9 @@ impl DeleteFileLoader for BasicDeleteFileLoader {
107108
task: &FileScanTaskDeleteFile,
108109
schema: SchemaRef,
109110
) -> Result<ArrowRecordBatchStream> {
110-
let raw_batch_stream = self.parquet_to_batch_stream(&task.file_path).await?;
111+
let raw_batch_stream = self
112+
.parquet_to_batch_stream(&task.file_path, task.file_size_in_bytes)
113+
.await?;
111114

112115
// For equality deletes, only evolve the equality_ids columns.
113116
// For positional deletes (equality_ids is None), use all field IDs.

crates/iceberg/src/arrow/delete_filter.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,20 +378,38 @@ pub(crate) mod tests {
378378

379379
let pos_del_1 = FileScanTaskDeleteFile {
380380
file_path: format!("{}/pos-del-1.parquet", table_location.to_str().unwrap()),
381+
file_size_in_bytes: std::fs::metadata(format!(
382+
"{}/pos-del-1.parquet",
383+
table_location.to_str().unwrap()
384+
))
385+
.unwrap()
386+
.len(),
381387
file_type: DataContentType::PositionDeletes,
382388
partition_spec_id: 0,
383389
equality_ids: None,
384390
};
385391

386392
let pos_del_2 = FileScanTaskDeleteFile {
387393
file_path: format!("{}/pos-del-2.parquet", table_location.to_str().unwrap()),
394+
file_size_in_bytes: std::fs::metadata(format!(
395+
"{}/pos-del-2.parquet",
396+
table_location.to_str().unwrap()
397+
))
398+
.unwrap()
399+
.len(),
388400
file_type: DataContentType::PositionDeletes,
389401
partition_spec_id: 0,
390402
equality_ids: None,
391403
};
392404

393405
let pos_del_3 = FileScanTaskDeleteFile {
394406
file_path: format!("{}/pos-del-3.parquet", table_location.to_str().unwrap()),
407+
file_size_in_bytes: std::fs::metadata(format!(
408+
"{}/pos-del-3.parquet",
409+
table_location.to_str().unwrap()
410+
))
411+
.unwrap()
412+
.len(),
395413
file_type: DataContentType::PositionDeletes,
396414
partition_spec_id: 0,
397415
equality_ids: None,
@@ -477,6 +495,7 @@ pub(crate) mod tests {
477495
predicate: None,
478496
deletes: vec![FileScanTaskDeleteFile {
479497
file_path: "eq-del.parquet".to_string(),
498+
file_size_in_bytes: 1, // never read; this test fails before opening the file
480499
file_type: DataContentType::EqualityDeletes,
481500
partition_spec_id: 0,
482501
equality_ids: None,

0 commit comments

Comments
 (0)