diff --git a/src/client/dfs/dfs_internal.h b/src/client/dfs/dfs_internal.h index 4775edef738..81324daf289 100644 --- a/src/client/dfs/dfs_internal.h +++ b/src/client/dfs/dfs_internal.h @@ -147,7 +147,13 @@ struct dfs_obj { struct { /** Optional tail array object id for progressive layout */ daos_obj_id_t tail_oid; - /** Logical file offset where progressive layout switches to tail */ + /** + * Logical file offset where progressive layout switches to tail. + * Contract: the head array holds logical bytes [0, split_off) and + * the tail array holds logical bytes [split_off, EOF) indexed from 0 + * (i.e. tail index == logical_off - split_off). The IO path must + * honor this base when routing reads/writes/punches. + */ daos_size_t split_off; /** Optional tail array object handle for progressive layout */ daos_handle_t tail_oh; diff --git a/src/client/dfs/file.c b/src/client/dfs/file.c index a2cdb40c72a..d640a7e04a4 100644 --- a/src/client/dfs/file.c +++ b/src/client/dfs/file.c @@ -35,6 +35,13 @@ file_stat(dfs_t *dfs, daos_handle_t head_oh, daos_handle_t tail_oh, bool has_tai if (rc) return daos_der2errno(rc); + /* + * The head holds logical bytes [0, split_off) and the tail holds [split_off, EOF) indexed + * from 0, so the logical size is the sum of both extents. This is correct while the head is + * densely filled up to split_off whenever the tail is non-empty. + * TODO: once the PL IO path lands, account for sparse files written only past split_off + * (where the head extent is shorter than split_off) using split_off explicitly. 
+ */ stbuf->st_size += tail_stbuf.st_size; if (tail_stbuf.st_max_epoch > stbuf->st_max_epoch) stbuf->st_max_epoch = tail_stbuf.st_max_epoch; diff --git a/src/client/dfs/obj.c b/src/client/dfs/obj.c index 655c21b8069..d89e0ad982a 100644 --- a/src/client/dfs/obj.c +++ b/src/client/dfs/obj.c @@ -15,16 +15,15 @@ #include #include +#include #include "dfs_internal.h" -/* 0.2% per-target budget for data that should remain in the compact head object. */ -#define DFS_PL_HEAD_BUDGET_NUM 2ULL -/* Denominator for the head budget fraction. */ -#define DFS_PL_HEAD_BUDGET_DEN 1000ULL -/* Do not switch to the tail before 64 MiB of logical file data. */ -#define DFS_PL_SPLIT_OFF_MIN (64ULL << 20) -/* Cap the head region at 64 GiB even on very large systems. */ -#define DFS_PL_SPLIT_OFF_MAX (64ULL << 30) +/* + * Progressive-layout split-point tuning constants (DFS_PL_HEAD_BUDGET_NUM/DEN, + * DFS_PL_SPLIT_OFF_MIN/MAX) are defined in <daos/dfs_lib_int.h> so the test oracle stays in sync. + */ +/* Minimum pool target count before progressive layout is applied to default-class files. */ +#define DFS_PL_MIN_TARGETS 1000 /* Test-only override to bypass the PL target-count gate for default-selection testing. */ #define DFS_PL_BYPASS_TARGET_LIMIT_ENV "DFS_PL_BYPASS_TARGET_LIMIT" @@ -65,7 +64,7 @@ file_head_oclass(daos_oclass_id_t tail_cid, uint32_t max_groups) static const uint32_t head_groups[] = {32, 16, 12, 8, 6, 4, 2, 1}; enum daos_obj_redun ord; uint32_t group_nr; - int i; + size_t i; if (max_groups == 0) return OC_UNKNOWN; @@ -186,7 +185,7 @@ file_oclasses(dfs_t *dfs, dfs_obj_t *parent, daos_oclass_id_t cid, daos_size_t c if (dfs->pl_target_nr == 0) return 0; - if (dfs->pl_target_nr < 1000 && !pl_bypass_target_limit()) + if (dfs->pl_target_nr < DFS_PL_MIN_TARGETS && !pl_bypass_target_limit()) return 0; /* Use the default DAOS file class as the wide tail and derive the compact head from it. 
*/ @@ -200,9 +199,6 @@ file_oclasses(dfs_t *dfs, dfs_obj_t *parent, daos_oclass_id_t cid, daos_size_t c if (tail_attr == NULL) return EINVAL; - if (max_groups == 0) - goto out; - /* Keep the tail redundancy family and only compact the head by reducing its group count. */ *head_cid = file_head_oclass(*tail_cid, max_groups); if (*head_cid == OC_UNKNOWN) diff --git a/src/include/daos/dfs_lib_int.h b/src/include/daos/dfs_lib_int.h index 7df6d09a5e4..59660353349 100644 --- a/src/include/daos/dfs_lib_int.h +++ b/src/include/daos/dfs_lib_int.h @@ -1,5 +1,6 @@ /** * (C) Copyright 2019-2023 Intel Corporation. + * (C) Copyright 2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -17,6 +18,20 @@ extern "C" { #include #include +/* + * Progressive-layout (PL) tuning constants shared between the DFS client implementation and the + * tests that validate the head/tail split-point selection. Keep these in one place so the + * production logic and the test oracle cannot drift apart. + */ +/** Numerator of the per-target fraction of capacity budgeted to the compact head object (0.2%). */ +#define DFS_PL_HEAD_BUDGET_NUM 2ULL +/** Denominator of the per-target head budget fraction. */ +#define DFS_PL_HEAD_BUDGET_DEN 1000ULL +/** Do not switch to the tail before this much logical file data (64 MiB). */ +#define DFS_PL_SPLIT_OFF_MIN (64ULL << 20) +/** Cap the head region at this size even on very large systems (64 GiB). 
*/ +#define DFS_PL_SPLIT_OFF_MAX (64ULL << 30) + /* * Get the DFS superblock D-Key and A-Keys * diff --git a/src/tests/suite/dfs_unit_test.c b/src/tests/suite/dfs_unit_test.c index afda540c7c4..c6c8bbdd656 100644 --- a/src/tests/suite/dfs_unit_test.c +++ b/src/tests/suite/dfs_unit_test.c @@ -13,11 +13,6 @@ #include #include -#define DFS_PL_HEAD_BUDGET_NUM 2ULL -#define DFS_PL_HEAD_BUDGET_DEN 1000ULL -#define DFS_PL_SPLIT_OFF_MIN (64ULL << 20) -#define DFS_PL_SPLIT_OFF_MAX (64ULL << 30) - /** global DFS mount used for all tests */ static uuid_t co_uuid; static daos_handle_t co_hdl;