diff --git a/src/client/dfuse/pil4dfs/int_dfs.c b/src/client/dfuse/pil4dfs/int_dfs.c index fb4a5d8ed46..22c05ee730b 100644 --- a/src/client/dfuse/pil4dfs/int_dfs.c +++ b/src/client/dfuse/pil4dfs/int_dfs.c @@ -64,7 +64,7 @@ #define FREE(ptr) do {free(ptr); (ptr) = NULL; } while (0) /* The max number of mount points for DAOS mounted simultaneously */ -#define MAX_DAOS_MT (8) +#define MAX_DAOS_MT (32) #define READ_DIR_BATCH_SIZE (96) #define MAX_FD_DUP2ED (16) @@ -589,11 +589,6 @@ discover_daos_mount_with_env(void) /* env D_IL_MOUNT_POINT is undefined, return success (0) */ D_GOTO(out, rc = 0); - if (num_dfs >= MAX_DAOS_MT) { - D_FATAL("dfs_list[] is full already. Need to increase MAX_DAOS_MT.\n"); - D_GOTO(out, rc = EBUSY); - } - if (access(fs_root, R_OK)) { D_FATAL("no read permission for %s: %d (%s)\n", fs_root, errno, strerror(errno)); D_GOTO(out, rc = EACCES); @@ -602,8 +597,19 @@ discover_daos_mount_with_env(void) /* check whether fs_root exists in dfs_list[] already. "idx >= 0" means exists. */ idx = query_dfs_mount(fs_root); if (idx >= 0) + /* already registered by discover_dfuse_mounts(), no new slot needed */ D_GOTO(out, rc = 0); + /* A new slot is required. If the table is already full, skip this mount point + * gracefully instead of aborting the application. + */ + if (num_dfs >= MAX_DAOS_MT) { + D_WARN("D_IL_MOUNT_POINT ignored: dfs_list[] is full (%d mounts). Increase " + "MAX_DAOS_MT to support more simultaneous mounts.\n", + MAX_DAOS_MT); + D_GOTO(out, rc = 0); + } + /* Not found in existing list, then append this new mount point. */ len_fs_root = strnlen(fs_root, DFS_MAX_PATH); if (len_fs_root >= DFS_MAX_PATH) { @@ -687,32 +693,36 @@ discover_dfuse_mounts(void) } while ((fs_entry = getmntent(fp)) != NULL) { + if (memcmp(fs_entry->mnt_type, STR_AND_SIZE(MNT_TYPE_FUSE)) != 0) + continue; + if (num_dfs >= MAX_DAOS_MT) { - D_FATAL("dfs_list[] is full. Need to increase MAX_DAOS_MT.\n"); - abort(); + D_WARN("Found more than MAX_DAOS_MT (%d) dfuse mount points. " + "Disabling interception. Increase MAX_DAOS_MT to support " + "more simultaneous mounts.\n", + MAX_DAOS_MT); + D_GOTO(out, rc = EOVERFLOW); } - pt_dfs_mt = &dfs_list[num_dfs]; - if (memcmp(fs_entry->mnt_type, STR_AND_SIZE(MNT_TYPE_FUSE)) == 0) { - pt_dfs_mt->dcache = NULL; - pt_dfs_mt->len_fs_root = strnlen(fs_entry->mnt_dir, DFS_MAX_PATH); - if (pt_dfs_mt->len_fs_root >= DFS_MAX_PATH) { - D_DEBUG(DB_ANY, "mnt_dir[] is too long. Skip this entry.\n"); - D_GOTO(out, rc = ENAMETOOLONG); - } - if (access(fs_entry->mnt_dir, R_OK)) { - D_DEBUG(DB_ANY, "no read permission for %s: %d (%s)\n", - fs_entry->mnt_dir, errno, strerror(errno)); - continue; - } - - atomic_init(&pt_dfs_mt->inited, 0); - pt_dfs_mt->pool = NULL; - pt_dfs_mt->cont = NULL; - D_STRNDUP(pt_dfs_mt->fs_root, fs_entry->mnt_dir, pt_dfs_mt->len_fs_root); - if (pt_dfs_mt->fs_root == NULL) - D_GOTO(out, rc = ENOMEM); - num_dfs++; + pt_dfs_mt = &dfs_list[num_dfs]; + pt_dfs_mt->dcache = NULL; + pt_dfs_mt->len_fs_root = strnlen(fs_entry->mnt_dir, DFS_MAX_PATH); + if (pt_dfs_mt->len_fs_root >= DFS_MAX_PATH) { + D_DEBUG(DB_ANY, "mnt_dir[] is too long. Skip this entry.\n"); + D_GOTO(out, rc = ENAMETOOLONG); + } + if (access(fs_entry->mnt_dir, R_OK)) { + D_DEBUG(DB_ANY, "no read permission for %s: %d (%s)\n", fs_entry->mnt_dir, + errno, strerror(errno)); + continue; } + + atomic_init(&pt_dfs_mt->inited, 0); + pt_dfs_mt->pool = NULL; + pt_dfs_mt->cont = NULL; + D_STRNDUP(pt_dfs_mt->fs_root, fs_entry->mnt_dir, pt_dfs_mt->len_fs_root); + if (pt_dfs_mt->fs_root == NULL) + D_GOTO(out, rc = ENOMEM); + num_dfs++; } out: diff --git a/src/tests/ftest/dfuse/pil4dfs_many_mounts.py b/src/tests/ftest/dfuse/pil4dfs_many_mounts.py new file mode 100644 index 00000000000..39b0c1d8ecd --- /dev/null +++ b/src/tests/ftest/dfuse/pil4dfs_many_mounts.py @@ -0,0 +1,149 @@ +""" + (C) Copyright 2026 Hewlett Packard Enterprise Development LP + + SPDX-License-Identifier: BSD-2-Clause-Patent +""" +import os + +from apricot import TestWithServers +from command_utils_base import EnvironmentVariables +from dfuse_utils import get_dfuse, start_dfuse +from run_utils import run_remote + +# Marker printed to stderr by libpil4dfs at process exit when D_IL_REPORT is set +# and interception is enabled. Its presence/absence tells us whether interception +# was active for the process. +INTERCEPT_MARKER = "libpil4dfs intercepting summary" + + +class Pil4dfsManyMounts(TestWithServers): + """Verify libpil4dfs handling of many dfuse mount points (MAX_DAOS_MT). + + libpil4dfs discovers every fuse.daos mount point listed in /proc/self/mounts + when it initializes and stores them in a fixed-size table (MAX_DAOS_MT). When + the number of mount points is at or below the limit, interception is enabled + and used for all of them. When the number exceeds the limit, libpil4dfs must + gracefully disable interception (falling back to dfuse) rather than aborting + the application, so that no core file is produced. + + :avocado: recursive + """ + + def _add_mounts(self, pool, dfuse_hosts, dfuses, mount_dirs, target_count): + """Mount additional dfuse instances until target_count are mounted. + + Args: + pool (TestPool): pool to create the containers in. + dfuse_hosts (NodeSet): hosts on which to mount dfuse. + dfuses (list): list of running dfuse instances, extended in place. + mount_dirs (list): list of mount point directories, extended in place. + target_count (int): total number of dfuse mount points to have mounted. + """ + while len(dfuses) < target_count: + container = self.get_container(pool) + dfuse = get_dfuse(self, dfuse_hosts) + start_dfuse(self, dfuse, pool, container) + dfuses.append(dfuse) + mount_dirs.append(dfuse.mount_dir.value) + + def _verify_case(self, dfuse_hosts, env_str, mount_dirs, expect_intercept): + """Run a single libpil4dfs process across all current mount points and check interception. + + Args: + dfuse_hosts (NodeSet): hosts on which to run the command. + env_str (str): shell prefix that loads libpil4dfs and enables D_IL_REPORT. + mount_dirs (list): mount point directories of all currently mounted dfuse instances. + expect_intercept (bool): whether interception is expected to be enabled. + """ + mount_count = len(mount_dirs) + self.log_step( + f"Case: {mount_count} mount points, " + f"expecting interception to be {'enabled' if expect_intercept else 'disabled'}") + + # A single libpil4dfs-intercepted process that touches every mount point. At + # initialization libpil4dfs discovers all fuse.daos mounts in /proc/self/mounts, + # so this exercises the MAX_DAOS_MT table regardless of which mount is accessed. + stat_cmd = env_str + "stat " + " ".join(mount_dirs) + result = run_remote(self.log, dfuse_hosts, stat_cmd) + + # The process must always complete cleanly, regardless of how many mounts are + # present. Over the limit, libpil4dfs must disable interception gracefully and + # never abort (which would create a core file and fail the CI stage). + if not result.passed: + self.fail( + f"libpil4dfs process failed with {mount_count} mount points on " + f"{result.failed_hosts}; it must never abort") + + intercepted = INTERCEPT_MARKER in result.joined_stdout + + # Log the observed interception status so the test log shows each case behaving + # as expected (interception enabled at/below MAX_DAOS_MT, disabled above it). + self.log.info( + "Case result: %d mount points -> process succeeded, interception %s " + "(expected %s)", mount_count, "enabled" if intercepted else "disabled", + "enabled" if expect_intercept else "disabled") + + if expect_intercept and not intercepted: + self.fail( + f"Expected interception to be enabled with {mount_count} mount points, " + "but the libpil4dfs summary was not found") + if not expect_intercept and intercepted: + self.fail( + f"Expected interception to be disabled with {mount_count} mount points " + "(more than MAX_DAOS_MT), but the libpil4dfs summary was found") + + def test_pil4dfs_many_mounts(self): + """JIRA ID: DAOS-18890. + + Test Description: + Verify libpil4dfs behavior with dfuse mount point counts at/below and + above MAX_DAOS_MT, all within a single test run. No case may produce a + core file. Mounts accumulate across cases (rather than being recreated + for each) so the same dfuse instances are reused as the count grows. + + Steps: + 1.) Create a single pool. + 2.) For each count in intercept_mount_counts (ascending), mount + additional dfuse instances up to that count and confirm a single + libpil4dfs process uses them all (interception enabled). + 3.) Mount additional dfuse instances up to no_intercept_mount_count + (more than MAX_DAOS_MT) and confirm the libpil4dfs process + completes without aborting and with interception disabled. + + :avocado: tags=all,daily_regression + :avocado: tags=vm + :avocado: tags=dfuse,pil4dfs + :avocado: tags=Pil4dfsManyMounts,test_pil4dfs_many_mounts + """ + intercept_mount_counts = sorted(self.params.get( + "intercept_mount_counts", "/run/test/*", [10, 32])) + no_intercept_mount_count = self.params.get( + "no_intercept_mount_count", "/run/test/*", 33) + + lib_path = os.path.join(self.prefix, "lib64", "libpil4dfs.so") + env_str = EnvironmentVariables({ + "LD_PRELOAD": lib_path, + "D_IL_NO_BYPASS": 1, + "D_IL_REPORT": 1 + }).to_export_str() + dfuse_hosts = self.hostlist_clients + + self.log_step("Creating a single pool") + pool = self.get_pool(connect=False) + + dfuses = [] + mount_dirs = [] + try: + # Mounts accumulate across cases: grow up to each target count, verifying + # behavior at each step, rather than recreating mounts for every case. + for target_count in intercept_mount_counts: + self._add_mounts(pool, dfuse_hosts, dfuses, mount_dirs, target_count) + self._verify_case(dfuse_hosts, env_str, mount_dirs, expect_intercept=True) + + self._add_mounts(pool, dfuse_hosts, dfuses, mount_dirs, no_intercept_mount_count) + self._verify_case(dfuse_hosts, env_str, mount_dirs, expect_intercept=False) + finally: + for dfuse in dfuses: + dfuse.stop() + + self.log.info("Test passed") diff --git a/src/tests/ftest/dfuse/pil4dfs_many_mounts.yaml b/src/tests/ftest/dfuse/pil4dfs_many_mounts.yaml new file mode 100644 index 00000000000..8b649ca4c79 --- /dev/null +++ b/src/tests/ftest/dfuse/pil4dfs_many_mounts.yaml @@ -0,0 +1,29 @@ +hosts: + test_servers: 1 + test_clients: 1 +timeout: 900 +server_config: + name: daos_server + engines_per_host: 1 + engines: + 0: + targets: 4 + nr_xs_helpers: 0 + storage: + 0: + class: ram + scm_mount: /mnt/daos + system_ram_reserved: 1 +pool: + size: 1GiB +container: + type: POSIX + control_method: daos +test: + # Mount counts at/below MAX_DAOS_MT for which libpil4dfs enables interception. + intercept_mount_counts: + - 10 + - 32 + # Mount count above MAX_DAOS_MT for which libpil4dfs must gracefully disable + # interception (no abort, no core file). + no_intercept_mount_count: 33