diff --git a/src/common/pool_map.c b/src/common/pool_map.c
index c932ae67f6c..358f8dfb5e7 100644
--- a/src/common/pool_map.c
+++ b/src/common/pool_map.c
@@ -423,18 +423,19 @@ pool_buf_attach(struct pool_buf *buf, struct pool_component *comps,
 			return -DER_INVAL;
 		}
 
+		D_DEBUG(DB_TRACE, "nr %d %s\n", nr, pool_comp_type2str(comps[0].co_type));
+
 		if (comps[0].co_type == PO_COMP_TP_TARGET)
 			buf->pb_target_nr++;
 		else if (comps[0].co_type == PO_COMP_TP_RANK)
 			buf->pb_node_nr++;
-		else
+		else {
 			buf->pb_domain_nr++;
+			D_DEBUG(DB_TRACE, "pb_domain_nr=%u\n", buf->pb_domain_nr);
+		}
 
 		buf->pb_comps[nr] = comps[0];
 		buf->pb_comps[nr].co_flags &= ~PO_COMPF_CHK_DONE;
-
-		D_DEBUG(DB_TRACE, "nr %d %s\n", nr,
-			pool_comp_type2str(comps[0].co_type));
 	}
 	return 0;
 }
@@ -1531,11 +1532,13 @@ add_domain_tree_to_pool_buf(struct pool_map *map, struct pool_buf *map_buf, int
 		return rc;
 
 	/* discard the root - it's being added to the pool buf elsewhere */
+	D_DEBUG(DB_TRACE, "ndomains=%u\n", ndomains);
 	rc = d_fd_tree_next(&tree, &node);
 	while (rc == 0) {
 		struct pool_component map_comp = {0};
 
 		rc = d_fd_tree_next(&tree, &node);
+		D_DEBUG(DB_TRACE, "d_fd_tree_next() -> rc=%d\n", rc);
 		if (rc != 0) {
 			/* got to the end of the tree with no problems */
 			if (rc == -DER_NONEXIST)
@@ -1628,7 +1631,9 @@ add_domain_tree_to_pool_buf(struct pool_map *map, struct pool_buf *map_buf, int
 			map_comp.co_ver, map_comp.co_in_ver, map_comp.co_fseq,
 			map_comp.co_flags, map_comp.co_nr);
 
+		D_DEBUG(DB_TRACE, "add_domain_tree_to_pool_buf -> pool_buf_attach() call\n");
 		rc = pool_buf_attach(map_buf, &map_comp, 1);
+		D_DEBUG(DB_TRACE, "pb_domain_nr=%u\n", map_buf->pb_domain_nr);
 		if (rc != 0)
 			D_ERROR("failed attaching component ID %u to pool buf\n", map_comp.co_id);
 	}
@@ -1688,6 +1693,7 @@ gen_pool_buf(struct pool_map *map, struct pool_buf **map_buf_out, int map_versio
 		D_ERROR("failed to calculate number of domains, "DF_RC"\n", DP_RC(rc));
 		return rc;
 	}
+	D_DEBUG(DB_TRACE, "num_domain_comps=%u\n", num_domain_comps);
 	D_ASSERT(num_domain_comps > 0);
 	num_domain_comps--; /* remove the root domain -
allocated separately */
diff --git a/src/control/fault/code/codes.go b/src/control/fault/code/codes.go
index bb07f0b89ed..f6e7800a49e 100644
--- a/src/control/fault/code/codes.go
+++ b/src/control/fault/code/codes.go
@@ -161,6 +161,7 @@ const (
 	ServerPoolHasContainers
 	ServerPoolMemRatioNoRoles
 	ServerBadFaultDomainLabels
+	ServerPoolTooFewFaultDomains
 	ServerJoinReplaceEnabledPoolRank
 	ServerRankAdminExcluded
 	ServerTransparentHugepageEnabled
diff --git a/src/control/run.sh b/src/control/run.sh
new file mode 100755
index 00000000000..7860b65440b
--- /dev/null
+++ b/src/control/run.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+#
+# Developer helper: runs the CompressedFaultDomainTree unit test with the cgo
+# environment pointed at a local DAOS install.
+#
+# Environment overrides:
+#   DAOS_PREFIX  DAOS install root (default: /opt/daos)
+#   DAOS_SRC     DAOS source tree root (default: two levels above this script)
+#   GO           go binary (default: go from PATH, else /usr/local/go/bin/go)
+
+set -euo pipefail
+
+script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+prefix="${DAOS_PREFIX:-/opt/daos}"
+src="${DAOS_SRC:-$(cd "${script_dir}/../.." && pwd)}"
+prereq="${prefix}/prereq/debug"
+go_bin="${GO:-$(command -v go || echo /usr/local/go/bin/go)}"
+
+export LD_LIBRARY_PATH="${prefix}/lib:${prefix}/lib64:${prefix}/lib64/daos_srv:${prereq}/mercury/lib:${prereq}/spdk/lib64/daos_srv:${prereq}/ofi/lib"
+export CGO_LDFLAGS="-L${prefix}/lib -L${prefix}/lib64 -L${prefix}/lib64/daos_srv -L${src}/build/debug/gcc/src/control/lib/spdk -L${prereq}/mercury/lib -L${prereq}/spdk/lib64/daos_srv -L${prereq}/ofi/lib"
+export CGO_CFLAGS="-I${prefix}/include -I${prereq}/mercury/include -I${prereq}/spdk/include/daos_srv -I${prereq}/ofi/include -I${prereq}/argobots/include -I${src}/src/include"
+
+# The package path below is relative to src/control, where this script lives.
+cd "${script_dir}"
+"${go_bin}" test -v ./system/ -run TestSystem_Membership_CompressedFaultDomainTree
diff --git a/src/control/server/faults.go b/src/control/server/faults.go
index a2e51760026..4359d06f1a6 100644
--- a/src/control/server/faults.go
+++ b/src/control/server/faults.go
@@ -1,6 +1,6 @@
 //
 // (C) Copyright 2020-2024 Intel Corporation.
-// (C) Copyright 2025 Hewlett Packard Enterprise Development LP
+// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
 //
 // SPDX-License-Identifier: BSD-2-Clause-Patent
 //
@@ -174,6 +174,17 @@ var FaultPoolMemRatioNoRoles = serverFault(
 	"either remove MD-on-SSD-specific options from the command request or set bdev_roles in "+
 		"server config file to enable MD-on-SSD")
 
+// FaultPoolTooFewFaultDomains creates a fault reporting that a pool redundancy factor of rdFac
+// needs rdFac+1 fault domains but only numDomains are available.
+func FaultPoolTooFewFaultDomains(rdFac, numDomains int) *fault.Fault {
+	return serverFault(
+		code.ServerPoolTooFewFaultDomains,
+		fmt.Sprintf("pool redundancy factor %d requires at least %d fault domains but only %d are available",
+			rdFac, rdFac+1, numDomains),
+		"retry the request with a lower redundancy factor or add more fault domains",
+	)
+}
+
 func FaultBadFaultDomainLabels(faultPath, addr string, reqLabels, systemLabels []string) *fault.Fault {
 	return serverFault(
 		code.ServerBadFaultDomainLabels,
diff --git a/src/control/server/mgmt_pool.go b/src/control/server/mgmt_pool.go
index dbe5f0d5ae9..6919af3da92 100644
--- a/src/control/server/mgmt_pool.go
+++ b/src/control/server/mgmt_pool.go
@@ -1,6 +1,6 @@
 //
 // (C) Copyright 2020-2024 Intel Corporation.
-// (C) Copyright 2025 Hewlett Packard Enterprise Development LP
+// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
 //
 // SPDX-License-Identifier: BSD-2-Clause-Patent
 //
@@ -414,6 +414,22 @@ func (svc *mgmtSvc) poolCreate(parent context.Context, req *mgmtpb.PoolCreateReq
 		return nil, FaultPoolInvalidServiceReps(maxSvcReps)
 	}
 
+	// Check if the requested redundancy factor can be met with the number of supplied fault domains.
+	domainNr, err := svc.membership.DomainNr(svc.log, req.Ranks...)
+	if err != nil {
+		return nil, err
+	}
+	for _, prop := range req.GetProperties() {
+		if prop.GetNumber() == uint32(daos.PoolPropertyRedunFac) {
+			rdFac := int(prop.GetNumval())
+			if rdFac+1 > domainNr {
+				return nil, FaultPoolTooFewFaultDomains(rdFac, domainNr)
+			}
+			svc.log.Debugf("rdFac %d can be met with %d available fault domains", rdFac, domainNr)
+			break
+		}
+	}
+
 	// IO engine needs the fault domain tree for placement purposes
 	req.FaultDomains, err = svc.membership.CompressedFaultDomainTree(req.Ranks...)
 	if err != nil {
diff --git a/src/control/server/mgmt_pool_test.go b/src/control/server/mgmt_pool_test.go
index 51a3bdc2813..13035d89ba5 100644
--- a/src/control/server/mgmt_pool_test.go
+++ b/src/control/server/mgmt_pool_test.go
@@ -1,6 +1,6 @@
 //
 // (C) Copyright 2020-2024 Intel Corporation.
-// (C) Copyright 2025 Hewlett Packard Enterprise Development LP
+// (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
 //
 // SPDX-License-Identifier: BSD-2-Clause-Patent
 //
@@ -118,6 +118,23 @@ func testPoolLabelProp() []*mgmtpb.PoolProperty {
 	}
 }
 
+func testPoolRedunFacProp() []*mgmtpb.PoolProperty {
+	return []*mgmtpb.PoolProperty{
+		{
+			Number: daos.PoolPropertyLabel,
+			Value: &mgmtpb.PoolProperty_Strval{
+				Strval: "test",
+			},
+		},
+		{
+			Number: daos.PoolPropertyRedunFac,
+			Value: &mgmtpb.PoolProperty_Numval{
+				Numval: 1,
+			},
+		},
+	}
+}
+
 func TestServer_MgmtSvc_PoolCreateAlreadyExists(t *testing.T) {
 	for name, tc := range map[string]struct {
 		state   system.PoolServiceState
@@ -635,6 +652,33 @@ func TestServer_MgmtSvc_PoolCreate(t *testing.T) {
 			},
 			expErr: FaultPoolNoLabel,
 		},
+		"failed creation too few fault domains": {
+			targetCount: 1,
+			req: &mgmtpb.PoolCreateReq{
+				Uuid:       test.MockUUID(1),
+				TierBytes:  []uint64{100 * humanize.GiByte, 10 * humanize.TByte},
+				Ranks:      []uint32{0},
+				Properties: testPoolRedunFacProp(),
+			},
+			expErr: FaultPoolTooFewFaultDomains(1, 1),
+		},
+		"successful creation with rd_fac": {
+			targetCount: 2,
+			req: &mgmtpb.PoolCreateReq{
+				Uuid:       test.MockUUID(1),
+				TierBytes:  []uint64{100 * humanize.GiByte, 10 * humanize.TByte},
+				Ranks:      []uint32{0, 1},
+				Properties: testPoolRedunFacProp(),
+			},
+			drpcRet: &mgmtpb.PoolCreateResp{
+				TierBytes: []uint64{100 * humanize.GiByte, 10 * humanize.TByte},
+				TgtRanks:  []uint32{0, 1},
+			},
+			expResp: &mgmtpb.PoolCreateResp{
+				TierBytes: []uint64{100 * humanize.GiByte, 10 * humanize.TByte},
+				TgtRanks:  []uint32{0, 1},
+			},
+		},
 	} {
 		t.Run(name, func(t *testing.T) {
 			buf.Reset()
diff --git a/src/control/system/membership.go b/src/control/system/membership.go
index 691243c0682..631b66594b4 100644
--- a/src/control/system/membership.go
+++ b/src/control/system/membership.go
@@ -785,6 +785,44 @@ func (m *Membership) CompressedFaultDomainTree(ranks ...uint32) ([]uint32, error
 	return append([]uint32{md}, compressTree(subtree)...), nil
 }
 
+// printFaultDomainTree logs one debug line per node of the tree, parents before
+// children, lengthening the prefix by one "-" for each level below the start.
+func printFaultDomainTree(log logging.Logger, tree *FaultDomainTree, prefix string) {
+	log.Debugf("%s [%d] %s", prefix, tree.ID, tree.Domain.String())
+	for _, child := range tree.Children {
+		printFaultDomainTree(log, child, prefix+"-")
+	}
+}
+
+// DomainNr returns the number of fault domains in the subtree of the system
+// fault domain tree selected by ranks. When the full tree is deeper than
+// domain + rank, the layer below the root is treated as performance domains
+// and their children are counted instead. The subtree is dumped to log at
+// debug level.
+func (m *Membership) DomainNr(log logging.Logger, ranks ...uint32) (int, error) {
+	tree := m.db.FaultDomainTree()
+	if tree == nil {
+		return 0, errors.New("uninitialized fault domain tree")
+	}
+
+	subtree, err := getFaultDomainSubtree(tree, ranks...)
+	if err != nil {
+		return 0, err
+	}
+
+	log.Debugf("Fault domain tree (depth=%d):", subtree.Depth())
+	printFaultDomainTree(log, subtree, "")
+
+	// TODO DAOS-6353: Properly detect when fault and perf domain are requested.
+	// Currently any depth greater than the minimum must indicate a performance domain.
+	minDepth := 2 // domain + rank
+	if tree.Depth() <= minDepth {
+		// There are no perf domains, so all children of the root are fault domains.
+		return len(subtree.Children), nil
+	}
+
+	// The children of the root are perf domains; count the fault domains beneath them.
+	sum := 0
+	for _, child := range subtree.Children {
+		sum += len(child.Children)
+	}
+	return sum, nil
+}
+
 const (
 	DomTreeMetadataHasFaultDom uint32 = (1 << iota)
 	DomTreeMetadataHasPerfDom
diff --git a/src/control/system/membership_test.go b/src/control/system/membership_test.go
index 7d3e2db8473..81d6668b17b 100644
--- a/src/control/system/membership_test.go
+++ b/src/control/system/membership_test.go
@@ -1299,10 +1299,11 @@ func TestSystem_Membership_CompressedFaultDomainTree(t *testing.T) {
 	}
 
 	for name, tc := range map[string]struct {
-		tree       *FaultDomainTree
-		inputRanks []uint32
-		expResult  []uint32
-		expErr     error
+		tree        *FaultDomainTree
+		inputRanks  []uint32
+		expResult   []uint32
+		expErr      error
+		expDomainNr int
 	}{
 		"nil tree": {
 			expErr: errors.New("uninitialized fault domain tree"),
@@ -1315,6 +1316,7 @@ func TestSystem_Membership_CompressedFaultDomainTree(t *testing.T) {
 				ExpFaultDomainID(0),
 				0,
 			},
+			expDomainNr: 0,
 		},
 		"single branch, no rank leaves": {
 			tree: NewFaultDomainTree(
@@ -1335,6 +1337,7 @@ func TestSystem_Membership_CompressedFaultDomainTree(t *testing.T) {
 				ExpFaultDomainID(3),
 				0,
 			},
+			expDomainNr: 1,
 		},
 		"multi branch, no rank leaves": {
 			tree: NewFaultDomainTree(
@@ -1374,6 +1377,7 @@ func TestSystem_Membership_CompressedFaultDomainTree(t *testing.T) {
 				ExpFaultDomainID(8),
 				0,
 			},
+			expDomainNr: 5,
 		},
 		"single branch with rank leaves": {
 			tree: NewFaultDomainTree(
@@ -1395,6 +1399,7 @@ func TestSystem_Membership_CompressedFaultDomainTree(t *testing.T) {
 				1,
 				5,
 			},
+			expDomainNr: 1,
 		},
 		"multi branch with rank leaves": {
 			tree: NewFaultDomainTree(
@@ -1442,6 +1447,7 @@ func TestSystem_Membership_CompressedFaultDomainTree(t *testing.T) {
 				4,
 				5,
 			},
+			expDomainNr: 6,
 		},
 		"intermediate domain has name like rank": {
 			tree:
NewFaultDomainTree(
@@ -1463,6 +1469,7 @@ func TestSystem_Membership_CompressedFaultDomainTree(t *testing.T) {
 				1,
 				1, // rank
 			},
+			expDomainNr: 1,
 		},
 		"request one rank with node only": {
 			tree: NewFaultDomainTree(
@@ -1485,6 +1492,7 @@ func TestSystem_Membership_CompressedFaultDomainTree(t *testing.T) {
 				// ranks
 				4,
 			},
+			expDomainNr: 1,
 		},
 		"request one rank": {
 			tree: NewFaultDomainTree(
@@ -1510,6 +1518,7 @@ func TestSystem_Membership_CompressedFaultDomainTree(t *testing.T) {
 				// ranks
 				4,
 			},
+			expDomainNr: 1,
 		},
 		"request multiple ranks": {
 			tree: NewFaultDomainTree(
@@ -1550,6 +1559,7 @@ func TestSystem_Membership_CompressedFaultDomainTree(t *testing.T) {
 				4,
 				5,
 			},
+			expDomainNr: 4,
 		},
 		"request nonexistent rank": {
 			tree: NewFaultDomainTree(
@@ -1572,12 +1582,16 @@ func TestSystem_Membership_CompressedFaultDomainTree(t *testing.T) {
 			membership := NewMembership(log, db)
 
 			result, err := membership.CompressedFaultDomainTree(tc.inputRanks...)
 
 			test.CmpErr(t, tc.expErr, err)
 
 			if diff := cmp.Diff(tc.expResult, result); diff != "" {
 				t.Fatalf("(-want, +got): %s", diff)
 			}
+
+			domainNr, err := membership.DomainNr(log, tc.inputRanks...)
+			test.CmpErr(t, tc.expErr, err)
+			test.AssertEqual(t, tc.expDomainNr, domainNr, "unexpected domain number")
 		})
 	}
 }
diff --git a/src/pool/srv_pool.c b/src/pool/srv_pool.c
index fad6ef2ea25..54a1a3f8f89 100644
--- a/src/pool/srv_pool.c
+++ b/src/pool/srv_pool.c
@@ -809,6 +809,7 @@ init_pool_metadata(struct rdb_tx *tx, const rdb_path_t *kvs, uint32_t nnodes, co
 		D_GOTO(out_prop, rc);
 	}
 
+	D_DEBUG(DB_TRACE, "ndomains=%u\n", ndomains);
 	rc = gen_pool_buf(NULL /* map */, &map_buf, map_version, ndomains, nnodes, ntargets,
 			  domains, dss_tgt_nr);
 	if (rc != 0) {
diff --git a/src/tests/ftest/performance/ior_easy.yaml b/src/tests/ftest/performance/ior_easy.yaml
index 139f199f53e..34b95ffb0be 100644
--- a/src/tests/ftest/performance/ior_easy.yaml
+++ b/src/tests/ftest/performance/ior_easy.yaml
@@ -24,7 +24,7 @@ server_config:
 
 pool:
   size: 95%
-  properties: rd_fac:0,space_rb:0,ec_cell_sz:1MiB
+  properties: rd_fac:3,space_rb:0,ec_cell_sz:1MiB
 
 container:
   type: POSIX