From 835468abb7dc5a473d5a32683c91d0c732ae3fb4 Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Wed, 25 Mar 2026 18:15:46 +0800 Subject: [PATCH 01/11] feat(new_metrics): support server_stat command showing some important server-level metrics (part 4) --- src/shell/commands/node_management.cpp | 1 + src/utils/metrics.cpp | 52 ++++++++++++++++++-------- src/utils/metrics.h | 29 ++++++++++++-- 3 files changed, 63 insertions(+), 19 deletions(-) diff --git a/src/shell/commands/node_management.cpp b/src/shell/commands/node_management.cpp index c8614b2032..7826f89598 100644 --- a/src/shell/commands/node_management.cpp +++ b/src/shell/commands/node_management.cpp @@ -262,6 +262,7 @@ dsn::metric_filters server_stat_filters() "rdb_manual_compact_queued_tasks", "rdb_manual_compact_running_tasks", }; + filters.as_value = true; return filters; } diff --git a/src/utils/metrics.cpp b/src/utils/metrics.cpp index d457f1c226..541024fc0c 100644 --- a/src/utils/metrics.cpp +++ b/src/utils/metrics.cpp @@ -256,6 +256,7 @@ std::string metric_filters::to_query_string() const COMBINE_FIELD_PAIR(ids, entity_ids); COMBINE_FIELD_PAIR(attributes, entity_attrs); COMBINE_FIELD_PAIR(metrics, entity_metrics); + fields.push_back(fmt::format("as_value={}", as_value)); #undef COMBINE_FIELD_PAIR @@ -341,16 +342,17 @@ void metrics_http_service::get_metrics_handler(const http_request &req, http_res metric_filters filters; bool with_metric_fields = false; bool detail = false; - for (const auto &field : req.query_args) { - if (field.first == "with_metric_fields") { - parse_as(field.second, filters.with_metric_fields); + bool as_value = false; + for (const auto &[field,value] : req.query_args) { + if (field == "with_metric_fields") { + parse_as(value, filters.with_metric_fields); with_metric_fields = true; - } else if (field.first == "types") { - parse_as(field.second, filters.entity_types); - } else if (field.first == "ids") { - parse_as(field.second, filters.entity_ids); - } else if (field.first == "attributes") { - parse_as(field.second, filters.entity_attrs); + } else if (field == "types") { + parse_as(value, filters.entity_types); + } else if (field == "ids") { + parse_as(value, filters.entity_ids); + } else if (field == "attributes") { + parse_as(value, filters.entity_attrs); if ((filters.entity_attrs.size() & 1) != 0) { resp.body = encode_error_as_json("the number of arguments for attributes should be even, " @@ -358,17 +360,24 @@ void metrics_http_service::get_metrics_handler(const http_request &req, http_res resp.status_code = http_status_code::kBadRequest; return; } - } else if (field.first == "metrics") { - parse_as(field.second, filters.entity_metrics); - } else if (field.first == "detail") { - if (!buf2bool(field.second, detail)) { - resp.body = encode_error_as_json("the value of detail should be a boolean value, " + } else if (field == "metrics") { + parse_as(value, filters.entity_metrics); + } else if (field == "detail") { + if (!buf2bool(value, detail)) { + resp.body = encode_error_as_json("the field `detail` should be a boolean value, " + "i.e. true or false"); + resp.status_code = http_status_code::kBadRequest; + return; + } + } else if (field == "as_value") { + if (!buf2bool(value, as_value)) { + resp.body = encode_error_as_json("the field `as_value` should be a boolean value, " "i.e. true or false"); resp.status_code = http_status_code::kBadRequest; return; } } else { - auto error_message = fmt::format("unknown field {}={}", field.first, field.second); + auto error_message = fmt::format("unknown field {}={}", field, value); resp.body = encode_error_as_json(error_message.c_str()); resp.status_code = http_status_code::kBadRequest; return; @@ -381,6 +390,19 @@ void metrics_http_service::get_metrics_handler(const http_request &req, http_res filters.with_metric_fields = kBriefMetricFields; } + if (as_value) { + int kth_count{0}; + for (const auto &kth : kAllKthPercentiles) { + if (gutil::ContainsKey(filters.with_metric_fields, kth.name)) { + ++kth_count; + } + } + + if (kth_count == 1) { + filters.as_value = true; + } + } + resp.body = take_snapshot_as_json(_registry, filters); resp.status_code = http_status_code::kOk; } diff --git a/src/utils/metrics.h b/src/utils/metrics.h index 8fe1eb1122..c1af184d99 100644 --- a/src/utils/metrics.h +++ b/src/utils/metrics.h @@ -480,7 +480,7 @@ struct metric_filters const std::unordered_set &white_list) { RETURN_MATCHED_WITH_EMPTY_WHITE_LIST(white_list); - return white_list.find(candidate) != white_list.end(); + return gutil::ContainsKey(white_list, candidate); } // According to the parameters requested by client, this function will filter metric @@ -536,6 +536,8 @@ struct metric_filters entity_attrs_type entity_attrs; entity_metrics_type entity_metrics; + + bool as_value{false}; }; inline std::string encode_as_json(std::function encoder) @@ -955,6 +957,17 @@ class metric : public ref_counter explicit metric(const metric_prototype *prototype); virtual ~metric() = default; + // Encode a metric field specified by `field_name` as json format. However, once the field + // are not chosen by `filters`, this function will do nothing. + template + static inline void encode(metric_json_writer &writer, + const std::string &field_name, + const T &value) + { + writer.Key(field_name.c_str()); + json::json_encode(writer, value); + } + // Encode a metric field specified by `field_name` as json format. However, once the field // are not chosen by `filters`, this function will do nothing. template @@ -967,8 +980,7 @@ class metric : public ref_counter return; } - writer.Key(field_name.c_str()); - json::json_encode(writer, value); + encode(writer, field_name, value); } // Encode the metric type as json format, if it is chosen by `filters`. @@ -1386,7 +1398,16 @@ class percentile : public closeable_metric continue; } - encode(writer, kAllKthPercentiles[i].name, value(i), filters); + if (!filters.match_with_metric_field(field_name)) { + continue; + } + + if (filters.as_value) { + encode(writer, kMetricSingleValueField, value(i)); + break; + } + + encode(writer, kAllKthPercentiles[i].name, value(i)); } writer.EndObject(); From b0addad7a5a86aaf6a51c186ad6dd880ae254504 Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Wed, 25 Mar 2026 18:39:39 +0800 Subject: [PATCH 02/11] format --- src/utils/metrics.cpp | 4 ++-- src/utils/metrics.h | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/utils/metrics.cpp b/src/utils/metrics.cpp index 541024fc0c..a6c65cea59 100644 --- a/src/utils/metrics.cpp +++ b/src/utils/metrics.cpp @@ -256,7 +256,7 @@ std::string metric_filters::to_query_string() const COMBINE_FIELD_PAIR(ids, entity_ids); COMBINE_FIELD_PAIR(attributes, entity_attrs); COMBINE_FIELD_PAIR(metrics, entity_metrics); - fields.push_back(fmt::format("as_value={}", as_value)); + fields.push_back(fmt::format("as_value={}", as_value)); #undef COMBINE_FIELD_PAIR @@ -343,7 +343,7 @@ void metrics_http_service::get_metrics_handler(const http_request &req, http_res bool with_metric_fields = false; bool detail = false; bool as_value = false; - for (const auto &[field,value] : req.query_args) { + for (const auto &[field, value] : req.query_args) { if (field == "with_metric_fields") { parse_as(value, filters.with_metric_fields); with_metric_fields = true; diff --git a/src/utils/metrics.h b/src/utils/metrics.h index c1af184d99..a373d119d5 100644 --- a/src/utils/metrics.h +++ b/src/utils/metrics.h @@ -960,9 +960,8 @@ class metric : public ref_counter // Encode a metric field specified by `field_name` as json format. However, once the field // are not chosen by `filters`, this function will do nothing. template - static inline void encode(metric_json_writer &writer, - const std::string &field_name, - const T &value) + static inline void + encode(metric_json_writer &writer, const std::string &field_name, const T &value) { writer.Key(field_name.c_str()); json::json_encode(writer, value); @@ -1398,7 +1397,7 @@ class percentile : public closeable_metric continue; } - if (!filters.match_with_metric_field(field_name)) { + if (!filters.match_with_metric_field(kAllKthPercentiles[i].name)) { continue; } From 87c01de2e8b00274b890f88bbf064bbe227231a0 Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Fri, 27 Mar 2026 14:39:24 +0800 Subject: [PATCH 03/11] aggregate stats and show them as json format --- src/shell/command_helper.h | 76 ++++++++++--- src/shell/commands/node_management.cpp | 151 +++++++++++++++++++------ 2 files changed, 179 insertions(+), 48 deletions(-) diff --git a/src/shell/command_helper.h b/src/shell/command_helper.h index 3dbd17be42..8274bec708 100644 --- a/src/shell/command_helper.h +++ b/src/shell/command_helper.h @@ -1134,8 +1134,8 @@ class total_aggregate_stats : public aggregate_stats void calc_rates(double duration_s) override { - for (auto &stat_var : _my_stat_vars) { - *stat_var.second /= duration_s; + for (auto &[_, stat_var] : _my_stat_vars) { + *stat_var /= duration_s; } } @@ -1178,13 +1178,13 @@ class table_aggregate_stats : public aggregate_stats { RETURN_NULL_STAT_VARS_IF(entity_type != _my_entity_type); - int32_t metric_table_id; + int32_t metric_table_id{-1}; RETURN_NULL_STAT_VARS_IF_NOT_OK(dsn::parse_metric_table_id(entity_attrs, metric_table_id)); // Empty `_my_partitions` means there is no restriction; otherwise, the partition id // should be found in `_my_partitions`. if (!_my_partitions.empty()) { - int32_t metric_partition_id; + int32_t metric_partition_id{-1}; RETURN_NULL_STAT_VARS_IF_NOT_OK( dsn::parse_metric_partition_id(entity_attrs, metric_partition_id)); @@ -1192,7 +1192,7 @@ class table_aggregate_stats : public aggregate_stats RETURN_NULL_STAT_VARS_IF(_my_partitions.find(metric_pid) == _my_partitions.end()); } - const auto &table_stat = _my_table_stats.find(metric_table_id); + const auto table_stat = _my_table_stats.find(metric_table_id); CHECK_TRUE(table_stat != _my_table_stats.end()); *stat_vars = &table_stat->second; @@ -1201,9 +1201,9 @@ class table_aggregate_stats : public aggregate_stats void calc_rates(double duration_s) override { - for (auto &table_stats : _my_table_stats) { - for (auto &stat_var : table_stats.second) { - *stat_var.second /= duration_s; + for (auto &[_, table_stat] : _my_table_stats) { + for (auto &[_, stat_var] : table_stat) { + *stat_var /= duration_s; } } } @@ -1239,15 +1239,15 @@ class partition_aggregate_stats : public aggregate_stats { RETURN_NULL_STAT_VARS_IF(entity_type != _my_entity_type); - int32_t metric_table_id; + int32_t metric_table_id{-1}; RETURN_NULL_STAT_VARS_IF_NOT_OK(dsn::parse_metric_table_id(entity_attrs, metric_table_id)); - int32_t metric_partition_id; + int32_t metric_partition_id{-1}; RETURN_NULL_STAT_VARS_IF_NOT_OK( dsn::parse_metric_partition_id(entity_attrs, metric_partition_id)); dsn::gpid metric_pid(metric_table_id, metric_partition_id); - const auto &partition_stat = _my_partition_stats.find(metric_pid); + const auto partition_stat = _my_partition_stats.find(metric_pid); RETURN_NULL_STAT_VARS_IF(partition_stat == _my_partition_stats.end()); *stat_vars = &partition_stat->second; @@ -1256,9 +1256,9 @@ class partition_aggregate_stats : public aggregate_stats void calc_rates(double duration_s) override { - for (auto &partition_stats : _my_partition_stats) { - for (auto &stat_var : partition_stats.second) { - *stat_var.second /= duration_s; + for (auto &[_, partition_stat] : _my_partition_stats) { + for (auto &[_, stat_var] : partition_stat) { + *stat_var /= duration_s; } } } @@ -1270,6 +1270,54 @@ class partition_aggregate_stats : public aggregate_stats partition_stat_map _my_partition_stats; }; +using profiler_stat_map = std::unordered_map; + +// Profiler-level aggregation over the fetched metrics. There are 2 dimensions for the aggregation: +// * the task name, the name of the RPC task, from the attributes of the metric entity; +// * the metric name, which is also the key of `stat_var_map`. +class profiler_aggregate_stats : public aggregate_stats +{ +public: + profiler_aggregate_stats(const std::string &entity_type, profiler_stat_map &&profiler_stats) + : _my_entity_type(entity_type), _my_profiler_stats(std::move(profiler_stats)) + { + } + + ~profiler_aggregate_stats() override = default; + +protected: + dsn::error_s get_stat_vars(const std::string &entity_type, + const dsn::metric_entity::attr_map &entity_attrs, + stat_var_map **stat_vars) override + { + RETURN_NULL_STAT_VARS_IF(entity_type != _my_entity_type); + + const auto attr = std::as_const(entity_attrs).find("task_name"); + RETURN_NULL_STAT_VARS_IF(attr == entity_attrs.end()); + + const auto profiler_stat = _my_profiler_stats.find(attr->second); + RETURN_NULL_STAT_VARS_IF(profiler_stat == _my_profiler_stats.end()); + + *stat_vars = &profiler_stat->second; + return dsn::error_s::ok(); + } + + void calc_rates(double duration_s) override + { + for (auto &[_, profiler_stat] : _my_profiler_stats) { + for (auto &[_, stat_var] : profiler_stat) { + *stat_var /= duration_s; + } + } + } + +private: + DISALLOW_COPY_AND_ASSIGN(profiler_aggregate_stats); + + const std::string _my_entity_type; + profiler_stat_map _my_profiler_stats; +}; + inline std::vector> call_remote_command(shell_context *sc, const std::vector &nodes, diff --git a/src/shell/commands/node_management.cpp b/src/shell/commands/node_management.cpp index 7826f89598..7f60c83dbd 100644 --- a/src/shell/commands/node_management.cpp +++ b/src/shell/commands/node_management.cpp @@ -22,11 +22,12 @@ #include #include #include -#include -#include #include +#include // IWYU pragma: no_include #include +#include +#include #include #include #include @@ -245,7 +246,7 @@ dsn::metric_filters server_stat_filters() { dsn::metric_filters filters; filters.with_metric_fields = {dsn::kMetricNameField, dsn::kMetricSingleValueField}; - filters.entity_types = {"server", "replica"}; + filters.entity_types = {"server", "replica", "profiler"}; filters.entity_metrics = { "virtual_mem_usage_mb", "resident_mem_usage_mb", @@ -261,6 +262,8 @@ dsn::metric_filters server_stat_filters() "rdb_block_cache_mem_usage_bytes", "rdb_manual_compact_queued_tasks", "rdb_manual_compact_running_tasks", + "profiler_executed_tasks", + "profiler_server_rpc_latency_ns", }; filters.as_value = true; return filters; @@ -302,6 +305,36 @@ struct replica_server_stats { replica_server_stats() = default; + std::string to_json_string() const + { + nlohmann::json stats; + stats["virt_mem_mb"] = virt_mem_mb; + stats["res_mem_mb"] = res_mem_mb; + stats["total_replicas"] = total_replicas; + stats["opening_replicas"] = opening_replicas; + stats["closing_replicas"] = closing_replicas; + stats["inactive_replicas"] = inactive_replicas; + stats["error_replicas"] = error_replicas; + stats["primary_replicas"] = primary_replicas; + stats["secondary_replicas"] = secondary_replicas; + stats["learning_replicas"] = learning_replicas; + stats["splitting_replicas"] = splitting_replicas; + stats["rdb_block_cache_mem_usage_bytes"] = rdb_block_cache_mem_usage_bytes; + stats["rdb_manual_compact_queued_tasks"] = rdb_manual_compact_queued_tasks; + stats["rdb_manual_compact_running_tasks"] = rdb_manual_compact_running_tasks; + stats["get_qps"] = get_qps; + stats["get_p99"] = get_p99; + stats["multi_get_qps"] = multi_get_qps; + stats["multi_get_p99"] = multi_get_p99; + stats["batch_get_qps"] = batch_get_qps; + stats["batch_get_p99"] = batch_get_p99; + stats["put_qps"] = put_qps; + stats["put_p99"] = put_p99; + stats["multi_put_qps"] = multi_put_qps; + stats["multi_put_p99"] = multi_put_p99; + return stats.dump(); + } + double virt_mem_mb{0.0}; double res_mem_mb{0.0}; @@ -319,30 +352,23 @@ struct replica_server_stats double rdb_manual_compact_queued_tasks{0.0}; double rdb_manual_compact_running_tasks{0.0}; - DEFINE_JSON_SERIALIZATION(virt_mem_mb, - res_mem_mb, - total_replicas, - opening_replicas, - closing_replicas, - inactive_replicas, - error_replicas, - primary_replicas, - secondary_replicas, - learning_replicas, - splitting_replicas, - rdb_block_cache_mem_usage_bytes, - rdb_manual_compact_queued_tasks, - rdb_manual_compact_running_tasks) + double get_qps{0.0}; + double get_p99{0.0}; + double multi_get_qps{0.0}; + double multi_get_p99{0.0}; + double batch_get_qps{0.0}; + double batch_get_p99{0.0}; + double put_qps{0.0}; + double put_p99{0.0}; + double multi_put_qps{0.0}; + double multi_put_p99{0.0}; }; -std::pair -aggregate_replica_server_stats(const node_desc &node, - const dsn::metric_query_brief_value_snapshot &query_snapshot_start, - const dsn::metric_query_brief_value_snapshot &query_snapshot_end) +std::unique_ptr +create_replica_server_stats_total_calcs(replica_server_stats &stats) { - aggregate_stats_calcs calcs; - replica_server_stats stats; - calcs.create_assignments( + auto calcs = std::make_unique(); + calcs->create_assignments( "server", stat_var_map({ {"virtual_mem_usage_mb", &stats.virt_mem_mb}, @@ -358,24 +384,81 @@ aggregate_replica_server_stats(const node_desc &node, {"splitting_replicas", &stats.splitting_replicas}, {"rdb_block_cache_mem_usage_bytes", &stats.rdb_block_cache_mem_usage_bytes}, })); - calcs.create_sums( + calcs->create_sums( "replica", stat_var_map({ {"rdb_manual_compact_queued_tasks", &stats.rdb_manual_compact_queued_tasks}, {"rdb_manual_compact_running_tasks", &stats.rdb_manual_compact_running_tasks}, })); - const auto command_result = process_parse_metrics_result( - calcs.aggregate_metrics(query_snapshot_start, query_snapshot_end), - node, - "aggregate replica server stats"); - if (!command_result) { - // Metrics failed to be aggregated. - return std::make_pair(false, command_result.description()); + return calcs; +} + +stat_var_map create_profiler_rpc_assignments(double *var) +{ + return stat_var_map({ + {"profiler_server_rpc_latency_ns", var}, + }); +} + +stat_var_map create_profiler_rpc_rates(double *var) +{ + return stat_var_map({ + {"profiler_executed_tasks", var}, + }); +} + +// Create all aggregations needed for the profiler-level stats. +std::unique_ptr +create_replica_server_stats_profiler_calcs(replica_server_stats &stats) +{ + const std::array, 5> tasks = { + {{"RPC_RRDB_RRDB_GET", &stats.get_qps, &stats.get_p99}, + {"RPC_RRDB_RRDB_MULTI_GET", &stats.multi_get_qps, &stats.multi_get_p99}, + {"RPC_RRDB_RRDB_BATCH_GET", &stats.batch_get_qps, &stats.batch_get_p99}, + {"RPC_RRDB_RRDB_PUT", &stats.put_qps, &stats.put_p99}, + {"RPC_RRDB_RRDB_MULTI_PUT", &stats.multi_put_qps, &stats.multi_put_p99}}}; + + profiler_stat_map assignments; + profiler_stat_map rates; + for (const auto &[name, qps, p99] : tasks) { + assignments.emplace(name, create_profiler_rpc_assignments(p99)); + rates.emplace(name, create_profiler_rpc_rates(qps)); } - return std::make_pair( - true, dsn::json::json_forwarder::encode(stats).to_string()); + auto calcs = std::make_unique(); + calcs->create_assignments("profiler", std::move(assignments)); + calcs->create_rates("profiler", std::move(rates)); + return calcs; +} + +std::pair +aggregate_replica_server_stats(const node_desc &node, + const dsn::metric_query_brief_value_snapshot &query_snapshot_start, + const dsn::metric_query_brief_value_snapshot &query_snapshot_end) +{ + replica_server_stats stats; + +#define AGGREGATE_SERVER_STATS(calcs, info) \ + do { \ + const auto command_result = process_parse_metrics_result( \ + calcs->aggregate_metrics(query_snapshot_start, query_snapshot_end), \ + node, \ + "aggregate total replica server stats"); \ + if (!command_result) { \ + return std::make_pair(false, command_result.description()); \ + } \ + } while (0) + + auto total_calcs = create_replica_server_stats_total_calcs(stats); + AGGREGATE_SERVER_STATS(total_calcs, "aggregate total replica server stats"); + + auto profiler_calcs = create_replica_server_stats_profiler_calcs(stats); + AGGREGATE_SERVER_STATS(profiler_calcs, "aggregate profiler replica server stats"); + +#undef AGGREGATE_SERVER_STATS + + return std::make_pair(true, stats.to_json_string()); } std::vector> get_server_stats(const std::vector &nodes, From 27900e3989e4acd5387d5af1033ddbf6bfc0f4ef Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Fri, 27 Mar 2026 15:31:13 +0800 Subject: [PATCH 04/11] fix that failed to assign rpc p99 --- src/shell/commands/node_management.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/shell/commands/node_management.cpp b/src/shell/commands/node_management.cpp index 7f60c83dbd..e84074c40a 100644 --- a/src/shell/commands/node_management.cpp +++ b/src/shell/commands/node_management.cpp @@ -245,7 +245,9 @@ dsn::metric_filters rw_requests_filters() dsn::metric_filters server_stat_filters() { dsn::metric_filters filters; - filters.with_metric_fields = {dsn::kMetricNameField, dsn::kMetricSingleValueField}; + filters.with_metric_fields = {dsn::kMetricNameField, + dsn::kMetricSingleValueField, + dsn::kth_percentile_to_name(dsn::kth_percentile_type::P99)}; filters.entity_types = {"server", "replica", "profiler"}; filters.entity_metrics = { "virtual_mem_usage_mb", From d2187d39655aff586d71016f2ea082393d9a5640 Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Fri, 27 Mar 2026 15:51:18 +0800 Subject: [PATCH 05/11] optimize printing --- src/shell/commands/node_management.cpp | 56 ++++++++++++++------------ 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/src/shell/commands/node_management.cpp b/src/shell/commands/node_management.cpp index e84074c40a..8002d79780 100644 --- a/src/shell/commands/node_management.cpp +++ b/src/shell/commands/node_management.cpp @@ -309,32 +309,36 @@ struct replica_server_stats std::string to_json_string() const { - nlohmann::json stats; - stats["virt_mem_mb"] = virt_mem_mb; - stats["res_mem_mb"] = res_mem_mb; - stats["total_replicas"] = total_replicas; - stats["opening_replicas"] = opening_replicas; - stats["closing_replicas"] = closing_replicas; - stats["inactive_replicas"] = inactive_replicas; - stats["error_replicas"] = error_replicas; - stats["primary_replicas"] = primary_replicas; - stats["secondary_replicas"] = secondary_replicas; - stats["learning_replicas"] = learning_replicas; - stats["splitting_replicas"] = splitting_replicas; - stats["rdb_block_cache_mem_usage_bytes"] = rdb_block_cache_mem_usage_bytes; - stats["rdb_manual_compact_queued_tasks"] = rdb_manual_compact_queued_tasks; - stats["rdb_manual_compact_running_tasks"] = rdb_manual_compact_running_tasks; - stats["get_qps"] = get_qps; - stats["get_p99"] = get_p99; - stats["multi_get_qps"] = multi_get_qps; - stats["multi_get_p99"] = multi_get_p99; - stats["batch_get_qps"] = batch_get_qps; - stats["batch_get_p99"] = batch_get_p99; - stats["put_qps"] = put_qps; - stats["put_p99"] = put_p99; - stats["multi_put_qps"] = multi_put_qps; - stats["multi_put_p99"] = multi_put_p99; - return stats.dump(); + nlohmann::json rpc; + rpc["get_qps"] = get_qps; + rpc["get_p99(ms)"] = get_p99 / 1e6; + rpc["multi_get_qps"] = multi_get_qps; + rpc["multi_get_p99(ms)"] = multi_get_p99 / 1e6; + rpc["batch_get_qps"] = batch_get_qps; + rpc["batch_get_p99(ms)"] = batch_get_p99 / 1e6; + rpc["put_qps"] = put_qps; + rpc["put_p99(ms)"] = put_p99 / 1e6; + rpc["multi_put_qps"] = multi_put_qps; + rpc["multi_put_p99(ms)"] = multi_put_p99 / 1e6; + + nlohmann::json result; + result["virt_mem_mb"] = virt_mem_mb; + result["res_mem_mb"] = res_mem_mb; + result["total_replicas"] = total_replicas; + result["opening_replicas"] = opening_replicas; + result["closing_replicas"] = closing_replicas; + result["inactive_replicas"] = inactive_replicas; + result["error_replicas"] = error_replicas; + result["primary_replicas"] = primary_replicas; + result["secondary_replicas"] = secondary_replicas; + result["learning_replicas"] = learning_replicas; + result["splitting_replicas"] = splitting_replicas; + result["rdb_block_cache_mem_usage_bytes"] = rdb_block_cache_mem_usage_bytes; + result["rdb_manual_compact_queued_tasks"] = rdb_manual_compact_queued_tasks; + result["rdb_manual_compact_running_tasks"] = rdb_manual_compact_running_tasks; + result["rpc"] = rpc; + + return result.dump(); } double virt_mem_mb{0.0}; From 4295a3404f4ec0843e943060a7753f03626d31a3 Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Fri, 27 Mar 2026 16:35:55 +0800 Subject: [PATCH 06/11] add comments --- src/shell/commands/node_management.cpp | 4 ++++ src/utils/metrics.cpp | 5 +++++ src/utils/metrics.h | 15 +++++++++++++++ 3 files changed, 24 insertions(+) diff --git a/src/shell/commands/node_management.cpp b/src/shell/commands/node_management.cpp index 8002d79780..bfcac105d9 100644 --- a/src/shell/commands/node_management.cpp +++ b/src/shell/commands/node_management.cpp @@ -307,6 +307,10 @@ struct replica_server_stats { replica_server_stats() = default; + // `DEFINE_JSON_SERIALIZATION` is not used to encode the member variables of + // `replica_server_stats` into JSON because the number of its member variables + // is very large and far exceeds the parameter limit of this macro. Increasing + // the macro's parameter limit would make the code overly verbose. std::string to_json_string() const { nlohmann::json rpc; diff --git a/src/utils/metrics.cpp b/src/utils/metrics.cpp index a6c65cea59..4f27cdccf5 100644 --- a/src/utils/metrics.cpp +++ b/src/utils/metrics.cpp @@ -390,6 +390,11 @@ void metrics_http_service::get_metrics_handler(const http_request &req, http_res filters.with_metric_fields = kBriefMetricFields; } + // If the client specifies `as_value=true` in the HTTP request, it is necessary to + // check whether metrics in `with_metric_fields` that contain multiple values (such + // as percentiles) only need to return a single value to the client. If so, the + // server-side `as_value` should be set to true, so that the returned field name + // in the response is "value". if (as_value) { int kth_count{0}; for (const auto &kth : kAllKthPercentiles) { diff --git a/src/utils/metrics.h b/src/utils/metrics.h index a373d119d5..bb69d19654 100644 --- a/src/utils/metrics.h +++ b/src/utils/metrics.h @@ -537,6 +537,17 @@ struct metric_filters entity_metrics_type entity_metrics; + // When the `as_value` field is used to construct the query string in an HTTP request, + // setting it to true means that for metrics with multiple values (such as percentiles), + // if the server determines that only a single value will be returned to the client, it + // should name that field "value" instead of something like "p99". + // + // When the `as_value` field is used on the HTTP server side to process a request, true + // means that for multi-value metrics (such as percentiles), only one value will be + // returned to the client, and this single returned field will be named "value". + // + // This parameter can greatly simplify the structured processing of JSON responses + // returned by the server. bool as_value{false}; }; @@ -1401,6 +1412,10 @@ class percentile : public closeable_metric continue; } + // If `as_value` is true, then for metrics with multiple values (such as + // percentiles), only one value needs to be returned to the client. Therefore, + // its field name is set to "value" and the loop is exited (since only a single + // value is required to be returned). if (filters.as_value) { encode(writer, kMetricSingleValueField, value(i)); break; From 6c4ed71d95781d636ad0c3b18f1987d0ee309246 Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Fri, 27 Mar 2026 16:47:51 +0800 Subject: [PATCH 07/11] fix comments --- src/utils/metrics.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/utils/metrics.h b/src/utils/metrics.h index bb69d19654..8a80be9fbc 100644 --- a/src/utils/metrics.h +++ b/src/utils/metrics.h @@ -968,8 +968,7 @@ class metric : public ref_counter explicit metric(const metric_prototype *prototype); virtual ~metric() = default; - // Encode a metric field specified by `field_name` as json format. However, once the field - // are not chosen by `filters`, this function will do nothing. + // Encode a metric field specified by `field_name` with `value` as json format. template static inline void encode(metric_json_writer &writer, const std::string &field_name, const T &value) @@ -978,8 +977,8 @@ class metric : public ref_counter json::json_encode(writer, value); } - // Encode a metric field specified by `field_name` as json format. However, once the field - // are not chosen by `filters`, this function will do nothing. + // Encode a metric field specified by `field_name` with `value` as json format. However, + // once the field are not chosen by `filters`, this function will do nothing. template static inline void encode(metric_json_writer &writer, const std::string &field_name, From f8d9d3404c628aea324e7ad20f01a7f756534e17 Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Wed, 1 Apr 2026 16:38:23 +0800 Subject: [PATCH 08/11] fix IWYU --- src/shell/command_helper.h | 10 +++++++--- src/utils/metrics.cpp | 2 +- src/utils/metrics.h | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/shell/command_helper.h b/src/shell/command_helper.h index 8274bec708..a0c464d311 100644 --- a/src/shell/command_helper.h +++ b/src/shell/command_helper.h @@ -1141,6 +1141,7 @@ class total_aggregate_stats : public aggregate_stats private: DISALLOW_COPY_AND_ASSIGN(total_aggregate_stats); + DISALLOW_MOVE_AND_ASSIGN(total_aggregate_stats); const std::string _my_entity_type; stat_var_map _my_stat_vars; @@ -1165,7 +1166,7 @@ class table_aggregate_stats : public aggregate_stats const std::unordered_set &partitions) : _my_entity_type(entity_type), _my_table_stats(std::move(table_stats)), - _my_partitions(std::move(partitions)) + _my_partitions(partitions) { } @@ -1210,6 +1211,7 @@ class table_aggregate_stats : public aggregate_stats private: DISALLOW_COPY_AND_ASSIGN(table_aggregate_stats); + DISALLOW_MOVE_AND_ASSIGN(table_aggregate_stats); const std::string _my_entity_type; table_stat_map _my_table_stats; @@ -1265,6 +1267,7 @@ class partition_aggregate_stats : public aggregate_stats private: DISALLOW_COPY_AND_ASSIGN(partition_aggregate_stats); + DISALLOW_MOVE_AND_ASSIGN(partition_aggregate_stats); const std::string _my_entity_type; partition_stat_map _my_partition_stats; @@ -1278,8 +1281,8 @@ using profiler_stat_map = std::unordered_map; class profiler_aggregate_stats : public aggregate_stats { public: - profiler_aggregate_stats(const std::string &entity_type, profiler_stat_map &&profiler_stats) - : _my_entity_type(entity_type), _my_profiler_stats(std::move(profiler_stats)) + profiler_aggregate_stats(std::string entity_type, profiler_stat_map &&profiler_stats) + : _my_entity_type(std::move(entity_type)), _my_profiler_stats(std::move(profiler_stats)) { } @@ -1313,6 +1316,7 @@ class profiler_aggregate_stats : public aggregate_stats private: DISALLOW_COPY_AND_ASSIGN(profiler_aggregate_stats); + DISALLOW_MOVE_AND_ASSIGN(profiler_aggregate_stats); const std::string _my_entity_type; profiler_stat_map _my_profiler_stats; diff --git a/src/utils/metrics.cpp b/src/utils/metrics.cpp index 4f27cdccf5..b0ead1613f 100644 --- a/src/utils/metrics.cpp +++ b/src/utils/metrics.cpp @@ -353,7 +353,7 @@ void metrics_http_service::get_metrics_handler(const http_request &req, http_res parse_as(value, filters.entity_ids); } else if (field == "attributes") { parse_as(value, filters.entity_attrs); - if ((filters.entity_attrs.size() & 1) != 0) { + if ((filters.entity_attrs.size() & 1U) != 0) { resp.body = encode_error_as_json("the number of arguments for attributes should be even, " "since each attribute name always pairs with a value"); diff --git a/src/utils/metrics.h b/src/utils/metrics.h index 8a80be9fbc..5856a5fa41 100644 --- a/src/utils/metrics.h +++ b/src/utils/metrics.h @@ -966,7 +966,7 @@ class metric : public ref_counter protected: explicit metric(const metric_prototype *prototype); - virtual ~metric() = default; + ~metric() override = default; // Encode a metric field specified by `field_name` with `value` as json format. template From 4dfae255cc5460f74a4b8aadf818e823fbec8d0f Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Wed, 1 Apr 2026 18:59:58 +0800 Subject: [PATCH 09/11] fix clang-tidy, IWYU and tests --- src/shell/commands/node_management.cpp | 3 ++- src/utils/metrics.cpp | 6 +++++- src/utils/metrics.h | 2 +- src/utils/test/metrics_test.cpp | 30 +++++++++++++++++++++++--- 4 files changed, 35 insertions(+), 6 deletions(-) diff --git a/src/shell/commands/node_management.cpp b/src/shell/commands/node_management.cpp index bfcac105d9..903b9d2526 100644 --- a/src/shell/commands/node_management.cpp +++ b/src/shell/commands/node_management.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -311,7 +312,7 @@ struct replica_server_stats // `replica_server_stats` into JSON because the number of its member variables // is very large and far exceeds the parameter limit of this macro. Increasing // the macro's parameter limit would make the code overly verbose. - std::string to_json_string() const + [[nodiscard]] std::string to_json_string() const { nlohmann::json rpc; rpc["get_qps"] = get_qps; diff --git a/src/utils/metrics.cpp b/src/utils/metrics.cpp index b0ead1613f..83cddb4993 100644 --- a/src/utils/metrics.cpp +++ b/src/utils/metrics.cpp @@ -256,7 +256,11 @@ std::string metric_filters::to_query_string() const COMBINE_FIELD_PAIR(ids, entity_ids); COMBINE_FIELD_PAIR(attributes, entity_attrs); COMBINE_FIELD_PAIR(metrics, entity_metrics); - fields.push_back(fmt::format("as_value={}", as_value)); + + // Only when `as_value` is true should it be inserted as a field into the query string. + if (as_value) { + fields.push_back("as_value=true"); + } #undef COMBINE_FIELD_PAIR diff --git a/src/utils/metrics.h b/src/utils/metrics.h index 5856a5fa41..c2014c4d12 100644 --- a/src/utils/metrics.h +++ b/src/utils/metrics.h @@ -497,7 +497,7 @@ struct metric_filters // The size of container must be divisible by 2, since attribute name always pairs // with value in it. - CHECK_EQ(entity_attrs.size() & 1, 0); + CHECK_EQ(entity_attrs.size() & 1U, 0); for (entity_attrs_type::size_type i = 0; i < entity_attrs.size(); i += 2) { const auto &iter = candidates.find(entity_attrs[i]); diff --git a/src/utils/test/metrics_test.cpp b/src/utils/test/metrics_test.cpp index a9047b50f3..61e26cb4e9 100644 --- a/src/utils/test/metrics_test.cpp +++ b/src/utils/test/metrics_test.cpp @@ -2660,6 +2660,8 @@ TEST(metrics_test, http_get_metrics) // - request percentile for default detail // - request percentile while detail=false // - request percentile while detail=true + // - request percentile with all percentile types while as_value=true + // - request percentile with only one percentile type while as_value=true struct test_case { std::string request_string; @@ -2859,6 +2861,14 @@ TEST(metrics_test, http_get_metrics) http_status_code::kOk, {{"server_116", {"test_server_percentile_int64"}}}, percentile_metric_fields}, + {REQUEST_STRING(GET, "ids=server_116&as_value=true"), + http_status_code::kOk, + {{"server_116", {"test_server_percentile_int64"}}}, + {kMetricNameField, "p95", "p99"}}, + {REQUEST_STRING(GET, "ids=server_116&with_metric_fields=name,p99&as_value=true"), + http_status_code::kOk, + {{"server_116", {"test_server_percentile_int64"}}}, + {kMetricNameField, kMetricSingleValueField}}, }; #undef REQUEST_STRING @@ -2889,6 +2899,7 @@ struct metric_filters_query_string_case metric_filters::entity_ids_type entity_ids; metric_filters::entity_attrs_type entity_attrs; metric_filters::entity_metrics_type entity_metrics; + bool as_value; size_t expected_fields; }; @@ -2898,16 +2909,23 @@ class MetricFiltersQueryStringTest : public testing::TestWithParam metric_filters_query_string_tests = { // Empty query string. - {{}, {}, {}, {}, {}, 0}, + {{}, {}, {}, {}, {}, false, 0}, // Some fields were missing in the query string. - {{"name", "value"}, {"replica"}, {}, {}, {"rdb_total_sst_files", "rdb_total_sst_size_mb"}, 3}, + {{"name", "value"}, + {"replica"}, + {}, + {}, + {"rdb_total_sst_files", "rdb_total_sst_size_mb"}, + false, + 3}, // All fields were present. {{"name", "value"}, {"replica"}, {"replica5.2"}, {"table_id", "partition_id"}, {"rdb_total_sst_files", "rdb_total_sst_size_mb"}, - 5}, + true, + 6}, }; TEST_P(MetricFiltersQueryStringTest, BuildQueryString) @@ -2924,6 +2942,7 @@ TEST_P(MetricFiltersQueryStringTest, BuildQueryString) COPY_CONTAINER(entity_ids); COPY_CONTAINER(entity_attrs); COPY_CONTAINER(entity_metrics); + COPY_CONTAINER(as_value); #undef COPY_CONTAINER @@ -2969,6 +2988,11 @@ TEST_P(MetricFiltersQueryStringTest, BuildQueryString) CHECK_FIELD(attributes, metric_filters::entity_attrs_type, entity_attrs); CHECK_FIELD(metrics, metric_filters::entity_metrics_type, entity_metrics); + if (query_string_case.as_value) { + ASSERT_LT(i, query_string_case.expected_fields); + ASSERT_STREQ("as_value=true", fields[i++].c_str()); + } + #undef CHECK_FIELD // All of the fields should have been checked. From 2926c9b17d131e605a6a16427dc64efd5f268fce Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Thu, 2 Apr 2026 11:27:40 +0800 Subject: [PATCH 10/11] fix clang-tidy --- src/shell/command_helper.h | 16 ++++++++-------- src/utils/metrics.cpp | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/shell/command_helper.h b/src/shell/command_helper.h index a0c464d311..06c10e8c26 100644 --- a/src/shell/command_helper.h +++ b/src/shell/command_helper.h @@ -1116,8 +1116,8 @@ class aggregate_stats_calcs class total_aggregate_stats : public aggregate_stats { public: - total_aggregate_stats(const std::string &entity_type, stat_var_map &&stat_vars) - : _my_entity_type(entity_type), _my_stat_vars(std::move(stat_vars)) + total_aggregate_stats(std::string entity_type, stat_var_map &&stat_vars) + : _my_entity_type(std::move(entity_type)), _my_stat_vars(std::move(stat_vars)) { } @@ -1161,12 +1161,12 @@ using table_stat_map = std::unordered_map; class table_aggregate_stats : public aggregate_stats { public: - table_aggregate_stats(const std::string &entity_type, + table_aggregate_stats(std::string entity_type, table_stat_map &&table_stats, - const std::unordered_set &partitions) - : _my_entity_type(entity_type), + std::unordered_set partitions) + : _my_entity_type(std::move(entity_type)), _my_table_stats(std::move(table_stats)), - _my_partitions(partitions) + _my_partitions(std::move(partitions)) { } @@ -1227,8 +1227,8 @@ using partition_stat_map = std::unordered_map; class partition_aggregate_stats : public aggregate_stats { public: - partition_aggregate_stats(const std::string &entity_type, partition_stat_map &&partition_stats) - : _my_entity_type(entity_type), _my_partition_stats(std::move(partition_stats)) + partition_aggregate_stats(std::string entity_type, partition_stat_map &&partition_stats) + : _my_entity_type(std::move(entity_type)), _my_partition_stats(std::move(partition_stats)) { } diff --git a/src/utils/metrics.cpp b/src/utils/metrics.cpp index 83cddb4993..eccb8b3fe3 100644 --- a/src/utils/metrics.cpp +++ b/src/utils/metrics.cpp @@ -259,7 +259,7 @@ std::string metric_filters::to_query_string() const // Only when `as_value` is true should it be inserted as a field into the query string. if (as_value) { - fields.push_back("as_value=true"); + fields.emplace_back("as_value=true"); } #undef COMBINE_FIELD_PAIR From 6c3c0d6cf288a7b4ca8da9ab14516876a19f3938 Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Thu, 2 Apr 2026 16:46:07 +0800 Subject: [PATCH 11/11] optimize parsing as_value --- src/utils/metrics.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/utils/metrics.cpp b/src/utils/metrics.cpp index eccb8b3fe3..ea216fd65d 100644 --- a/src/utils/metrics.cpp +++ b/src/utils/metrics.cpp @@ -403,7 +403,9 @@ void metrics_http_service::get_metrics_handler(const http_request &req, http_res int kth_count{0}; for (const auto &kth : kAllKthPercentiles) { if (gutil::ContainsKey(filters.with_metric_fields, kth.name)) { - ++kth_count; + if (++kth_count > 1) { + break; + } } }