From: Patrick Donnelly Date: Fri, 27 Jun 2025 18:38:17 +0000 (-0400) Subject: mds: allow disabling batch ops X-Git-Tag: testing/wip-vshankar-testing-20250721.082855-tentacle-debug~2^2~2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=dcc9aa2414aab9a50f58231f83d574af33b6e83c;p=ceph-ci.git mds: allow disabling batch ops This addresses a bug, and any future ones, where batching lookup/getattr does not help "kick" the MDS into switching state more quickly (e.g. flushing the MDS journal). Signed-off-by: Patrick Donnelly (cherry picked from commit 0201f86e6939a3d787bea755a48cb4b4254d2f9c) --- diff --git a/src/common/options/mds.yaml.in b/src/common/options/mds.yaml.in index 06c12ea671d..80be6be4d6d 100644 --- a/src/common/options/mds.yaml.in +++ b/src/common/options/mds.yaml.in @@ -793,6 +793,21 @@ options: services: - mds with_legacy: true +- name: mds_allow_batched_ops + type: bool + level: advanced + desc: allow MDS to batch lookup/getattr RPCs + long_desc: > + The MDS will batch a lookup or getattr RPC on the same inode when + possible to avoid repetitive locks on metadata and to bypass other + requests acquiring write locks. Generally, this should only + improve performance but this switch exists to provide a means to + turn this behavior off for comparison. 
+ default: true + services: + - mds + flags: + - runtime # multiple of size_max that triggers immediate split - name: mds_bal_fragment_fast_factor type: float diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 6f4adb81be4..59f695a8c04 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -4070,6 +4070,7 @@ std::vector MDSRankDispatcher::get_tracked_keys() "fsid", "host", "mds_allow_async_dirops", + "mds_allow_batched_ops", "mds_alternate_name_max", "mds_bal_export_pin", "mds_bal_fragment_dirs", diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 6586defce91..bf353f37122 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -280,6 +280,7 @@ Server::Server(MDSRank *m, MetricsHandler *metrics_handler) : { forward_all_requests_to_auth = g_conf().get_val("mds_forward_all_requests_to_auth"); replay_unsafe_with_closed_session = g_conf().get_val("mds_replay_unsafe_with_closed_session"); + allow_batched_ops = g_conf().get_val("mds_allow_batched_ops"); cap_revoke_eviction_timeout = g_conf().get_val("mds_cap_revoke_eviction_timeout"); max_snaps_per_dir = g_conf().get_val("mds_max_snaps_per_dir"); delegate_inos_pct = g_conf().get_val("mds_client_delegate_inos_pct"); @@ -1378,6 +1379,9 @@ void Server::handle_conf_change(const std::set& changed) { if (changed.count("mds_forward_all_requests_to_auth")){ forward_all_requests_to_auth = g_conf().get_val("mds_forward_all_requests_to_auth"); } + if (changed.count("mds_allow_batched_ops")) { + allow_batched_ops = g_conf().get_val("mds_allow_batched_ops"); + } if (changed.count("mds_cap_revoke_eviction_timeout")) { cap_revoke_eviction_timeout = g_conf().get_val("mds_cap_revoke_eviction_timeout"); dout(20) << __func__ << " cap revoke eviction timeout changed to " @@ -4185,7 +4189,7 @@ void Server::handle_client_getattr(const MDRequestRef& mdr, bool is_lookup) if (mask & CEPH_STAT_RSTAT) want_auth = true; // set want_auth for CEPH_STAT_RSTAT mask - if (!mdr->is_batch_head() && mdr->can_batch()) { + if 
(!mdr->is_batch_head() && allow_batched_ops && mdr->can_batch()) { CF_MDS_RetryRequestFactory cf(mdcache, mdr, false); int r = mdcache->path_traverse(mdr, cf, mdr->get_filepath(), (want_auth ? MDS_TRAVERSE_WANT_AUTH : 0), diff --git a/src/mds/Server.h b/src/mds/Server.h index 87a2ac9bb05..f994514bbce 100644 --- a/src/mds/Server.h +++ b/src/mds/Server.h @@ -603,6 +603,7 @@ private: unsigned delegate_inos_pct = 0; uint64_t dir_max_entries = 0; int64_t bal_fragment_size_max = 0; + bool allow_batched_ops = true; double inject_rename_corrupt_dentry_first = 0.0;