From 749c7706762025163d37481eb7316e2a79035344 Mon Sep 17 00:00:00 2001
From: Leonid Usov <leonid.usov@ibm.com>
Date: Thu, 27 Jul 2023 17:48:20 +0300
Subject: [PATCH] mds/Server: mark a cap acquisition throttle event in the
 request

Fixes: https://tracker.ceph.com/issues/59067
Signed-off-by: Leonid Usov <leonid.usov@ibm.com>
---
 qa/tasks/cephfs/test_client_limits.py |  47 ++++--
 src/mds/Server.cc                     |   2 +
 src/test/libcephfs/snapdiff.cc        | 219 +++++++++++++++++++-------
 3 files changed, 198 insertions(+), 70 deletions(-)

diff --git a/qa/tasks/cephfs/test_client_limits.py b/qa/tasks/cephfs/test_client_limits.py
index 836f81af1642..f2d63920564f 100644
--- a/qa/tasks/cephfs/test_client_limits.py
+++ b/qa/tasks/cephfs/test_client_limits.py
@@ -10,6 +10,7 @@ from tasks.ceph_test_case import TestTimeoutError
 from tasks.cephfs.cephfs_test_case import CephFSTestCase, needs_trimming
 from tasks.cephfs.fuse_mount import FuseMount
 import os
+from io import StringIO
 
 
 log = logging.getLogger(__name__)
@@ -159,29 +160,49 @@ class TestClientLimits(CephFSTestCase):
         a fraction of second (0.5) by default when throttling condition is met.
         """
 
-        max_caps_per_client = 500
-        cap_acquisition_throttle = 250
+        subdir_count = 4
+        files_per_dir = 25
 
-        self.config_set('mds', 'mds_max_caps_per_client', max_caps_per_client)
-        self.config_set('mds', 'mds_session_cap_acquisition_throttle', cap_acquisition_throttle)
+        # throttle in a way so that two dir reads are already hitting it.
+        throttle_value = (files_per_dir * 3) // 2
 
-        # Create 1500 files split across 6 directories, 250 each.
-        for i in range(1, 7):
-            self.mount_a.create_n_files("dir{0}/file".format(i), cap_acquisition_throttle, sync=True)
+        # activate throttling logic by setting max per client to a low value
+        self.config_set('mds', 'mds_max_caps_per_client', 1)
+        self.config_set('mds', 'mds_session_cap_acquisition_throttle', throttle_value)
 
-        mount_a_client_id = self.mount_a.get_global_id()
+        # Create files split across {subdir_count} directories, {per_dir_count} in each dir
+        for i in range(1, subdir_count+1):
+            self.mount_a.create_n_files("dir{0}/file".format(i), files_per_dir, sync=True)
 
-        # recursive readdir
-        self.mount_a.run_shell_payload("find | wc")
+        mount_a_client_id = self.mount_a.get_global_id()
 
-        # validate cap_acquisition decay counter after readdir to exceed throttle count i.e 250
-        cap_acquisition_value = self.get_session(mount_a_client_id)['cap_acquisition']['value']
-        self.assertGreaterEqual(cap_acquisition_value, cap_acquisition_throttle)
+        # recursive readdir. macOs wants an explicit directory for `find`.
+        proc = self.mount_a.run_shell_payload("find . | wc", stderr=StringIO())
+        # return code may be None if the command got interrupted
+        self.assertTrue(proc.returncode is None or proc.returncode == 0, proc.stderr.getvalue())
 
         # validate the throttle condition to be hit atleast once
         cap_acquisition_throttle_hit_count = self.perf_dump()['mds_server']['cap_acquisition_throttle']
         self.assertGreaterEqual(cap_acquisition_throttle_hit_count, 1)
 
+        # validate cap_acquisition decay counter after readdir to NOT exceed the throttle value
+        # plus one batch that could have been taken immediately before querying
+        # assuming the batch is equal to the per dir file count.
+        cap_acquisition_value = self.get_session(mount_a_client_id)['cap_acquisition']['value']
+        self.assertLessEqual(cap_acquisition_value, files_per_dir + throttle_value)
+
+        # make sure that the throttle was reported in the events
+        def historic_ops_have_event(expected_event):
+            ops_dump = self.fs.rank_tell(['dump_historic_ops'])
+            # reverse the events and the ops assuming that later ops would be throttled
+            for op in reversed(ops_dump['ops']):
+                for ev in reversed(op.get('type_data', {}).get('events', [])):
+                    if ev['event'] == expected_event:
+                        return True
+            return False
+
+        self.assertTrue(historic_ops_have_event('cap_acquisition_throttle'))
+
     def test_client_release_bug(self):
         """
         When a client has a bug (which we will simulate) preventing it from releasing caps,
diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index 7ed27c14b10d..5003acaa9d32 100644
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -4833,6 +4833,7 @@ void Server::handle_client_readdir(MDRequestRef& mdr)
       if (logger)
           logger->inc(l_mdss_cap_acquisition_throttle);
 
+      mdr->mark_event("cap_acquisition_throttle");
       mds->timer.add_event_after(caps_throttle_retry_request_timeout, new C_MDS_RetryRequest(mdcache, mdr));
       return;
   }
@@ -11515,6 +11516,7 @@ void Server::handle_client_readdir_snapdiff(MDRequestRef& mdr)
     if (logger)
       logger->inc(l_mdss_cap_acquisition_throttle);
 
+    mdr->mark_event("cap_acquisition_throttle");
     mds->timer.add_event_after(caps_throttle_retry_request_timeout, new C_MDS_RetryRequest(mdcache, mdr));
     return;
   }
diff --git a/src/test/libcephfs/snapdiff.cc b/src/test/libcephfs/snapdiff.cc
index 2320bf58b6c2..641505653691 100644
--- a/src/test/libcephfs/snapdiff.cc
+++ b/src/test/libcephfs/snapdiff.cc
@@ -17,6 +17,10 @@
 #include "include/ceph_assert.h"
 #include "include/object.h"
 #include "include/stringify.h"
+#include "common/ceph_context.h"
+#include "common/config_proxy.h"
+#include "json_spirit/json_spirit.h"
+#include "boost/format/alt_sstream.hpp"
 #include <errno.h>
 #include <fcntl.h>
 #include <unistd.h>
@@ -27,6 +31,7 @@
 #include <algorithm>
 #include <limits.h>
 #include <dirent.h>
+#include <optional>
 
 using namespace std;
 class TestMount {
@@ -56,7 +61,56 @@ public:
     return ceph_conf_get(cmount, option, buf, len);
   }
 
-  string make_file_path(const char* relpath) {
+  json_spirit::mValue tell_rank0(const std::string& prefix, cmdmap_t&& cmdmap = {}) {
+    cmdmap["prefix"] = prefix;
+    cmdmap["format"] = std::string("json");
+
+    JSONFormatter jf;
+    jf.open_object_section("");
+    ceph::common::cmdmap_dump(cmdmap, &jf);
+    jf.close_section();
+
+    boost::io::basic_oaltstringstream<char> oss;
+    jf.flush(oss);
+
+    const char *cmdv[] = {oss.begin()};
+
+    char *outb, *outs;
+    size_t outb_len, outs_len;
+    int status = ceph_mds_command(cmount, "0", cmdv, sizeof(cmdv)/sizeof(cmdv[0]), nullptr, 0, &outb, &outb_len, &outs, &outs_len);
+    if (status < 0)
+    {
+      outs[outs_len] = 0;
+      std::cout << "couldn't tell rank 0 '" << oss.begin() << "'\n" << strerror(-status) << ": " << outs << std::endl;
+      return json_spirit::mValue::null;
+    }
+
+    json_spirit::mValue dump;
+    if (!json_spirit::read(outb, dump))
+    {
+      std::cout << "couldn't parse '" << prefix << "'response json" << std::endl;
+      return json_spirit::mValue::null;
+    }
+    return dump;
+  }
+
+  bool tell_rank0_config(const std::string &var, const std::optional<const std::string> val = {}) {
+    cmdmap_t cmdmap;
+    std::string prefix;
+    cmdmap["var"] = var;
+
+    if (val.has_value()) {
+      cmdmap["val"] = std::vector{val.value()};
+      prefix = "config set";
+    }
+    else {
+      prefix = "config unset";
+    }
+
+    return !tell_rank0(prefix, std::move(cmdmap)).is_null();
+  }
+
+  string make_file_path(const char *relpath) {
     char path[PATH_MAX];
     sprintf(path, "%s/%s", dir_path, relpath);
     return path;
@@ -369,7 +423,6 @@ void TestMount::print_snap_diff(const char* relpath,
     }));
 };
 
-
 /* The following method creates some files/folders/snapshots layout,
    described in the sheet below.
    We're to test SnapDiff readdir API against that structure.
@@ -497,71 +550,123 @@ TEST(LibCephFS, SnapDiffLib)
     test_mount.verify_snap_diff(expected, "", "snap1", "snap2");
   }
 
-  //
-  // Make sure snap1 vs. snap2 delta for /dirA is as expected
-  //
+  json_spirit::mValue dump;
   {
-    vector<pair<string, uint64_t>> expected;
-    expected.emplace_back("fileA", snapid2);
-    test_mount.verify_snap_diff(expected, "dirA", "snap1", "snap2");
-  }
+    struct Cleanup
+    {
+      TestMount &test_mount;
+      ~Cleanup()
+      {
+        // make sure to restore the default settings before leaving this block
+        test_mount.tell_rank0_config("mds_max_caps_per_client");
+        test_mount.tell_rank0_config("mds_session_cap_acquisition_throttle");
+        test_mount.tell_rank0_config("mds_session_cap_acquisition_decay_rate");
+        test_mount.tell_rank0_config("mds_op_history_size");
+        test_mount.tell_rank0_config("mds_op_history_duration");
+      }
+    } cleanup {test_mount};
+    // the following commands will be run with cap_acquisition_throttle triggered
+    // to verify that such event is logged on the operations
+    ASSERT_TRUE(test_mount.tell_rank0_config("mds_max_caps_per_client", "1"));
+    ASSERT_TRUE(test_mount.tell_rank0_config("mds_session_cap_acquisition_throttle", "1"));
+    ASSERT_TRUE(test_mount.tell_rank0_config("mds_session_cap_acquisition_decay_rate", "1"));
+    ASSERT_TRUE(test_mount.tell_rank0_config("mds_op_history_size", "100"));
+    ASSERT_TRUE(test_mount.tell_rank0_config("mds_op_history_duration", "600"));
 
-  //
-  // Make sure snap1 vs. snap2 delta for /dirB is as expected
-  //
-  {
-    vector<pair<string, uint64_t>> expected;
-    expected.emplace_back("fileb", snapid2);
-    test_mount.verify_snap_diff(expected, "dirB", "snap1", "snap2");
-  }
+    //
+    // Make sure snap1 vs. snap2 delta for /dirA is as expected
+    //
+    {
+      vector<pair<string, uint64_t>> expected;
+      expected.emplace_back("fileA", snapid2);
+      test_mount.verify_snap_diff(expected, "dirA", "snap1", "snap2");
+    }
 
-  //
-  // Make sure snap1 vs. snap2 delta for /dirC is as expected
-  //
-  {
-    vector<pair<string, uint64_t>> expected;
-    expected.emplace_back("filec", snapid1);
-    test_mount.verify_snap_diff(expected, "dirC", "snap2", "snap1");
-  }
+    //
+    // Make sure snap1 vs. snap2 delta for /dirB is as expected
+    //
+    {
+      vector<pair<string, uint64_t>> expected;
+      expected.emplace_back("fileb", snapid2);
+      test_mount.verify_snap_diff(expected, "dirB", "snap1", "snap2");
+    }
 
-  //
-  // Make sure snap1 vs. snap2 delta for /dirD is as expected
-  //
-  {
-    vector<pair<string, uint64_t>> expected;
-    test_mount.verify_snap_diff(expected, "dirD", "snap1", "snap2");
+    //
+    // Make sure snap1 vs. snap2 delta for /dirC is as expected
+    //
+    {
+      vector<pair<string, uint64_t>> expected;
+      expected.emplace_back("filec", snapid1);
+      test_mount.verify_snap_diff(expected, "dirC", "snap2", "snap1");
+    }
+
+    //
+    // Make sure snap1 vs. snap2 delta for /dirD is as expected
+    //
+    {
+      vector<pair<string, uint64_t>> expected;
+      test_mount.verify_snap_diff(expected, "dirD", "snap1", "snap2");
+    }
+
+    // Make sure SnapDiff returns an error when provided with the same
+    // snapshot name for both parties A and B.
+    {
+      string snap_path = test_mount.make_snap_path("snap2");
+      string snap_other_path = snap_path;
+      std::cout << "---------invalid snapdiff params, the same snaps---------" << std::endl;
+      ASSERT_EQ(-EINVAL, test_mount.for_each_readdir_snapdiff(
+        "",
+        "snap2",
+        "snap2",
+        [&](const dirent* dire, uint64_t snapid) {
+          return true;
+        }));
+    }
+    // Make sure SnapDiff returns an error when provided with an empty
+    // snapshot name for one of the parties
+    {
+      std::cout << "---------invalid snapdiff params, no snap_other ---------" << std::endl;
+      string snap_path = test_mount.make_snap_path("snap2");
+      string snap_other_path;
+      ASSERT_EQ(-EINVAL, test_mount.for_each_readdir_snapdiff(
+        "",
+        "snap2",
+        "",
+        [&](const dirent* dire, uint64_t snapid) {
+          return true;
+        }));
+    }
+
+    // do this before the scope ends and cleanup is run
+    dump = test_mount.tell_rank0("dump_historic_ops");
   }
 
-  // Make sure SnapDiff returns an error when provided with the same
-  // snapshot name for both parties A and B.
-  {
-    string snap_path = test_mount.make_snap_path("snap2");
-    string snap_other_path = snap_path;
-    std::cout << "---------invalid snapdiff params, the same snaps---------" << std::endl;
-    ASSERT_EQ(-EINVAL, test_mount.for_each_readdir_snapdiff(
-      "",
-      "snap2",
-      "snap2",
-      [&](const dirent* dire, uint64_t snapid) {
-        return true;
-      }));
+  ASSERT_FALSE(dump.is_null());
+  bool seen_cap_throttle_in_recent_op_events = false;
+  try {
+    for (const auto& op: dump.get_obj().at("ops").get_array()) {
+      for (const auto& ev: op.get_obj().at("type_data").get_obj().at("events").get_array()) {
+        if (ev.get_obj().at("event") == "cap_acquisition_throttle") {
+	  seen_cap_throttle_in_recent_op_events = true;
+	  goto done;
+	}
+      }
+    }
+    done:;
   }
-  // Make sure SnapDiff returns an error when provided with an empty
-  // snapshot name for one of the parties
-  {
-    std::cout << "---------invalid snapdiff params, no snap_other ---------" << std::endl;
-    string snap_path = test_mount.make_snap_path("snap2");
-    string snap_other_path;
-    ASSERT_EQ(-EINVAL, test_mount.for_each_readdir_snapdiff(
-      "",
-      "snap2",
-      "",
-      [&](const dirent* dire, uint64_t snapid) {
-        return true;
-      }));
+  catch (const std::runtime_error &e) {
+    std::cout << "error while parsing dump_historic_ops: " << e.what() << std::endl;
+  }
+
+  if (!seen_cap_throttle_in_recent_op_events) {
+    std::cout << "couldn't find 'cap_acquisition_throttle' event in:" << std::endl;
+    json_spirit::write(dump, std::cout, json_spirit::pretty_print);
   }
 
+  ASSERT_TRUE(seen_cap_throttle_in_recent_op_events);
+
   std::cout << "------------- closing -------------" << std::endl;
+
   ASSERT_EQ(0, test_mount.purge_dir(""));
   ASSERT_EQ(0, test_mount.rmsnap("snap1"));
   ASSERT_EQ(0, test_mount.rmsnap("snap2"));
-- 
2.47.3