From 4de1a3f474e1c0d9fb66863ea58926861e0a4034 Mon Sep 17 00:00:00 2001
From: Patrick Donnelly
Date: Tue, 22 Jan 2019 18:26:38 -0800
Subject: [PATCH] mds: remove cache drop asok command

`cache drop` is a long running command that will block the asok interface
(while the tell version does not). Attempting to abort the command with ^C
or equivalents will simply cause the `ceph` command to exit but won't stop
the asok command handler from waiting for the cache drop operation to
complete. Instead, just allow the tell version.

Fixes: http://tracker.ceph.com/issues/38020
Signed-off-by: Patrick Donnelly
(cherry picked from commit 7fa1e3c37f8c7fb709ae9e070a3154a7084c0584)

Conflicts:
        PendingReleaseNotes
        qa/tasks/cephfs/test_misc.py
        src/mds/MDSDaemon.cc
---
 PendingReleaseNotes          | 84 ++----------------------------------
 qa/tasks/cephfs/test_misc.py | 39 +++++------------
 src/mds/MDSDaemon.cc         |  5 ---
 src/mds/MDSRank.cc           | 12 ------
 4 files changed, 14 insertions(+), 126 deletions(-)

diff --git a/PendingReleaseNotes b/PendingReleaseNotes
index 564f4eabea124..b181526b38c1d 100644
--- a/PendingReleaseNotes
+++ b/PendingReleaseNotes
@@ -1,83 +1,5 @@
->=13.2.0
-========
-
-(some or all of the notes in this section seem to have been omitted
-from the 13.2.0 release notes (?), so include them in 13.2.1 release
-notes (?))
-
-* The Telegraf module for the Manager allows for sending statistics to
-  an Telegraf Agent over TCP, UDP or a UNIX Socket. Telegraf can then
-  send the statistics to databases like InfluxDB, ElasticSearch, Graphite
-  and many more.
-
-* The graylog fields naming the originator of a log event have
-  changed: the string-form name is now included (e.g., ``"name":
-  "mgr.foo"``), and the rank-form name is now in a nested section
-  (e.g., ``"rank": {"type": "mgr", "num": 43243}``).
-
-* If the cluster log is directed at syslog, the entries are now
-  prefixed by both the string-form name and the rank-form name (e.g.,
-  ``mgr.x mgr.12345 ...`` instead of just ``mgr.12345 ...``).
-
-* The JSON output of the ``osd find`` command has replaced the ``ip``
-  field with an ``addrs`` section to reflect that OSDs may bind to
-  multiple addresses.
-
-13.2.1
-======
-
-* CephFS clients without the 's' flag in their authentication capability
-  string will no longer be able to create/delete snapshots. To allow
-  ``client.foo`` to create/delete snapshots in the ``bar`` directory of
-  filesystem ``cephfs_a``, use command:
-
-    - ``ceph auth caps client.foo mon 'allow r' osd 'allow rw tag cephfs data=cephfs_a' mds 'allow rw, allow rws path=/bar'``
-
-13.2.3
-======
-
-* The default memory utilization for the mons has been increased
-  somewhat. Rocksdb now uses 512 MB of RAM by default, which should
-  be sufficient for small to medium-sized clusters; large clusters
-  should tune this up. Also, the ``mon_osd_cache_size`` has been
-  increase from 10 OSDMaps to 500, which will translate to an
-  additional 500 MB to 1 GB of RAM for large clusters, and much less
-  for small clusters.
-
-* Ceph v13.2.2 includes a wrong backport, which may cause mds to go into
-  'damaged' state when upgrading Ceph cluster from previous version.
-  The bug is fixed in v13.2.3. For ceph v13.2.2 installation, upgrading
-  to v13.2.3 does not requires special action.
-
-* The bluestore_cache_* options are no longer needed. They are replaced
-  by osd_memory_target, defaulting to 4GB. BlueStore will expand
-  and contract its cache to attempt to stay within this
-  limit. Users upgrading should note this is a higher default
-  than the previous bluestore_cache_size of 1GB, so OSDs using
-  BlueStore will use more memory by default.
-  For more details, see `BlueStore docs _`
-
-13.2.5
-======
-
-* This release fixes the pg log hard limit bug that was introduced in
-  13.2.2, https://tracker.ceph.com/issues/36686. A flag called
-  `pglog_hardlimit` has been introduced, which is off by default. Enabling
-  this flag will limit the length of the pg log. In order to enable
-  that, the flag must be set by running `ceph osd set pglog_hardlimit`
-  after completely upgrading to 13.2.2. Once the cluster has this flag
-  set, the length of the pg log will be capped by a hard limit. Once set,
-  this flag *must not* be unset anymore. In luminous, this feature was
-  introduced in 12.2.11. Users who are running 12.2.11, and want to
-  continue to use this feauture, should upgrade to 13.2.5 or later.
-
-* This release also fixes a cve on civetweb, CVE-2019-3821 where ssl fds were
-  not closed in civetweb in case the initial negotiation fails.
-
-13.2.6
+13.2.7
 ======
 
-* Ceph v13.2.6 now packages python bindings for python3.6 instead of
-  python3.4, because EPEL7 recently switched from python3.4 to
-  python3.6 as the native python3. see the `announcement _`
-  for more details on the background of this change.
\ No newline at end of file
+* The `cache drop` admin socket command has been removed. The `ceph tell mds.X
+  cache drop` remains.
diff --git a/qa/tasks/cephfs/test_misc.py b/qa/tasks/cephfs/test_misc.py
index b1f378fd524ea..3a792a7fba516 100644
--- a/qa/tasks/cephfs/test_misc.py
+++ b/qa/tasks/cephfs/test_misc.py
@@ -217,18 +217,15 @@ class TestMisc(CephFSTestCase):
         ratio = raw_avail / fs_avail
         assert 0.9 < ratio < 1.1
 
-    def _run_drop_cache_cmd(self, timeout, use_tell):
+    def _run_drop_cache_cmd(self, timeout):
         drop_res = None
-        if use_tell:
-            mds_id = self.fs.get_lone_mds_id()
-            drop_res = json.loads(
-                self.fs.mon_manager.raw_cluster_cmd("tell", "mds.{0}".format(mds_id),
-                                                    "cache", "drop", str(timeout)))
-        else:
-            drop_res = self.fs.mds_asok(["cache", "drop", str(timeout)])
+        mds_id = self.fs.get_lone_mds_id()
+        drop_res = json.loads(
+            self.fs.mon_manager.raw_cluster_cmd("tell", "mds.{0}".format(mds_id),
+                                                "cache", "drop", str(timeout)))
         return drop_res
 
-    def _drop_cache_command(self, timeout, use_tell=True):
+    def _drop_cache_command(self, timeout):
         self.mount_b.umount_wait()
         ls_data = self.fs.mds_asok(['session', 'ls'])
         self.assert_session_count(1, ls_data)
@@ -236,12 +233,12 @@ class TestMisc(CephFSTestCase):
         # create some files
         self.mount_a.create_n_files("dc-dir/dc-file", 1000)
         # drop cache
-        drop_res = self._run_drop_cache_cmd(timeout, use_tell)
+        drop_res = self._run_drop_cache_cmd(timeout)
 
         self.assertTrue(drop_res['client_recall']['return_code'] == 0)
         self.assertTrue(drop_res['flush_journal']['return_code'] == 0)
 
-    def _drop_cache_command_timeout(self, timeout, use_tell=True):
+    def _drop_cache_command_timeout(self, timeout):
         self.mount_b.umount_wait()
         ls_data = self.fs.mds_asok(['session', 'ls'])
         self.assert_session_count(1, ls_data)
@@ -251,7 +248,7 @@ class TestMisc(CephFSTestCase):
 
         # simulate client death and try drop cache
         self.mount_a.kill()
-        drop_res = self._run_drop_cache_cmd(timeout, use_tell)
+        drop_res = self._run_drop_cache_cmd(timeout)
 
         self.assertTrue(drop_res['client_recall']['return_code'] == -errno.ETIMEDOUT)
         self.assertTrue(drop_res['flush_journal']['return_code'] == 0)
@@ -260,28 +257,14 @@ class TestMisc(CephFSTestCase):
         self.mount_a.mount()
         self.mount_a.wait_until_mounted()
 
-    def test_drop_cache_command_asok(self):
-        """
-        Basic test for checking drop cache command using admin socket.
-        Note that the cache size post trimming is not checked here.
-        """
-        self._drop_cache_command(10, use_tell=False)
-
-    def test_drop_cache_command_tell(self):
+    def test_drop_cache_command(self):
         """
         Basic test for checking drop cache command using tell interface.
        Note that the cache size post trimming is not checked here.
         """
         self._drop_cache_command(10)
 
-    def test_drop_cache_command_timeout_asok(self):
-        """
-        Check drop cache command with non-responding client using admin
-        socket. Note that the cache size post trimming is not checked here.
-        """
-        self._drop_cache_command_timeout(5, use_tell=False)
-
-    def test_drop_cache_command_timeout_tell(self):
+    def test_drop_cache_command_timeout(self):
         """
         Check drop cache command with non-responding client using tell
         interface. Note that the cache size post trimming is not checked
diff --git a/src/mds/MDSDaemon.cc b/src/mds/MDSDaemon.cc
index b0a605957e41d..a572a8dcbfc45 100644
--- a/src/mds/MDSDaemon.cc
+++ b/src/mds/MDSDaemon.cc
@@ -250,11 +250,6 @@ void MDSDaemon::set_up_admin_socket()
                                      asok_hook,
                                      "show cache status");
   assert(r == 0);
-  r = admin_socket->register_command("cache drop",
-                                     "cache drop name=timeout,type=CephInt,range=0,req=false",
-                                     asok_hook,
-                                     "drop cache");
-  assert(r == 0);
   r = admin_socket->register_command("dump tree",
                                      "dump tree "
                                      "name=root,type=CephString,req=true "
diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc
index 0964f5d57ab21..aced4c72a6821 100644
--- a/src/mds/MDSRank.cc
+++ b/src/mds/MDSRank.cc
@@ -2529,18 +2529,6 @@ bool MDSRankDispatcher::handle_asok_command(std::string_view command,
   } else if (command == "cache status") {
     Mutex::Locker l(mds_lock);
     mdcache->cache_status(f);
-  } else if (command == "cache drop") {
-    int64_t timeout;
-    if (!cmd_getval(g_ceph_context, cmdmap, "timeout", timeout)) {
-      timeout = 0;
-    }
-
-    C_SaferCond cond;
-    command_cache_drop((uint64_t)timeout, f, &cond);
-    int r = cond.wait();
-    if (r != 0) {
-      f->flush(ss);
-    }
   } else if (command == "dump tree") {
     command_dump_tree(cmdmap, ss, f);
   } else if (command == "dump loads") {
-- 
2.47.3
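
For anyone exercising the backport by hand, below is a minimal sketch of driving the surviving tell-based command through the plain `ceph` CLI, mirroring what the updated test does via `raw_cluster_cmd`. The MDS name `mds.a`, the 10-second timeout, and a reachable cluster with admin credentials are illustrative assumptions, not part of the patch.

    import json
    import subprocess

    def drop_mds_cache(mds_name="mds.a", timeout=10):
        """Run the tell-based `cache drop` and return its parsed JSON output.

        Assumes the `ceph` CLI is installed and can reach a cluster; the
        MDS name and timeout here are placeholders.
        """
        out = subprocess.check_output(
            ["ceph", "tell", mds_name, "cache", "drop", str(timeout)])
        return json.loads(out)

    if __name__ == "__main__":
        res = drop_mds_cache()
        # As in the test, the per-step return codes are reported in the result,
        # e.g. res["client_recall"]["return_code"] and res["flush_journal"]["return_code"].
        print(json.dumps(res, indent=2))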