From 4de1a3f474e1c0d9fb66863ea58926861e0a4034 Mon Sep 17 00:00:00 2001
From: Patrick Donnelly
Date: Tue, 22 Jan 2019 18:26:38 -0800
Subject: [PATCH] mds: remove cache drop asok command

`cache drop` is a long running command that will block the asok interface
(while the tell version does not). Attempting to abort the command with ^C
or equivalents will simply cause the `ceph` command to exit but won't stop
the asok command handler from waiting for the cache drop operation to
complete. Instead, just allow the tell version.

Fixes: http://tracker.ceph.com/issues/38020
Signed-off-by: Patrick Donnelly
(cherry picked from commit 7fa1e3c37f8c7fb709ae9e070a3154a7084c0584)

Conflicts:
        PendingReleaseNotes
        qa/tasks/cephfs/test_misc.py
        src/mds/MDSDaemon.cc
---
 PendingReleaseNotes          | 84 ++----------------------------------
 qa/tasks/cephfs/test_misc.py | 39 +++++------------
 src/mds/MDSDaemon.cc         |  5 ---
 src/mds/MDSRank.cc           | 12 ------
 4 files changed, 14 insertions(+), 126 deletions(-)

diff --git a/PendingReleaseNotes b/PendingReleaseNotes
index 564f4eabea124..b181526b38c1d 100644
--- a/PendingReleaseNotes
+++ b/PendingReleaseNotes
@@ -1,83 +1,5 @@
->=13.2.0
-========
-
-(some or all of the notes in this section seem to have been omitted
-from the 13.2.0 release notes (?), so include them in 13.2.1 release
-notes (?))
-
-* The Telegraf module for the Manager allows for sending statistics to
-  an Telegraf Agent over TCP, UDP or a UNIX Socket. Telegraf can then
-  send the statistics to databases like InfluxDB, ElasticSearch, Graphite
-  and many more.
-
-* The graylog fields naming the originator of a log event have
-  changed: the string-form name is now included (e.g., ``"name":
-  "mgr.foo"``), and the rank-form name is now in a nested section
-  (e.g., ``"rank": {"type": "mgr", "num": 43243}``).
-
-* If the cluster log is directed at syslog, the entries are now
-  prefixed by both the string-form name and the rank-form name (e.g.,
-  ``mgr.x mgr.12345 ...`` instead of just ``mgr.12345 ...``).
-
-* The JSON output of the ``osd find`` command has replaced the ``ip``
-  field with an ``addrs`` section to reflect that OSDs may bind to
-  multiple addresses.
-
-13.2.1
-======
-
-* CephFS clients without the 's' flag in their authentication capability
-  string will no longer be able to create/delete snapshots. To allow
-  ``client.foo`` to create/delete snapshots in the ``bar`` directory of
-  filesystem ``cephfs_a``, use command:
-
-    - ``ceph auth caps client.foo mon 'allow r' osd 'allow rw tag cephfs data=cephfs_a' mds 'allow rw, allow rws path=/bar'``
-
-13.2.3
-======
-
-* The default memory utilization for the mons has been increased
-  somewhat. Rocksdb now uses 512 MB of RAM by default, which should
-  be sufficient for small to medium-sized clusters; large clusters
-  should tune this up. Also, the ``mon_osd_cache_size`` has been
-  increase from 10 OSDMaps to 500, which will translate to an
-  additional 500 MB to 1 GB of RAM for large clusters, and much less
-  for small clusters.
-
-* Ceph v13.2.2 includes a wrong backport, which may cause mds to go into
-  'damaged' state when upgrading Ceph cluster from previous version.
-  The bug is fixed in v13.2.3. For ceph v13.2.2 installation, upgrading
-  to v13.2.3 does not requires special action.
-
-* The bluestore_cache_* options are no longer needed. They are replaced
-  by osd_memory_target, defaulting to 4GB. BlueStore will expand
-  and contract its cache to attempt to stay within this
-  limit. Users upgrading should note this is a higher default
-  than the previous bluestore_cache_size of 1GB, so OSDs using
-  BlueStore will use more memory by default.
-  For more details, see `BlueStore docs _`
-
-13.2.5
-======
-
-* This release fixes the pg log hard limit bug that was introduced in
-  13.2.2, https://tracker.ceph.com/issues/36686. A flag called
-  `pglog_hardlimit` has been introduced, which is off by default. Enabling
-  this flag will limit the length of the pg log. In order to enable
-  that, the flag must be set by running `ceph osd set pglog_hardlimit`
-  after completely upgrading to 13.2.2. Once the cluster has this flag
-  set, the length of the pg log will be capped by a hard limit. Once set,
-  this flag *must not* be unset anymore. In luminous, this feature was
-  introduced in 12.2.11. Users who are running 12.2.11, and want to
-  continue to use this feauture, should upgrade to 13.2.5 or later.
-
-* This release also fixes a cve on civetweb, CVE-2019-3821 where ssl fds were
-  not closed in civetweb in case the initial negotiation fails.
-
-13.2.6
+13.2.7
 ======
 
-* Ceph v13.2.6 now packages python bindings for python3.6 instead of
-  python3.4, because EPEL7 recently switched from python3.4 to
-  python3.6 as the native python3. see the `announcement _`
-  for more details on the background of this change.
\ No newline at end of file
+* The `cache drop` admin socket command has been removed. The `ceph tell mds.X
+  cache drop` remains.
diff --git a/qa/tasks/cephfs/test_misc.py b/qa/tasks/cephfs/test_misc.py
index b1f378fd524ea..3a792a7fba516 100644
--- a/qa/tasks/cephfs/test_misc.py
+++ b/qa/tasks/cephfs/test_misc.py
@@ -217,18 +217,15 @@ class TestMisc(CephFSTestCase):
         ratio = raw_avail / fs_avail
         assert 0.9 < ratio < 1.1
 
-    def _run_drop_cache_cmd(self, timeout, use_tell):
+    def _run_drop_cache_cmd(self, timeout):
         drop_res = None
-        if use_tell:
-            mds_id = self.fs.get_lone_mds_id()
-            drop_res = json.loads(
-                self.fs.mon_manager.raw_cluster_cmd("tell", "mds.{0}".format(mds_id),
-                                                    "cache", "drop", str(timeout)))
-        else:
-            drop_res = self.fs.mds_asok(["cache", "drop", str(timeout)])
+        mds_id = self.fs.get_lone_mds_id()
+        drop_res = json.loads(
+            self.fs.mon_manager.raw_cluster_cmd("tell", "mds.{0}".format(mds_id),
+                                                "cache", "drop", str(timeout)))
         return drop_res
 
-    def _drop_cache_command(self, timeout, use_tell=True):
+    def _drop_cache_command(self, timeout):
         self.mount_b.umount_wait()
         ls_data = self.fs.mds_asok(['session', 'ls'])
         self.assert_session_count(1, ls_data)
@@ -236,12 +233,12 @@ class TestMisc(CephFSTestCase):
         # create some files
         self.mount_a.create_n_files("dc-dir/dc-file", 1000)
         # drop cache
-        drop_res = self._run_drop_cache_cmd(timeout, use_tell)
+        drop_res = self._run_drop_cache_cmd(timeout)
 
         self.assertTrue(drop_res['client_recall']['return_code'] == 0)
         self.assertTrue(drop_res['flush_journal']['return_code'] == 0)
 
-    def _drop_cache_command_timeout(self, timeout, use_tell=True):
+    def _drop_cache_command_timeout(self, timeout):
         self.mount_b.umount_wait()
         ls_data = self.fs.mds_asok(['session', 'ls'])
         self.assert_session_count(1, ls_data)
@@ -251,7 +248,7 @@ class TestMisc(CephFSTestCase):
 
         # simulate client death and try drop cache
         self.mount_a.kill()
-        drop_res = self._run_drop_cache_cmd(timeout, use_tell)
+        drop_res = self._run_drop_cache_cmd(timeout)
 
         self.assertTrue(drop_res['client_recall']['return_code'] == -errno.ETIMEDOUT)
         self.assertTrue(drop_res['flush_journal']['return_code'] == 0)
@@ -260,28 +257,14 @@ class TestMisc(CephFSTestCase):
         self.mount_a.mount()
         self.mount_a.wait_until_mounted()
 
-    def test_drop_cache_command_asok(self):
-        """
-        Basic test for checking drop cache command using admin socket.
-        Note that the cache size post trimming is not checked here.
-        """
-        self._drop_cache_command(10, use_tell=False)
-
-    def test_drop_cache_command_tell(self):
+    def test_drop_cache_command(self):
         """
         Basic test for checking drop cache command using tell interface.
        Note that the cache size post trimming is not checked here.
         """
         self._drop_cache_command(10)
 
-    def test_drop_cache_command_timeout_asok(self):
-        """
-        Check drop cache command with non-responding client using admin
-        socket. Note that the cache size post trimming is not checked here.
-        """
-        self._drop_cache_command_timeout(5, use_tell=False)
-
-    def test_drop_cache_command_timeout_tell(self):
+    def test_drop_cache_command_timeout(self):
         """
         Check drop cache command with non-responding client using tell
         interface. Note that the cache size post trimming is not checked
diff --git a/src/mds/MDSDaemon.cc b/src/mds/MDSDaemon.cc
index b0a605957e41d..a572a8dcbfc45 100644
--- a/src/mds/MDSDaemon.cc
+++ b/src/mds/MDSDaemon.cc
@@ -250,11 +250,6 @@ void MDSDaemon::set_up_admin_socket()
                                      asok_hook,
                                      "show cache status");
   assert(r == 0);
-  r = admin_socket->register_command("cache drop",
-                                     "cache drop name=timeout,type=CephInt,range=0,req=false",
-                                     asok_hook,
-                                     "drop cache");
-  assert(r == 0);
   r = admin_socket->register_command("dump tree",
                                      "dump tree "
                                      "name=root,type=CephString,req=true "
diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc
index 0964f5d57ab21..aced4c72a6821 100644
--- a/src/mds/MDSRank.cc
+++ b/src/mds/MDSRank.cc
@@ -2529,18 +2529,6 @@ bool MDSRankDispatcher::handle_asok_command(std::string_view command,
   } else if (command == "cache status") {
     Mutex::Locker l(mds_lock);
     mdcache->cache_status(f);
-  } else if (command == "cache drop") {
-    int64_t timeout;
-    if (!cmd_getval(g_ceph_context, cmdmap, "timeout", timeout)) {
-      timeout = 0;
-    }
-
-    C_SaferCond cond;
-    command_cache_drop((uint64_t)timeout, f, &cond);
-    int r = cond.wait();
-    if (r != 0) {
-      f->flush(ss);
-    }
   } else if (command == "dump tree") {
     command_dump_tree(cmdmap, ss, f);
   } else if (command == "dump loads") {
-- 
2.47.3
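
For anyone exercising the backport by hand, below is a minimal sketch of driving the surviving tell-based command through the plain `ceph` CLI, mirroring what the updated test does via `raw_cluster_cmd`. The MDS name `mds.a`, the 10-second timeout, and a reachable cluster with admin credentials are illustrative assumptions, not part of the patch.

    import json
    import subprocess

    def drop_mds_cache(mds_name="mds.a", timeout=10):
        """Run the tell-based `cache drop` and return its parsed JSON output.

        Assumes the `ceph` CLI is installed and can reach a cluster; the
        MDS name and timeout here are placeholders.
        """
        out = subprocess.check_output(
            ["ceph", "tell", mds_name, "cache", "drop", str(timeout)])
        return json.loads(out)

    if __name__ == "__main__":
        res = drop_mds_cache()
        # As in the test, the per-step return codes are reported in the result,
        # e.g. res["client_recall"]["return_code"] and res["flush_journal"]["return_code"].
        print(json.dumps(res, indent=2))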