From f6e1a797d45d7d646835bad8e258908407b2695b Mon Sep 17 00:00:00 2001
From: Patrick Donnelly <pdonnell@redhat.com>
Date: Thu, 4 Jan 2018 09:41:06 -0800
Subject: [PATCH] Revert "Merge PR #19369 into master"

This reverts commit 3189ba19a779e4a54265aa831a57d686a7911187, reversing
changes made to b7620de0200bdbac6a1a7abbdfbafd8b43d7bf47.

Although the change in JSON format was positive, it had the unfortunate
side-effect of breaking upgrade testing (because the QA framework must
handle the transition of mdsmap["info"] from an object to a list) and
the ceph-mgr.
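
To illustrate the incompatibility (the gid/name/state values below are
made up, not output from a real cluster): in the format restored by
this revert, "up" and "info" are objects keyed by "mds_<rank>" and
"gid_<gid>", while in the reverted format they are lists, so consumers
written against one shape break on the other:

    # Restored format: "up"/"info" are objects keyed by mds_<rank>/gid_<gid>.
    mdsmap_object = {"up": {"mds_0": 4107},
                     "info": {"gid_4107": {"gid": 4107, "name": "a",
                                           "rank": 0, "state": "up:active"}}}
    # Reverted format: "up" is a list of ranks, "info" a list of info dicts.
    mdsmap_list = {"up": [0],
                   "info": [{"gid": 4107, "name": "a",
                             "rank": 0, "state": "up:active"}]}

    # The object shape is iterated via .values()/.items(); the list directly.
    names = [i["name"] for i in mdsmap_object["info"].values()]
    assert names == [i["name"] for i in mdsmap_list["info"]]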
Fixes: http://tracker.ceph.com/issues/22527

Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
---
 .githubmap                       |  1 -
 PendingReleaseNotes              |  3 +--
 qa/tasks/cephfs/filesystem.py    | 38 +++++++++++++-------------------
 qa/tasks/cephfs/test_failover.py |  5 +++--
 qa/tasks/cephfs/test_strays.py   |  3 ++-
 src/mds/MDSMap.cc                | 12 ++++++----
 src/pybind/ceph_volume_client.py | 17 +++++---------
 7 files changed, 35 insertions(+), 44 deletions(-)

diff --git a/.githubmap b/.githubmap
index d2e4d3c894a..bee2e2a3605 100644
--- a/.githubmap
+++ b/.githubmap
@@ -51,4 +51,3 @@ jtlayton Jeff Layton
 yuriw Yuri Weinstein
 jecluis João Eduardo Luís
 yunfeiguan Yunfei Guan
-xiaoxichen Xiaoxi Chen
diff --git a/PendingReleaseNotes b/PendingReleaseNotes
index f5b945d8df3..b48eb44cf70 100644
--- a/PendingReleaseNotes
+++ b/PendingReleaseNotes
@@ -25,8 +25,7 @@
   mds_session_timeout, mds_session_autoclose, and mds_max_file_size are
   now obsolete.
 
-* The JSON format of `ceph fs status` and `ceph mds stat` has changed
-  for the "up" and "info" sections of the mds_map.
+
 >= 12.2.2
 ---------
 
diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py
index c94d80bf4ab..d7fa5168c00 100644
--- a/qa/tasks/cephfs/filesystem.py
+++ b/qa/tasks/cephfs/filesystem.py
@@ -64,7 +64,7 @@ class FSStatus(object):
         for info in self.get_standbys():
             yield info
         for fs in self.map['filesystems']:
-            for info in fs['mdsmap']['info']:
+            for info in fs['mdsmap']['info'].values():
                 yield info
 
     def get_standbys(self):
@@ -97,7 +97,7 @@ class FSStatus(object):
         Get the standby:replay MDS for the given FSCID.
         """
         fs = self.get_fsmap(fscid)
-        for info in fs['mdsmap']['info']:
+        for info in fs['mdsmap']['info'].values():
             if info['state'] == 'up:standby-replay':
                 yield info
 
@@ -106,7 +106,7 @@ class FSStatus(object):
         Get the ranks for the given FSCID.
        """
        fs = self.get_fsmap(fscid)
-        for info in fs['mdsmap']['info']:
+        for info in fs['mdsmap']['info'].values():
             if info['rank'] >= 0:
                 yield info
 
@@ -119,14 +119,6 @@ class FSStatus(object):
                 return info
         raise RuntimeError("FSCID {0} has no rank {1}".format(fscid, rank))
 
-    def get_cluster(self, fscid):
-        """
-        Get the MDS cluster for the given FSCID.
-        """
-        fs = self.get_fsmap(fscid)
-        for info in fs['mdsmap']['info']:
-            yield info
-
     def get_mds(self, name):
         """
         Get the info for the given MDS name.
@@ -302,8 +294,8 @@ class MDSCluster(CephCluster):
 
             mdsmap = fs['mdsmap']
             metadata_pool = pool_id_name[mdsmap['metadata_pool']]
-            for info in status.get_ranks(fs['id']):
-                self.mon_manager.raw_cluster_cmd('mds', 'fail', str(info['gid']))
+            for gid in mdsmap['up'].values():
+                self.mon_manager.raw_cluster_cmd('mds', 'fail', gid.__str__())
             self.mon_manager.raw_cluster_cmd('fs', 'rm', mdsmap['fs_name'],
                                              '--yes-i-really-mean-it')
             self.mon_manager.raw_cluster_cmd('osd', 'pool', 'delete',
@@ -670,11 +662,11 @@
 
         log.info("are_daemons_healthy: mds map: {0}".format(mds_map))
 
-        for info in mds_map['info']:
-            if info['state'] not in ["up:active", "up:standby", "up:standby-replay"]:
-                log.warning("Unhealthy mds state {0}:{1}".format(info['gid'], info['state']))
+        for mds_id, mds_status in mds_map['info'].items():
+            if mds_status['state'] not in ["up:active", "up:standby", "up:standby-replay"]:
+                log.warning("Unhealthy mds state {0}:{1}".format(mds_id, mds_status['state']))
                 return False
-            elif info['state'] == 'up:active':
+            elif mds_status['state'] == 'up:active':
                 active_count += 1
 
         log.info("are_daemons_healthy: {0}/{1}".format(
@@ -683,10 +675,10 @@
 
         if active_count >= mds_map['max_mds']:
             # The MDSMap says these guys are active, but let's check they really are
-            for info in mds_map['info']:
-                if info['state'] == 'up:active':
+            for mds_id, mds_status in mds_map['info'].items():
+                if mds_status['state'] == 'up:active':
                     try:
-                        daemon_status = self.mds_asok(["status"], mds_id=info['name'])
+                        daemon_status = self.mds_asok(["status"], mds_id=mds_status['name'])
                     except CommandFailedError as cfe:
                         if cfe.exitstatus == errno.EINVAL:
                             # Old version, can't do this check
@@ -711,7 +703,7 @@
         """
         status = self.get_mds_map()
         result = []
-        for mds_status in sorted(status['info'], lambda a, b: cmp(a['rank'], b['rank'])):
+        for mds_status in sorted(status['info'].values(), lambda a, b: cmp(a['rank'], b['rank'])):
             if mds_status['state'] == state or state is None:
                 result.append(mds_status['name'])
 
@@ -729,7 +721,7 @@
     def get_all_mds_rank(self):
         status = self.get_mds_map()
         result = []
-        for mds_status in sorted(status['info'], lambda a, b: cmp(a['rank'], b['rank'])):
+        for mds_status in sorted(status['info'].values(), lambda a, b: cmp(a['rank'], b['rank'])):
             if mds_status['rank'] != -1 and mds_status['state'] != 'up:standby-replay':
                 result.append(mds_status['rank'])
 
@@ -744,7 +736,7 @@
         """
         status = self.get_mds_map()
         result = []
-        for mds_status in sorted(status['info'], lambda a, b: cmp(a['rank'], b['rank'])):
+        for mds_status in sorted(status['info'].values(), lambda a, b: cmp(a['rank'], b['rank'])):
             if mds_status['rank'] != -1 and mds_status['state'] != 'up:standby-replay':
                 result.append(mds_status['name'])
 
diff --git a/qa/tasks/cephfs/test_failover.py b/qa/tasks/cephfs/test_failover.py
index 1ece40fcc6e..3306e9441c7 100644
--- a/qa/tasks/cephfs/test_failover.py
+++ b/qa/tasks/cephfs/test_failover.py
@@ -83,7 +83,8 @@ class TestFailover(CephFSTestCase):
 
         # Wait for everyone to go laggy
        def laggy():
-            for info in self.fs.status().get_cluster(self.fs.id):
+            mdsmap = self.fs.get_mds_map()
+            for info in mdsmap['info'].values():
                 if "laggy_since" not in info:
                     return False
 
@@ -468,7 +469,7 @@ class TestMultiFilesystems(CephFSTestCase):
 
         def get_info_by_name(fs, mds_name):
             mds_map = fs.get_mds_map()
-            for info in mds_map['info']:
+            for gid_str, info in mds_map['info'].items():
                 if info['name'] == mds_name:
                     return info
 
diff --git a/qa/tasks/cephfs/test_strays.py b/qa/tasks/cephfs/test_strays.py
index 5483d51bf24..919b059b871 100644
--- a/qa/tasks/cephfs/test_strays.py
+++ b/qa/tasks/cephfs/test_strays.py
@@ -544,7 +544,8 @@ class TestStrays(CephFSTestCase):
             time.sleep(1)
 
     def _is_stopped(self, rank):
-        return rank not in self.fs.get_mds_map()['up']
+        mds_map = self.fs.get_mds_map()
+        return rank not in [i['rank'] for i in mds_map['info'].values()]
 
     def test_purge_on_shutdown(self):
         """
diff --git a/src/mds/MDSMap.cc b/src/mds/MDSMap.cc
index 7678635e534..ea154ff18bb 100644
--- a/src/mds/MDSMap.cc
+++ b/src/mds/MDSMap.cc
@@ -153,9 +153,11 @@ void MDSMap::dump(Formatter *f) const
   for (set<mds_rank_t>::const_iterator p = in.begin(); p != in.end(); ++p)
     f->dump_int("mds", *p);
   f->close_section();
-  f->open_array_section("up");
+  f->open_object_section("up");
   for (map<mds_rank_t, mds_gid_t>::const_iterator p = up.begin(); p != up.end(); ++p) {
-    f->dump_int("mds", p->first);
+    char s[14];
+    sprintf(s, "mds_%d", int(p->first));
+    f->dump_int(s, p->second);
   }
   f->close_section();
   f->open_array_section("failed");
@@ -170,9 +172,11 @@ void MDSMap::dump(Formatter *f) const
   for (set<mds_rank_t>::const_iterator p = stopped.begin(); p != stopped.end(); ++p)
     f->dump_int("mds", *p);
   f->close_section();
-  f->open_array_section("info");
+  f->open_object_section("info");
   for (map<mds_gid_t, mds_info_t>::const_iterator p = mds_info.begin(); p != mds_info.end(); ++p) {
-    f->open_object_section("info_item");
+    char s[25]; // 'gid_' + len(str(ULLONG_MAX)) + '\0'
+    sprintf(s, "gid_%llu", (long long unsigned)p->first);
+    f->open_object_section(s);
     p->second.dump(f);
     f->close_section();
   }
diff --git a/src/pybind/ceph_volume_client.py b/src/pybind/ceph_volume_client.py
index ae6bb8327d1..9e4a2a21682 100644
--- a/src/pybind/ceph_volume_client.py
+++ b/src/pybind/ceph_volume_client.py
@@ -114,21 +114,14 @@ class RankEvicter(threading.Thread):
 
         super(RankEvicter, self).__init__()
 
-    def _get_info(self, key, value):
-        for info in self._mds_map["info"]:
-            if info[key] == value:
-                return info
-        return {}
-
     def _ready_to_evict(self):
-        info = self._get_info('rank', self.rank)
-        if (self.rank not in self._mds_map['up']) or info.get('gid', None) != self.gid:
+        if self._mds_map['up'].get("mds_{0}".format(self.rank), None) != self.gid:
             log.info("Evicting {0} from {1}/{2}: rank no longer associated with gid, done.".format(
                 self._client_spec, self.rank, self.gid
             ))
             raise RankEvicter.GidGone()
 
-        info = self._get_info('gid', self.gid)
+        info = self._mds_map['info']["gid_{0}".format(self.gid)]
         log.debug("_ready_to_evict: state={0}".format(info['state']))
         return info['state'] in ["up:active", "up:clientreplay"]
 
@@ -400,8 +393,10 @@ class CephFSVolumeClient(object):
 
         mds_map = self.get_mds_map()
         up = {}
-        for rank in mds_map['up']:
-            up[rank] = self._get_info('rank', rank)['gid']
+        for name, gid in mds_map['up'].items():
+            # Quirk of the MDSMap JSON dump: keys in the up dict are like "mds_0"
+            assert name.startswith("mds_")
+            up[int(name[4:])] = gid
 
         # For all MDS ranks held by a daemon
         # Do the parallelism in python instead of using "tell mds.*", because
-- 
2.39.5