From: Sage Weil
Date: Wed, 17 Feb 2021 16:28:05 +0000 (-0600)
Subject: mgr/cephadm: make drain adjust crush weight if not replacing
X-Git-Tag: v15.2.17~21^2~3
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=a46c2004218c5d7a30bcb1d2091f0e003e55aa95;p=ceph.git

mgr/cephadm: make drain adjust crush weight if not replacing

If we are replacing an OSD, we should mark it out and then back in again
when a new device shows up. However, if we are going to destroy an OSD,
we should just weight it to 0 in crush, so that data doesn't move again
once the OSD is purged.

Signed-off-by: Sage Weil
(cherry picked from commit 4fc1309f281356db0a074da22aa6f2daa034df8d)
---

diff --git a/src/pybind/mgr/cephadm/services/osd.py b/src/pybind/mgr/cephadm/services/osd.py
index bcf24957c5436..aa1400212c787 100644
--- a/src/pybind/mgr/cephadm/services/osd.py
+++ b/src/pybind/mgr/cephadm/services/osd.py
@@ -397,6 +397,34 @@ class RemoveUtil(object):
         self.mgr.log.info(f"OSDs <{osds}> are now <{flag}>")
         return True
 
+    def get_weight(self, osd: "OSD") -> Optional[float]:
+        ret, out, err = self.mgr.mon_command({
+            'prefix': 'osd crush tree',
+            'format': 'json',
+        })
+        if ret != 0:
+            self.mgr.log.error(f"Could not dump crush weights. <{err}>")
+            return None
+        j = json.loads(out)
+        for n in j.get("nodes", []):
+            if n.get("name") == f"osd.{osd.osd_id}":
+                self.mgr.log.info(f"{osd} crush weight is {n.get('crush_weight')}")
+                return n.get("crush_weight")
+        return None
+
+    def reweight_osd(self, osd: "OSD", weight: float) -> bool:
+        self.mgr.log.debug(f"running cmd: osd crush reweight on {osd}")
+        ret, out, err = self.mgr.mon_command({
+            'prefix': "osd crush reweight",
+            'name': f"osd.{osd.osd_id}",
+            'weight': weight,
+        })
+        if ret != 0:
+            self.mgr.log.error(f"Could not reweight {osd} to {weight}. <{err}>")
+            return False
+        self.mgr.log.info(f"{osd} weight is now {weight}")
+        return True
+
     def safe_to_destroy(self, osd_ids: List[int]) -> bool:
         """ Queries the safe-to-destroy flag for OSDs """
         cmd_args = {'prefix': 'osd safe-to-destroy',
@@ -489,6 +517,8 @@ class OSD:
         # mgr obj to make mgr/mon calls
         self.rm_util: RemoveUtil = remove_util
 
+        self.original_weight: Optional[float] = None
+
     def start(self) -> None:
         if self.started:
             logger.debug(f"Already started draining {self}")
@@ -500,14 +530,22 @@ class OSD:
         if self.stopped:
             logger.debug(f"Won't start draining {self}. OSD draining is stopped.")
             return False
-        self.rm_util.set_osd_flag([self], 'out')
+        if self.replace:
+            self.rm_util.set_osd_flag([self], 'out')
+        else:
+            self.original_weight = self.rm_util.get_weight(self)
+            self.rm_util.reweight_osd(self, 0.0)
         self.drain_started_at = datetime.utcnow()
         self.draining = True
         logger.debug(f"Started draining {self}.")
         return True
 
     def stop_draining(self) -> bool:
-        self.rm_util.set_osd_flag([self], 'in')
+        if self.replace:
+            self.rm_util.set_osd_flag([self], 'in')
+        else:
+            if self.original_weight:
+                self.rm_util.reweight_osd(self, self.original_weight)
         self.drain_stopped_at = datetime.utcnow()
         self.draining = False
         logger.debug(f"Stopped draining {self}.")