git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: make drain adjust crush weight if not replacing
author Sage Weil <sage@newdream.net>
Wed, 17 Feb 2021 16:28:05 +0000 (10:28 -0600)
committer Michael Fritch <mfritch@suse.com>
Mon, 13 Jun 2022 16:31:37 +0000 (10:31 -0600)
If we are replacing an OSD, we should mark it out and then back in
again when a new device shows up.  However, if we are going to
destroy an OSD, we should just weight it to 0 in crush, so that data
doesn't move again once the OSD is purged.

Signed-off-by: Sage Weil <sage@newdream.net>
(cherry picked from commit 4fc1309f281356db0a074da22aa6f2daa034df8d)

src/pybind/mgr/cephadm/services/osd.py

index bcf24957c543641d9021aff15777dc0133dcc87d..aa1400212c78743bb7a1e5e1b3db8420bad6483e 100644 (file)
@@ -397,6 +397,34 @@ class RemoveUtil(object):
         self.mgr.log.info(f"OSDs <{osds}> are now <{flag}>")
         return True
 
+    def get_weight(self, osd: "OSD") -> Optional[float]:
+        ret, out, err = self.mgr.mon_command({
+            'prefix': 'osd crush tree',
+            'format': 'json',
+        })
+        if ret != 0:
+            self.mgr.log.error(f"Could not dump crush weights. <{err}>")
+            return None
+        j = json.loads(out)
+        for n in j.get("nodes", []):
+            if n.get("name") == f"osd.{osd.osd_id}":
+                self.mgr.log.info(f"{osd} crush weight is {n.get('crush_weight')}")
+                return n.get("crush_weight")
+        return None
+
+    def reweight_osd(self, osd: "OSD", weight: float) -> bool:
+        self.mgr.log.debug(f"running cmd: osd crush reweight on {osd}")
+        ret, out, err = self.mgr.mon_command({
+            'prefix': "osd crush reweight",
+            'name': f"osd.{osd.osd_id}",
+            'weight': weight,
+        })
+        if ret != 0:
+            self.mgr.log.error(f"Could not reweight {osd} to {weight}. <{err}>")
+            return False
+        self.mgr.log.info(f"{osd} weight is now {weight}")
+        return True
+
     def safe_to_destroy(self, osd_ids: List[int]) -> bool:
         """ Queries the safe-to-destroy flag for OSDs """
         cmd_args = {'prefix': 'osd safe-to-destroy',
@@ -489,6 +517,8 @@ class OSD:
         # mgr obj to make mgr/mon calls
         self.rm_util: RemoveUtil = remove_util
 
+        self.original_weight: Optional[float] = None
+
     def start(self) -> None:
         if self.started:
             logger.debug(f"Already started draining {self}")
@@ -500,14 +530,22 @@ class OSD:
         if self.stopped:
             logger.debug(f"Won't start draining {self}. OSD draining is stopped.")
             return False
-        self.rm_util.set_osd_flag([self], 'out')
+        if self.replace:
+            self.rm_util.set_osd_flag([self], 'out')
+        else:
+            self.original_weight = self.rm_util.get_weight(self)
+            self.rm_util.reweight_osd(self, 0.0)
         self.drain_started_at = datetime.utcnow()
         self.draining = True
         logger.debug(f"Started draining {self}.")
         return True
 
     def stop_draining(self) -> bool:
-        self.rm_util.set_osd_flag([self], 'in')
+        if self.replace:
+            self.rm_util.set_osd_flag([self], 'in')
+        else:
+            if self.original_weight:
+                self.rm_util.reweight_osd(self, self.original_weight)
         self.drain_stopped_at = datetime.utcnow()
         self.draining = False
         logger.debug(f"Stopped draining {self}.")