From 1c9ce2fc56b545cfa11861877362e6eda81c9005 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 25 Jun 2018 13:31:27 -0500 Subject: [PATCH] mgr/devicehealth: set primary-affinity 0 for failing devices Signed-off-by: Sage Weil --- src/pybind/mgr/devicehealth/module.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/pybind/mgr/devicehealth/module.py b/src/pybind/mgr/devicehealth/module.py index a5a5c30077322..9f8829027dbfd 100644 --- a/src/pybind/mgr/devicehealth/module.py +++ b/src/pybind/mgr/devicehealth/module.py @@ -359,7 +359,7 @@ class Module(MgrModule): else: osds_out.append(_id) if osds_in: - self.mark_out(osds_in) + self.mark_out_etc(osds_in) # OSD might be marked 'out' (which means it has no # data), however PGs are still attached to it. for _id in osds_out: @@ -369,7 +369,6 @@ class Module(MgrModule): 'osd.%s is marked out ' 'but still has %s PG(s)' % (_id, num_pgs)) - # TODO: set_primary_affinity if life_expectancy_min - now <= warn_threshold_td: # device can appear in more than one location in case @@ -413,7 +412,7 @@ class Module(MgrModule): return stat['num_pgs'] return -1 - def mark_out(self, osd_ids): + def mark_out_etc(self, osd_ids): self.log.info('Marking out OSDs: %s' % osd_ids) result = CommandResult('') self.send_command(result, 'mon', '', json.dumps({ @@ -424,6 +423,18 @@ class Module(MgrModule): r, outb, outs = result.wait() if r != 0: self.log.warn('Could not mark OSD %s out. r: [%s], outb: [%s], outs: [%s]' % (osd_ids, r, outb, outs)) + for osd_id in osd_ids: + result = CommandResult('') + self.send_command(result, 'mon', '', json.dumps({ + 'prefix': 'osd primary-affinity', + 'format': 'json', + 'id': int(osd_id), + 'weight': 0.0, + }), '') + r, outb, outs = result.wait() + if r != 0: + self.log.warn('Could not set osd.%s primary-affinity, r: [%s], outb: [%s], outs: [%s]' % (osd_id, r, outb, outs)) + def extract_smart_features(self, raw): # FIXME: extract and normalize raw smartctl --json output and -- 2.39.5