From a5731076add0af10686da482ecc29a1fa2600a14 Mon Sep 17 00:00:00 2001 From: David Zafman Date: Thu, 30 Mar 2017 12:42:54 -0700 Subject: [PATCH] osd: Handle backfillfull_ratio just like nearfull and full Add BACKFILLFULL as a local OSD cur_state Notify monitor of this new fullness state Signed-off-by: David Zafman --- .../osd_internals/recovery_reservation.rst | 6 +-- doc/man/8/ceph.rst | 6 +++ doc/rados/configuration/mon-config-ref.rst | 10 +++++ doc/rados/configuration/osd-config-ref.rst | 9 ---- doc/rados/operations/monitoring-osd-pg.rst | 5 ++- .../troubleshooting/troubleshooting-osd.rst | 10 +++-- qa/tasks/ceph_manager.py | 14 ++++--- qa/workunits/ceph-helpers.sh | 2 +- qa/workunits/cephtool/test.sh | 2 + qa/workunits/rest/test.py | 4 ++ src/common/ceph_strings.cc | 2 + src/common/config_opts.h | 4 +- src/include/rados.h | 1 + src/mon/MonCommands.h | 4 ++ src/mon/OSDMonitor.cc | 30 +++++++++---- src/osd/OSD.cc | 42 ++++++++++++------- src/osd/OSD.h | 4 +- src/osd/OSDMap.cc | 31 +++++++++++--- src/osd/OSDMap.h | 8 +++- src/test/cli/osdmaptool/clobber.t | 2 + src/test/cli/osdmaptool/create-print.t | 1 + src/test/cli/osdmaptool/create-racks.t | 1 + src/test/pybind/test_ceph_argparse.py | 3 ++ src/tools/ceph_monstore_tool.cc | 8 ++++ 24 files changed, 151 insertions(+), 58 deletions(-) diff --git a/doc/dev/osd_internals/recovery_reservation.rst b/doc/dev/osd_internals/recovery_reservation.rst index 24db1387f50d..cabea04cc73b 100644 --- a/doc/dev/osd_internals/recovery_reservation.rst +++ b/doc/dev/osd_internals/recovery_reservation.rst @@ -34,8 +34,8 @@ the typical process. Once the primary has its local reservation, it requests a remote reservation from the backfill target. This reservation CAN be rejected, -for instance if the OSD is too full (osd_backfill_full_ratio config -option). If the reservation is rejected, the primary drops its local +for instance if the OSD is too full (backfillfull_ratio osd setting). +If the reservation is rejected, the primary drops its local reservation, waits (osd_backfill_retry_interval), and then retries. It will retry indefinitely. @@ -64,7 +64,7 @@ to the monitor. The state chart can set: - recovering: recovering - backfill_wait: waiting for remote backfill reservations - backfilling: backfilling - - backfill_toofull: backfill reservation rejected, OSD too full + - backfill_toofull: backfill stopped, OSD(s) above backfillfull ratio -------- diff --git a/doc/man/8/ceph.rst b/doc/man/8/ceph.rst index f878f8825252..b24891268481 100644 --- a/doc/man/8/ceph.rst +++ b/doc/man/8/ceph.rst @@ -1166,6 +1166,12 @@ Usage:: ceph pg set_full_ratio +Subcommand ``set_backfillfull_ratio`` sets ratio at which pgs are considered too full to backfill. + +Usage:: + + ceph pg set_backfillfull_ratio + Subcommand ``set_nearfull_ratio`` sets ratio at which pgs are considered nearly full. diff --git a/doc/rados/configuration/mon-config-ref.rst b/doc/rados/configuration/mon-config-ref.rst index 8c05571c6ce2..b19461f7a621 100644 --- a/doc/rados/configuration/mon-config-ref.rst +++ b/doc/rados/configuration/mon-config-ref.rst @@ -400,6 +400,7 @@ a reasonable number for a near full ratio. [global] mon osd full ratio = .80 + mon osd backfillfull ratio = .75 mon osd nearfull ratio = .70 @@ -412,6 +413,15 @@ a reasonable number for a near full ratio. :Default: ``.95`` +``mon osd backfillfull ratio`` + +:Description: The percentage of disk space used before an OSD is + considered too ``full`` to backfill. 
+
+:Type: Float
+:Default: ``.90``
+
+
 ``mon osd nearfull ratio``
 
 :Description: The percentage of disk space used before an OSD is
diff --git a/doc/rados/configuration/osd-config-ref.rst b/doc/rados/configuration/osd-config-ref.rst
index d68316dff603..5679c0caeebc 100644
--- a/doc/rados/configuration/osd-config-ref.rst
+++ b/doc/rados/configuration/osd-config-ref.rst
@@ -560,15 +560,6 @@ priority than requests to read or write data.
 :Default: ``512``
 
 
-``osd backfill full ratio``
-
-:Description: Refuse to accept backfill requests when the Ceph OSD Daemon's
-              full ratio is above this value.
-
-:Type: Float
-:Default: ``0.85``
-
-
 ``osd backfill retry interval``
 
 :Description: The number of seconds to wait before retrying backfill requests.
diff --git a/doc/rados/operations/monitoring-osd-pg.rst b/doc/rados/operations/monitoring-osd-pg.rst
index 37f1960cba6b..b390b030b715 100644
--- a/doc/rados/operations/monitoring-osd-pg.rst
+++ b/doc/rados/operations/monitoring-osd-pg.rst
@@ -496,8 +496,9 @@ placement group can't be backfilled, it may be considered ``incomplete``.
 Ceph provides a number of settings to manage the load spike associated with
 reassigning placement groups to an OSD (especially a new OSD). By default,
 ``osd_max_backfills`` sets the maximum number of concurrent backfills to or from
-an OSD to 10. The ``osd backfill full ratio`` enables an OSD to refuse a
-backfill request if the OSD is approaching its full ratio (85%, by default).
+an OSD to 10. The ``backfillfull ratio`` enables an OSD to refuse a
+backfill request if the OSD is approaching its full ratio (90%, by default);
+this can be changed with the ``ceph osd set-backfillfull-ratio`` command.
 If an OSD refuses a backfill request, the ``osd backfill retry interval``
 enables an OSD to retry the request (after 10 seconds, by default). OSDs can
 also set ``osd backfill scan min`` and ``osd backfill scan max`` to manage scan
diff --git a/doc/rados/troubleshooting/troubleshooting-osd.rst b/doc/rados/troubleshooting/troubleshooting-osd.rst
index 3661f8af45df..651907dfb058 100644
--- a/doc/rados/troubleshooting/troubleshooting-osd.rst
+++ b/doc/rados/troubleshooting/troubleshooting-osd.rst
@@ -206,7 +206,9 @@ Ceph prevents you from writing to a full OSD so that you don't lose data.
 In an operational cluster, you should receive a warning when your cluster
 is getting near its full ratio. The ``mon osd full ratio`` defaults to
 ``0.95``, or 95% of capacity before it stops clients from writing data.
-The ``mon osd nearfull ratio`` defaults to ``0.85``, or 85% of capacity
+The ``mon osd backfillfull ratio`` defaults to ``0.90``, or 90% of
+capacity when it blocks backfills from starting. The
+``mon osd nearfull ratio`` defaults to ``0.85``, or 85% of capacity
 when it generates a health warning.
 
 Full cluster issues usually arise when testing how Ceph handles an OSD
@@ -214,7 +216,8 @@ failure on a small cluster. When one node has a high percentage of the
 cluster's data, the cluster can easily eclipse its nearfull and full ratio
 immediately. If you are testing how Ceph reacts to OSD failures on a small
 cluster, you should leave ample free disk space and consider temporarily
-lowering the ``mon osd full ratio`` and ``mon osd nearfull ratio``.
+lowering the ``mon osd full ratio``, ``mon osd backfillfull ratio``, and
+``mon osd nearfull ratio``.
Full ``ceph-osds`` will be reported by ``ceph health``:: @@ -225,9 +228,10 @@ Full ``ceph-osds`` will be reported by ``ceph health``:: Or:: ceph health - HEALTH_ERR 1 nearfull osds, 1 full osds + HEALTH_ERR 1 nearfull osds, 1 backfillfull osds, 1 full osds osd.2 is near full at 85% osd.3 is full at 97% + osd.4 is backfill full at 91% The best way to deal with a full cluster is to add new ``ceph-osds``, allowing the cluster to redistribute data to the newly available storage. diff --git a/qa/tasks/ceph_manager.py b/qa/tasks/ceph_manager.py index 8ff2556a7a0e..1a9aff93c3c8 100644 --- a/qa/tasks/ceph_manager.py +++ b/qa/tasks/ceph_manager.py @@ -696,7 +696,7 @@ class Thrasher: """ Test backfills stopping when the replica fills up. - First, use osd_backfill_full_ratio to simulate a now full + First, use injectfull admin command to simulate a now full osd by setting it to 0 on all of the OSDs. Second, on a random subset, set @@ -705,13 +705,14 @@ class Thrasher: Then, verify that all backfills stop. """ - self.log("injecting osd_backfill_full_ratio = 0") + self.log("injecting backfill full") for i in self.live_osds: self.ceph_manager.set_config( i, osd_debug_skip_full_check_in_backfill_reservation= - random.choice(['false', 'true']), - osd_backfill_full_ratio=0) + random.choice(['false', 'true'])) + self.ceph_manager.osd_admin_socket(i, command=['injectfull', 'backfillfull'], + check_status=True, timeout=30, stdout=DEVNULL) for i in range(30): status = self.ceph_manager.compile_pg_status() if 'backfill' not in status.keys(): @@ -724,8 +725,9 @@ class Thrasher: for i in self.live_osds: self.ceph_manager.set_config( i, - osd_debug_skip_full_check_in_backfill_reservation='false', - osd_backfill_full_ratio=0.85) + osd_debug_skip_full_check_in_backfill_reservation='false') + self.ceph_manager.osd_admin_socket(i, command=['injectfull', 'none'], + check_status=True, timeout=30, stdout=DEVNULL) def test_map_discontinuity(self): """ diff --git a/qa/workunits/ceph-helpers.sh b/qa/workunits/ceph-helpers.sh index 9863668de755..8642d376a738 100755 --- a/qa/workunits/ceph-helpers.sh +++ b/qa/workunits/ceph-helpers.sh @@ -400,6 +400,7 @@ EOF if test -z "$(get_config mon $id mon_initial_members)" ; then ceph osd pool delete rbd rbd --yes-i-really-really-mean-it || return 1 ceph osd pool create rbd $PG_NUM || return 1 + ceph osd set-backfillfull-ratio .99 fi } @@ -634,7 +635,6 @@ function activate_osd() { ceph_disk_args+=" --prepend-to-path=" local ceph_args="$CEPH_ARGS" - ceph_args+=" --osd-backfill-full-ratio=.99" ceph_args+=" --osd-failsafe-full-ratio=.99" ceph_args+=" --osd-journal-size=100" ceph_args+=" --osd-scrub-load-threshold=2000" diff --git a/qa/workunits/cephtool/test.sh b/qa/workunits/cephtool/test.sh index 8d0ce1baeffd..a6c7b617cc67 100755 --- a/qa/workunits/cephtool/test.sh +++ b/qa/workunits/cephtool/test.sh @@ -1419,6 +1419,8 @@ function test_mon_pg() ceph osd set-full-ratio .962 ceph osd dump | grep '^full_ratio 0.962' + ceph osd set-backfillfull-ratio .912 + ceph osd dump | grep '^backfillfull_ratio 0.912' ceph osd set-nearfull-ratio .892 ceph osd dump | grep '^nearfull_ratio 0.892' diff --git a/qa/workunits/rest/test.py b/qa/workunits/rest/test.py index 1a7ab30b8863..7bbb6f3ccaec 100755 --- a/qa/workunits/rest/test.py +++ b/qa/workunits/rest/test.py @@ -359,6 +359,10 @@ if __name__ == '__main__': r = expect('osd/dump', 'GET', 200, 'json', JSONHDR) assert(float(r.myjson['output']['full_ratio']) == 0.90) expect('osd/set-full-ratio?ratio=0.95', 'PUT', 200, '') + 
expect('osd/set-backfillfull-ratio?ratio=0.88', 'PUT', 200, '') + r = expect('osd/dump', 'GET', 200, 'json', JSONHDR) + assert(float(r.myjson['output']['backfillfull_ratio']) == 0.88) + expect('osd/set-backfillfull-ratio?ratio=0.90', 'PUT', 200, '') expect('osd/set-nearfull-ratio?ratio=0.90', 'PUT', 200, '') r = expect('osd/dump', 'GET', 200, 'json', JSONHDR) assert(float(r.myjson['output']['nearfull_ratio']) == 0.90) diff --git a/src/common/ceph_strings.cc b/src/common/ceph_strings.cc index 462dd6db249c..1fec2f7b0a1e 100644 --- a/src/common/ceph_strings.cc +++ b/src/common/ceph_strings.cc @@ -42,6 +42,8 @@ const char *ceph_osd_state_name(int s) return "full"; case CEPH_OSD_NEARFULL: return "nearfull"; + case CEPH_OSD_BACKFILLFULL: + return "backfillfull"; default: return "???"; } diff --git a/src/common/config_opts.h b/src/common/config_opts.h index a3e74b84860c..9eed4d485e21 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -308,6 +308,7 @@ OPTION(mon_pg_warn_min_pool_objects, OPT_INT, 1000) // do not warn on pools bel OPTION(mon_pg_check_down_all_threshold, OPT_FLOAT, .5) // threshold of down osds after which we check all pgs OPTION(mon_cache_target_full_warn_ratio, OPT_FLOAT, .66) // position between pool cache_target_full and max where we start warning OPTION(mon_osd_full_ratio, OPT_FLOAT, .95) // what % full makes an OSD "full" +OPTION(mon_osd_backfillfull_ratio, OPT_FLOAT, .90) // what % full makes an OSD backfill full (backfill halted) OPTION(mon_osd_nearfull_ratio, OPT_FLOAT, .85) // what % full makes an OSD near full OPTION(mon_allow_pool_delete, OPT_BOOL, false) // allow pool deletion OPTION(mon_globalid_prealloc, OPT_U32, 10000) // how many globalids to prealloc @@ -626,9 +627,6 @@ OPTION(osd_max_backfills, OPT_U64, 1) // Minimum recovery priority (255 = max, smaller = lower) OPTION(osd_min_recovery_priority, OPT_INT, 0) -// Refuse backfills when OSD full ratio is above this value -OPTION(osd_backfill_full_ratio, OPT_FLOAT, 0.90) - // Seconds to wait before retrying refused backfills OPTION(osd_backfill_retry_interval, OPT_DOUBLE, 30.0) diff --git a/src/include/rados.h b/src/include/rados.h index c8bc8ac4c0c5..986b42969c4e 100644 --- a/src/include/rados.h +++ b/src/include/rados.h @@ -116,6 +116,7 @@ struct ceph_eversion { #define CEPH_OSD_NEW (1<<3) /* osd is new, never marked in */ #define CEPH_OSD_FULL (1<<4) /* osd is at or above full threshold */ #define CEPH_OSD_NEARFULL (1<<5) /* osd is at or above nearfull threshold */ +#define CEPH_OSD_BACKFILLFULL (1<<6) /* osd is at or above backfillfull threshold */ extern const char *ceph_osd_state_name(int s); diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 4e816890820b..d1b09e66024c 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -592,6 +592,10 @@ COMMAND("osd set-full-ratio " \ "name=ratio,type=CephFloat,range=0.0|1.0", \ "set usage ratio at which OSDs are marked full", "osd", "rw", "cli,rest") +COMMAND("osd set-backfillfull-ratio " \ + "name=ratio,type=CephFloat,range=0.0|1.0", \ + "set usage ratio at which OSDs are marked too full to backfill", + "osd", "rw", "cli,rest") COMMAND("osd set-nearfull-ratio " \ "name=ratio,type=CephFloat,range=0.0|1.0", \ "set usage ratio at which OSDs are marked near-full", diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 7daca9c887d9..3566c3f73308 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -164,6 +164,7 @@ void OSDMonitor::create_initial() if (!g_conf->mon_debug_no_require_luminous) { 
newmap.set_flag(CEPH_OSDMAP_REQUIRE_LUMINOUS); newmap.full_ratio = g_conf->mon_osd_full_ratio; + newmap.backfillfull_ratio = g_conf->mon_osd_backfillfull_ratio; newmap.nearfull_ratio = g_conf->mon_osd_nearfull_ratio; } @@ -784,8 +785,15 @@ void OSDMonitor::create_pending() OSDMap::clean_temps(g_ceph_context, osdmap, &pending_inc); dout(10) << "create_pending did clean_temps" << dendl; + // On upgrade OSDMap has new field set by mon_osd_backfillfull_ratio config + // instead of osd_backfill_full_ratio config + if (osdmap.backfillfull_ratio <= 0) { + dout(1) << __func__ << " setting backfillfull_ratio = " + << g_conf->mon_osd_backfillfull_ratio << dendl; + pending_inc.new_backfillfull_ratio = g_conf->mon_osd_backfillfull_ratio; + } if (!osdmap.test_flag(CEPH_OSDMAP_REQUIRE_LUMINOUS)) { - // transition nearfull ratios from PGMap to OSDMap (on upgrade) + // transition full ratios from PGMap to OSDMap (on upgrade) PGMap *pg_map = &mon->pgmon()->pg_map; if (osdmap.full_ratio != pg_map->full_ratio) { dout(10) << __func__ << " full_ratio " << osdmap.full_ratio @@ -1048,8 +1056,8 @@ void OSDMonitor::encode_pending(MonitorDBStore::TransactionRef t) tmp.apply_incremental(pending_inc); if (tmp.test_flag(CEPH_OSDMAP_REQUIRE_LUMINOUS)) { - int full, nearfull; - tmp.count_full_nearfull_osds(&full, &nearfull); + int full, backfill, nearfull; + tmp.count_full_nearfull_osds(&full, &backfill, &nearfull); if (full > 0) { if (!tmp.test_flag(CEPH_OSDMAP_FULL)) { dout(10) << __func__ << " setting full flag" << dendl; @@ -2287,7 +2295,7 @@ bool OSDMonitor::preprocess_full(MonOpRequestRef op) MOSDFull *m = static_cast(op->get_req()); int from = m->get_orig_source().num(); set state; - unsigned mask = CEPH_OSD_NEARFULL | CEPH_OSD_FULL; + unsigned mask = CEPH_OSD_NEARFULL | CEPH_OSD_BACKFILLFULL | CEPH_OSD_FULL; // check permissions, ignore if failed MonSession *session = m->get_session(); @@ -2337,7 +2345,7 @@ bool OSDMonitor::prepare_full(MonOpRequestRef op) const MOSDFull *m = static_cast(op->get_req()); const int from = m->get_orig_source().num(); - const unsigned mask = CEPH_OSD_NEARFULL | CEPH_OSD_FULL; + const unsigned mask = CEPH_OSD_NEARFULL | CEPH_OSD_BACKFILLFULL | CEPH_OSD_FULL; const unsigned want_state = m->state & mask; // safety first unsigned cur_state = osdmap.get_state(from); @@ -3342,13 +3350,18 @@ void OSDMonitor::get_health(list >& summary, } if (osdmap.test_flag(CEPH_OSDMAP_REQUIRE_LUMINOUS)) { - int full, nearfull; - osdmap.count_full_nearfull_osds(&full, &nearfull); + int full, backfill, nearfull; + osdmap.count_full_nearfull_osds(&full, &backfill, &nearfull); if (full > 0) { ostringstream ss; ss << full << " full osd(s)"; summary.push_back(make_pair(HEALTH_ERR, ss.str())); } + if (backfill > 0) { + ostringstream ss; + ss << backfill << " backfillfull osd(s)"; + summary.push_back(make_pair(HEALTH_WARN, ss.str())); + } if (nearfull > 0) { ostringstream ss; ss << nearfull << " nearfull osd(s)"; @@ -6929,6 +6942,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, return true; } else if (prefix == "osd set-full-ratio" || + prefix == "osd set-backfillfull-ratio" || prefix == "osd set-nearfull-ratio") { if (!osdmap.test_flag(CEPH_OSDMAP_REQUIRE_LUMINOUS)) { ss << "you must complete the upgrade and set require_luminous_osds before" @@ -6945,6 +6959,8 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, } if (prefix == "osd set-full-ratio") pending_inc.new_full_ratio = n; + else if (prefix == "osd set-backfillfull-ratio") + pending_inc.new_backfillfull_ratio = n; else if (prefix 
== "osd set-nearfull-ratio") pending_inc.new_nearfull_ratio = n; ss << prefix << " " << n; diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 8799b7933a3d..1368c9d654e9 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -736,20 +736,24 @@ void OSDService::check_full_status(const osd_stat_t &osd_stat) return; } float nearfull_ratio = osdmap->get_nearfull_ratio(); - float full_ratio = std::max(osdmap->get_full_ratio(), nearfull_ratio); + float backfillfull_ratio = std::max(osdmap->get_backfillfull_ratio(), nearfull_ratio); + float full_ratio = std::max(osdmap->get_full_ratio(), backfillfull_ratio); float failsafe_ratio = std::max(get_failsafe_full_ratio(), full_ratio); if (!osdmap->test_flag(CEPH_OSDMAP_REQUIRE_LUMINOUS)) { // use the failsafe for nearfull and full; the mon isn't using the // flags anyway because we're mid-upgrade. full_ratio = failsafe_ratio; + backfillfull_ratio = failsafe_ratio; nearfull_ratio = failsafe_ratio; } else if (full_ratio <= 0 || + backfillfull_ratio <= 0 || nearfull_ratio <= 0) { - derr << __func__ << " full_ratio or nearfull_ratio is <= 0" << dendl; + derr << __func__ << " full_ratio, backfillfull_ratio or nearfull_ratio is <= 0" << dendl; // use failsafe flag. ick. the monitor did something wrong or the user // did something stupid. full_ratio = failsafe_ratio; + backfillfull_ratio = failsafe_ratio; nearfull_ratio = failsafe_ratio; } @@ -759,6 +763,8 @@ void OSDService::check_full_status(const osd_stat_t &osd_stat) new_state = FAILSAFE; } else if (ratio > full_ratio || injectfull) { new_state = FULL; + } else if (ratio > backfillfull_ratio) { + new_state = BACKFILLFULL; } else if (ratio > nearfull_ratio) { new_state = NEARFULL; } else { @@ -766,6 +772,7 @@ void OSDService::check_full_status(const osd_stat_t &osd_stat) } dout(20) << __func__ << " cur ratio " << ratio << ". nearfull_ratio " << nearfull_ratio + << ". backfillfull_ratio " << backfillfull_ratio << ", full_ratio " << full_ratio << ", failsafe_ratio " << failsafe_ratio << ", new state " << get_full_state_name(new_state) @@ -793,6 +800,8 @@ bool OSDService::need_fullness_update() if (osdmap->exists(whoami)) { if (osdmap->get_state(whoami) & CEPH_OSD_FULL) { cur = FULL; + } else if (osdmap->get_state(whoami) & CEPH_OSD_BACKFILLFULL) { + cur = BACKFILLFULL; } else if (osdmap->get_state(whoami) & CEPH_OSD_NEARFULL) { cur = NEARFULL; } @@ -800,6 +809,8 @@ bool OSDService::need_fullness_update() s_names want = NONE; if (is_full()) want = FULL; + else if (is_backfillfull()) + want = BACKFILLFULL; else if (is_nearfull()) want = NEARFULL; return want != cur; @@ -818,15 +829,19 @@ bool OSDService::check_failsafe_full() return true; } - if (cur_state == FAILSAFE) - return true; - return false; + return cur_state == FAILSAFE; } bool OSDService::is_nearfull() { Mutex::Locker l(full_status_lock); - return cur_state == NEARFULL; + return cur_state >= NEARFULL; +} + +bool OSDService::is_backfillfull() +{ + Mutex::Locker l(full_status_lock); + return cur_state >= BACKFILLFULL; } bool OSDService::is_full() @@ -838,18 +853,11 @@ bool OSDService::is_full() bool OSDService::too_full_for_backfill(ostream &ss) { Mutex::Locker l(full_status_lock); - if (injectfull) { - // injectfull is either a count of the number of times to return full - // or if -1 then always return full - if (injectfull > 0) - --injectfull; - ss << "Injected full OSD (" << (injectfull < 0 ? 
string("set") : std::to_string(injectfull)) << ")"; + if (cur_state >= BACKFILLFULL) { + ss << "current usage is " << cur_ratio << ", which is greater than max allowed ratio"; return true; } - double max_ratio; - max_ratio = cct->_conf->osd_backfill_full_ratio; - ss << "current usage is " << cur_ratio << ", which is greater than max allowed ratio " << max_ratio; - return cur_ratio >= max_ratio; + return false; } void OSDService::update_osd_stat(vector& hb_peers) @@ -5213,6 +5221,8 @@ void OSD::send_full_update() unsigned state = 0; if (service.is_full()) { state = CEPH_OSD_FULL; + } else if (service.is_backfillfull()) { + state = CEPH_OSD_BACKFILLFULL; } else if (service.is_nearfull()) { state = CEPH_OSD_NEARFULL; } diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 29bacb608795..19a1ac7a2df1 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -1139,11 +1139,12 @@ public: // -- OSD Full Status -- private: Mutex full_status_lock; - enum s_names { NONE, NEARFULL, FULL, FAILSAFE } cur_state; // ascending + enum s_names { NONE, NEARFULL, BACKFILLFULL, FULL, FAILSAFE } cur_state; // ascending const char *get_full_state_name(s_names s) { switch (s) { case NONE: return "none"; case NEARFULL: return "nearfull"; + case BACKFILLFULL: return "backfillfull"; case FULL: return "full"; case FAILSAFE: return "failsafe"; default: return "???"; @@ -1155,6 +1156,7 @@ private: public: bool check_failsafe_full(); bool is_nearfull(); + bool is_backfillfull(); bool is_full(); bool too_full_for_backfill(ostream &ss); bool need_fullness_update(); ///< osdmap state needs update diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index c4e388f86cfb..6d0cbfe0a283 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -450,7 +450,7 @@ void OSDMap::Incremental::encode(bufferlist& bl, uint64_t features) const } { - uint8_t target_v = 3; + uint8_t target_v = 4; if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) { target_v = 2; } @@ -470,6 +470,7 @@ void OSDMap::Incremental::encode(bufferlist& bl, uint64_t features) const if (target_v >= 3) { ::encode(new_nearfull_ratio, bl); ::encode(new_full_ratio, bl); + ::encode(new_backfillfull_ratio, bl); } ENCODE_FINISH(bl); // osd-only data } @@ -654,7 +655,7 @@ void OSDMap::Incremental::decode(bufferlist::iterator& bl) } { - DECODE_START(3, bl); // extended, osd-only data + DECODE_START(4, bl); // extended, osd-only data ::decode(new_hb_back_up, bl); ::decode(new_up_thru, bl); ::decode(new_last_clean_interval, bl); @@ -677,6 +678,11 @@ void OSDMap::Incremental::decode(bufferlist::iterator& bl) new_nearfull_ratio = -1; new_full_ratio = -1; } + if (struct_v >= 4) { + ::decode(new_backfillfull_ratio, bl); + } else { + new_backfillfull_ratio = -1; + } DECODE_FINISH(bl); // osd-only data } @@ -720,6 +726,7 @@ void OSDMap::Incremental::dump(Formatter *f) const f->dump_int("new_flags", new_flags); f->dump_float("new_full_ratio", new_full_ratio); f->dump_float("new_nearfull_ratio", new_nearfull_ratio); + f->dump_float("new_backfillfull_ratio", new_backfillfull_ratio); if (fullmap.length()) { f->open_object_section("full_map"); @@ -1022,14 +1029,17 @@ int OSDMap::calc_num_osds() return num_osd; } -void OSDMap::count_full_nearfull_osds(int *full, int *nearfull) const +void OSDMap::count_full_nearfull_osds(int *full, int *backfill, int *nearfull) const { *full = 0; + *backfill = 0; *nearfull = 0; for (int i = 0; i < max_osd; ++i) { if (exists(i) && is_up(i) && is_in(i)) { if (osd_state[i] & CEPH_OSD_FULL) ++(*full); + else if (osd_state[i] & CEPH_OSD_BACKFILLFULL) + ++(*backfill); else if 
(osd_state[i] & CEPH_OSD_NEARFULL) ++(*nearfull); } @@ -1575,6 +1585,9 @@ int OSDMap::apply_incremental(const Incremental &inc) if (inc.new_nearfull_ratio >= 0) { nearfull_ratio = inc.new_nearfull_ratio; } + if (inc.new_backfillfull_ratio >= 0) { + backfillfull_ratio = inc.new_backfillfull_ratio; + } if (inc.new_full_ratio >= 0) { full_ratio = inc.new_full_ratio; } @@ -2148,7 +2161,7 @@ void OSDMap::encode(bufferlist& bl, uint64_t features) const } { - uint8_t target_v = 2; + uint8_t target_v = 3; if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) { target_v = 1; } @@ -2173,6 +2186,7 @@ void OSDMap::encode(bufferlist& bl, uint64_t features) const if (target_v >= 2) { ::encode(nearfull_ratio, bl); ::encode(full_ratio, bl); + ::encode(backfillfull_ratio, bl); } ENCODE_FINISH(bl); // osd-only data } @@ -2390,7 +2404,7 @@ void OSDMap::decode(bufferlist::iterator& bl) } { - DECODE_START(2, bl); // extended, osd-only data + DECODE_START(3, bl); // extended, osd-only data ::decode(osd_addrs->hb_back_addr, bl); ::decode(osd_info, bl); ::decode(blacklist, bl); @@ -2407,6 +2421,11 @@ void OSDMap::decode(bufferlist::iterator& bl) nearfull_ratio = 0; full_ratio = 0; } + if (struct_v >= 3) { + ::decode(backfillfull_ratio, bl); + } else { + backfillfull_ratio = 0; + } DECODE_FINISH(bl); // osd-only data } @@ -2480,6 +2499,7 @@ void OSDMap::dump(Formatter *f) const f->dump_stream("modified") << get_modified(); f->dump_string("flags", get_flag_string()); f->dump_float("full_ratio", full_ratio); + f->dump_float("backfillfull_ratio", backfillfull_ratio); f->dump_float("nearfull_ratio", nearfull_ratio); f->dump_string("cluster_snapshot", get_cluster_snapshot()); f->dump_int("pool_max", get_pool_max()); @@ -2701,6 +2721,7 @@ void OSDMap::print(ostream& out) const out << "flags " << get_flag_string() << "\n"; out << "full_ratio " << full_ratio << "\n"; + out << "backfillfull_ratio " << backfillfull_ratio << "\n"; out << "nearfull_ratio " << nearfull_ratio << "\n"; if (get_cluster_snapshot().length()) out << "cluster_snapshot " << get_cluster_snapshot() << "\n"; diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h index eb0399edda6e..2e8fcf800d98 100644 --- a/src/osd/OSDMap.h +++ b/src/osd/OSDMap.h @@ -155,6 +155,7 @@ public: string cluster_snapshot; float new_nearfull_ratio = -1; + float new_backfillfull_ratio = -1; float new_full_ratio = -1; mutable bool have_crc; ///< crc values are defined @@ -254,7 +255,7 @@ private: string cluster_snapshot; bool new_blacklist_entries; - float full_ratio = 0, nearfull_ratio = 0; + float full_ratio = 0, backfillfull_ratio = 0, nearfull_ratio = 0; mutable uint64_t cached_up_osd_features; @@ -336,10 +337,13 @@ public: float get_full_ratio() const { return full_ratio; } + float get_backfillfull_ratio() const { + return backfillfull_ratio; + } float get_nearfull_ratio() const { return nearfull_ratio; } - void count_full_nearfull_osds(int *full, int *nearfull) const; + void count_full_nearfull_osds(int *full, int *backfill, int *nearfull) const; /***** cluster state *****/ /* osds */ diff --git a/src/test/cli/osdmaptool/clobber.t b/src/test/cli/osdmaptool/clobber.t index 275fefcc737d..dd7e1756104e 100644 --- a/src/test/cli/osdmaptool/clobber.t +++ b/src/test/cli/osdmaptool/clobber.t @@ -20,6 +20,7 @@ modified \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re) flags full_ratio 0 + backfillfull_ratio 0 nearfull_ratio 0 pool 0 'rbd' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 flags hashpspool stripe_width 0 @@ -43,6 +44,7 @@ modified 
\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re) flags full_ratio 0 + backfillfull_ratio 0 nearfull_ratio 0 pool 0 'rbd' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 64 pgp_num 64 last_change 0 flags hashpspool stripe_width 0 diff --git a/src/test/cli/osdmaptool/create-print.t b/src/test/cli/osdmaptool/create-print.t index e619f7206e92..32468a4a6fab 100644 --- a/src/test/cli/osdmaptool/create-print.t +++ b/src/test/cli/osdmaptool/create-print.t @@ -77,6 +77,7 @@ modified \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re) flags full_ratio 0 + backfillfull_ratio 0 nearfull_ratio 0 pool 0 'rbd' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 192 pgp_num 192 last_change 0 flags hashpspool stripe_width 0 diff --git a/src/test/cli/osdmaptool/create-racks.t b/src/test/cli/osdmaptool/create-racks.t index 19006986f681..0759698127d9 100644 --- a/src/test/cli/osdmaptool/create-racks.t +++ b/src/test/cli/osdmaptool/create-racks.t @@ -790,6 +790,7 @@ modified \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+ (re) flags full_ratio 0 + backfillfull_ratio 0 nearfull_ratio 0 pool 0 'rbd' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 15296 pgp_num 15296 last_change 0 flags hashpspool stripe_width 0 diff --git a/src/test/pybind/test_ceph_argparse.py b/src/test/pybind/test_ceph_argparse.py index e9694064bd20..0c9cc7524c57 100755 --- a/src/test/pybind/test_ceph_argparse.py +++ b/src/test/pybind/test_ceph_argparse.py @@ -1150,6 +1150,9 @@ class TestOSD(TestArgparse): def test_set_full_ratio(self): self.set_ratio('set-full-ratio') + def test_set_backfillfull_ratio(self): + self.set_ratio('set-backfillfull-ratio') + def test_set_nearfull_ratio(self): self.set_ratio('set-nearfull-ratio') diff --git a/src/tools/ceph_monstore_tool.cc b/src/tools/ceph_monstore_tool.cc index 874a4f0583fd..8c941443d818 100644 --- a/src/tools/ceph_monstore_tool.cc +++ b/src/tools/ceph_monstore_tool.cc @@ -654,6 +654,14 @@ static int update_pgmap_meta(MonitorDBStore& st) ::encode(full_ratio, bl); t->put(prefix, "full_ratio", bl); } + { + auto backfillfull_ratio = g_ceph_context->_conf->mon_osd_backfillfull_ratio; + if (backfillfull_ratio > 1.0) + backfillfull_ratio /= 100.0; + bufferlist bl; + ::encode(backfillfull_ratio, bl); + t->put(prefix, "backfillfull_ratio", bl); + } { auto nearfull_ratio = g_ceph_context->_conf->mon_osd_nearfull_ratio; if (nearfull_ratio > 1.0) -- 2.47.3
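
A minimal usage sketch of the new threshold, mirroring the cephtool and REST
tests above; the 0.90 value is illustrative (it matches the new
mon_osd_backfillfull_ratio default), and the command is only accepted once
CEPH_OSDMAP_REQUIRE_LUMINOUS is set, per the check in
OSDMonitor::prepare_command_impl(). Usage::

    # Raise or lower the cluster-wide backfillfull threshold.
    ceph osd set-backfillfull-ratio 0.90

    # The new field is reported alongside full_ratio and nearfull_ratio.
    ceph osd dump | grep backfillfull_ratio

An OSD whose usage crosses this ratio enters the new BACKFILLFULL state,
reports CEPH_OSD_BACKFILLFULL to the monitor (surfaced as a HEALTH_WARN
"N backfillfull osd(s)" summary), and rejects remote backfill reservations in
too_full_for_backfill(), while client writes are still allowed until the
full_ratio is reached.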