From 6ceeab6e204286148a69eb147fbc4045beddef49 Mon Sep 17 00:00:00 2001 From: Neha Ojha Date: Mon, 17 Dec 2018 16:20:10 -0800 Subject: [PATCH] osd/mon: fix upgrades for pg log hard limit Signed-off-by: Neha Ojha --- src/include/rados.h | 1 + src/mon/MonCommands.h | 2 +- src/mon/OSDMonitor.cc | 23 +++++++++++++++++++++++ src/osd/OSDMap.cc | 11 +++++++++++ src/osd/PG.h | 4 ++++ src/osd/PrimaryLogPG.cc | 15 ++++++++++++--- 6 files changed, 52 insertions(+), 4 deletions(-) diff --git a/src/include/rados.h b/src/include/rados.h index fdc3c6b7c40..d5d0e4c1b12 100644 --- a/src/include/rados.h +++ b/src/include/rados.h @@ -159,6 +159,7 @@ extern const char *ceph_osd_state_name(int s); #define CEPH_OSDMAP_RECOVERY_DELETES (1<<19) /* deletes performed during recovery instead of peering */ #define CEPH_OSDMAP_PURGED_SNAPDIRS (1<<20) /* osds have converted snapsets */ #define CEPH_OSDMAP_NOSNAPTRIM (1<<21) /* disable snap trimming */ +#define CEPH_OSDMAP_PGLOG_HARDLIMIT (1<<22) /* put a hard limit on pg log length */ /* these are hidden in 'ceph status' view */ #define CEPH_OSDMAP_SEMIHIDDEN_FLAGS (CEPH_OSDMAP_REQUIRE_JEWEL| \ diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 7e10e2628f3..e533631b68a 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -755,7 +755,7 @@ COMMAND("osd erasure-code-profile ls", \ "list all erasure code profiles", \ "osd", "r") COMMAND("osd set " \ - "name=key,type=CephChoices,strings=full|pause|noup|nodown|noout|noin|nobackfill|norebalance|norecover|noscrub|nodeep-scrub|notieragent|nosnaptrim|sortbitwise|recovery_deletes|require_jewel_osds|require_kraken_osds " \ + "name=key,type=CephChoices,strings=full|pause|noup|nodown|noout|noin|nobackfill|norebalance|norecover|noscrub|nodeep-scrub|notieragent|nosnaptrim|sortbitwise|recovery_deletes|require_jewel_osds|require_kraken_osds|pglog_hardlimit " \ "name=yes_i_really_mean_it,type=CephBool,req=false", \ "set ", "osd", "rw") COMMAND("osd unset " \ diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 058cb8da33b..f3a3b170c5d 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -2831,6 +2831,14 @@ bool OSDMonitor::preprocess_boot(MonOpRequestRef op) goto ignore; } + if (osdmap.test_flag(CEPH_OSDMAP_PGLOG_HARDLIMIT) && + !(m->osd_features & CEPH_FEATURE_OSD_PGLOG_HARDLIMIT)) { + mon->clog->info() << "disallowing boot of OSD " + << m->get_orig_source_inst() + << " because 'pglog_hardlimit' osdmap flag is set and OSD lacks the OSD_PGLOG_HARDLIMIT feature"; + goto ignore; + } + // already booted? if (osdmap.is_up(from) && osdmap.get_addrs(from) == m->get_orig_source_addrs() && @@ -10077,6 +10085,21 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, err = -EPERM; goto reply; } + } else if (key == "pglog_hardlimit") { + if (!osdmap.get_num_up_osds() && !sure) { + ss << "Not advisable to continue since no OSDs are up. Pass " + << "--yes-i-really-mean-it if you really wish to continue."; + err = -EPERM; + goto reply; + } + if (HAVE_FEATURE(osdmap.get_up_osd_features(), OSD_PGLOG_HARDLIMIT) + || sure) { + return prepare_set_flag(op, CEPH_OSDMAP_PGLOG_HARDLIMIT); + } else { + ss << "not all up OSDs have OSD_PGLOG_HARDLIMIT feature"; + err = -EPERM; + goto reply; + } } else if (key == "require_jewel_osds") { if (!osdmap.get_num_up_osds() && !sure) { ss << "Not advisable to continue since no OSDs are up. Pass " diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index d4dbe21cefb..f11cb0518ad 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -2118,6 +2118,12 @@ int OSDMap::apply_incremental(const Incremental &inc) } } + if (inc.new_require_osd_release >= 0) { + require_osd_release = inc.new_require_osd_release; + if (require_osd_release >= CEPH_RELEASE_NAUTILUS) { + flags |= CEPH_OSDMAP_PGLOG_HARDLIMIT; + } + } // do new crush map last (after up/down stuff) if (inc.crush.length()) { bufferlist bl(inc.crush); @@ -3092,6 +3098,9 @@ void OSDMap::decode(bufferlist::const_iterator& bl) if (struct_v >= 5) { decode(require_min_compat_client, bl); decode(require_osd_release, bl); + if (require_osd_release >= CEPH_RELEASE_NAUTILUS) { + flags |= CEPH_OSDMAP_PGLOG_HARDLIMIT; + } if (require_osd_release >= CEPH_RELEASE_LUMINOUS) { flags &= ~(CEPH_OSDMAP_LEGACY_REQUIRE_FLAGS); flags |= CEPH_OSDMAP_RECOVERY_DELETES; @@ -3419,6 +3428,8 @@ string OSDMap::get_flag_string(unsigned f) s += ",recovery_deletes"; if (f & CEPH_OSDMAP_PURGED_SNAPDIRS) s += ",purged_snapdirs"; + if (f & CEPH_OSDMAP_PGLOG_HARDLIMIT) + s += ",pglog_hardlimit"; if (s.length()) s.erase(0, 1); return s; diff --git a/src/osd/PG.h b/src/osd/PG.h index 50fef7a6e98..e53b75e0a6d 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -2773,6 +2773,10 @@ protected: return !(get_osdmap()->test_flag(CEPH_OSDMAP_RECOVERY_DELETES)); } + bool hard_limit_pglog() const { + return (get_osdmap()->test_flag(CEPH_OSDMAP_PGLOG_HARDLIMIT)); + } + void init_primary_up_acting( const vector &newup, const vector &newacting, diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 6633e3dbc11..dff549535d3 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -4021,7 +4021,10 @@ void PrimaryLogPG::execute_ctx(OpContext *ctx) ceph_assert(op->may_write() || op->may_cache()); // trim log? - calc_trim_to(); + if (hard_limit_pglog()) + calc_trim_to_aggressive(); + else + calc_trim_to(); // verify that we are doing this in order? if (cct->_conf->osd_debug_op_order && m->get_source().is_client() && @@ -10625,7 +10628,10 @@ void PrimaryLogPG::simple_opc_submit(OpContextUPtr ctx) dout(20) << __func__ << " " << repop << dendl; issue_repop(repop, ctx.get()); eval_repop(repop); - calc_trim_to(); + if (hard_limit_pglog()) + calc_trim_to_aggressive(); + else + calc_trim_to(); repop->put(); } @@ -10739,7 +10745,10 @@ void PrimaryLogPG::submit_log_entries( op_applied(info.last_update); }); - calc_trim_to(); + if (hard_limit_pglog()) + calc_trim_to_aggressive(); + else + calc_trim_to(); } void PrimaryLogPG::cancel_log_updates() -- 2.39.5