From: Sage Weil Date: Thu, 9 Aug 2018 13:33:42 +0000 (-0500) Subject: osd: vary tick interval +/- 5% to avoid scrub livelocks X-Git-Tag: v14.0.1~595^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F23512%2Fhead;p=ceph.git osd: vary tick interval +/- 5% to avoid scrub livelocks If you have two pgs that need to scrub on two OSDs, each the primary for one pg and the replica for the other, you can end up in a livelock: - both osds locally reserve a scrub slot - both osds send a scrub schedule request - both scrub requests are rejected - both osds wait exactly 1 second - repeat Seems a bit unlikely, but I've seen test cases where it goes on for more than an hour. Fixes: http://tracker.ceph.com/issues/26890 Signed-off-by: Sage Weil --- diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index db51ad153a32..6831fc702711 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -36,6 +36,7 @@ #include "include/types.h" #include "include/compat.h" +#include "include/random.h" #include "OSD.h" #include "OSDMap.h" @@ -169,8 +170,6 @@ #define dout_prefix _prefix(_dout, whoami, get_osdmap_epoch()) -const double OSD::OSD_TICK_INTERVAL = 1.0; - static ostream& _prefix(std::ostream* _dout, int whoami, epoch_t epoch) { return *_dout << "osd." 
<< whoami << " " << epoch << " "; } @@ -603,8 +602,8 @@ void OSDService::promote_throttle_recalibrate() promote_probability_millis = prob; // set hard limits for this interval to mitigate stampedes - promote_max_objects = target_obj_sec * OSD::OSD_TICK_INTERVAL * 2; - promote_max_bytes = target_bytes_sec * OSD::OSD_TICK_INTERVAL * 2; + promote_max_objects = target_obj_sec * osd->OSD_TICK_INTERVAL * 2; + promote_max_bytes = target_bytes_sec * osd->OSD_TICK_INTERVAL * 2; } // ------------------------------------- @@ -1978,6 +1977,14 @@ OSD::~OSD() delete store; } +double OSD::get_tick_interval() const +{ + // vary +/- 5% to avoid scrub scheduling livelocks + constexpr auto delta = 0.05; + return (OSD_TICK_INTERVAL * + ceph::util::generate_random_number(1.0 - delta, 1.0 + delta)); +} + void cls_initialize(ClassHandler *ch); void OSD::handle_signal(int signum) @@ -2606,11 +2613,11 @@ int OSD::init() heartbeat_thread.create("osd_srv_heartbt"); // tick - tick_timer.add_event_after(OSD_TICK_INTERVAL, + tick_timer.add_event_after(get_tick_interval(), new C_Tick(this)); { Mutex::Locker l(tick_timer_lock); - tick_timer_without_osd_lock.add_event_after(OSD_TICK_INTERVAL, + tick_timer_without_osd_lock.add_event_after(get_tick_interval(), new C_Tick_WithoutOSDLock(this)); } @@ -4806,7 +4813,7 @@ void OSD::tick() do_waiters(); - tick_timer.add_event_after(OSD_TICK_INTERVAL, new C_Tick(this)); + tick_timer.add_event_after(get_tick_interval(), new C_Tick(this)); } void OSD::tick_without_osd_lock() @@ -4877,7 +4884,7 @@ void OSD::tick_without_osd_lock() mgrc.update_daemon_health(get_health_metrics()); service.kick_recovery_queue(); - tick_timer_without_osd_lock.add_event_after(OSD_TICK_INTERVAL, + tick_timer_without_osd_lock.add_event_after(get_tick_interval(), new C_Tick_WithoutOSDLock(this)); } diff --git a/src/osd/OSD.h b/src/osd/OSD.h index e435b99000a7..18739f52dfa8 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -1237,7 +1237,8 @@ public: protected: - static const double 
OSD_TICK_INTERVAL; // tick interval for tick_timer and tick_timer_without_osd_lock + const double OSD_TICK_INTERVAL = { 1.0 }; + double get_tick_interval() const; AuthAuthorizeHandlerRegistry *authorize_handler_cluster_registry; AuthAuthorizeHandlerRegistry *authorize_handler_service_registry;