From 4cee819661f73f5449edcbf52edcd1e453e035ea Mon Sep 17 00:00:00 2001 From: Radoslaw Zarzynski Date: Thu, 5 Jul 2018 14:39:31 +0200 Subject: [PATCH] common: fix races when visiting a TrackedOp. Before the commit following race can happen: ``` A : OpTracker::visit_ops_in_flight(..., callable leaking TrackedOpRef outside) A : Mutex::Locker::Locker(sdata->ops_in_flight_lock_sharded) A with lock : (nref > 0) == true B : TrackedOp::put(), nref := 0 // updating the counter is done without the lock B : OpTracker::unregister_inflight_op() B : Mutex::Locker::Locker(sdata->ops_in_flight_lock_sharded) A with lock : visit() -> TrackedOp::get(), nref := 1 A with lock : Mutex::Locker::~Locker() B with lock : boost::intrusive::list::iterator_to(op) B with lock : boost::intrusive::list::erase(iter) B with lock : Mutex::Locker::~Locker() A : TrackedOp::put(), nref := 0 A : OpTracker::unregister_inflight_op() A : Mutex::Locker::Locker(sdata->ops_in_flight_lock_sharded) A with lock : boost::intrusive::list::iterator_to(op) // oops as op doesn't belong to the list anymore ``` Fixes: https://tracker.ceph.com/issues/24664 Related-To: https://tracker.ceph.com/issues/24037 Signed-off-by: Radoslaw Zarzynski (cherry picked from commit 871cbf0f6114c301acc749ae184dbbac32baacf2) --- src/common/TrackedOp.cc | 18 +++++++----------- src/common/TrackedOp.h | 15 ++++++++++++++- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/src/common/TrackedOp.cc b/src/common/TrackedOp.cc index 8d4e986190aa2..93df29736e182 100644 --- a/src/common/TrackedOp.cc +++ b/src/common/TrackedOp.cc @@ -265,7 +265,7 @@ bool OpTracker::register_inflight_op(TrackedOp *i) return true; } -void OpTracker::unregister_inflight_op(TrackedOp *i) +void OpTracker::unregister_inflight_op(TrackedOp* const i) { // caller checks; assert(i->state); @@ -278,16 +278,12 @@ void OpTracker::unregister_inflight_op(TrackedOp *i) auto p = sdata->ops_in_flight_sharded.iterator_to(*i); sdata->ops_in_flight_sharded.erase(p); } - i->_unregistered(); +} - if (!tracking_enabled) - delete i; - else { - RWLock::RLocker l(lock); - i->state = TrackedOp::STATE_HISTORY; - utime_t now = ceph_clock_now(); - history.insert(now, TrackedOpRef(i)); - } +void OpTracker::record_history_op(TrackedOpRef&& i) +{ + RWLock::RLocker l(lock); + history.insert(ceph_clock_now(), std::move(i)); } bool OpTracker::visit_ops_in_flight(utime_t* oldest_secs, @@ -328,7 +324,7 @@ bool OpTracker::visit_ops_in_flight(utime_t* oldest_secs, assert(NULL != sdata); Mutex::Locker locker(sdata->ops_in_flight_lock_sharded); for (auto& op : sdata->ops_in_flight_sharded) { - if (op.nref > 0 && !visit(op)) + if (!visit(op)) break; } } diff --git a/src/common/TrackedOp.h b/src/common/TrackedOp.h index 2e3cc09a22c2f..a0790e48d9853 100644 --- a/src/common/TrackedOp.h +++ b/src/common/TrackedOp.h @@ -134,6 +134,7 @@ public: bool dump_historic_slow_ops(Formatter *f, set filters = {""}); bool register_inflight_op(TrackedOp *i); void unregister_inflight_op(TrackedOp *i); + void record_history_op(TrackedOpRef&& i); void get_age_ms_histogram(pow2_hist_t *h); @@ -303,7 +304,9 @@ public: ++nref; } void put() { - if (--nref == 0) { + again: + auto nref_snap = nref.load(); + if (nref_snap == 1) { switch (state.load()) { case STATE_UNTRACKED: _unregistered(); @@ -313,6 +316,14 @@ public: case STATE_LIVE: mark_event("done"); tracker->unregister_inflight_op(this); + _unregistered(); + if (!tracker->is_tracking()) { + delete this; + } else { + state = TrackedOp::STATE_HISTORY; + tracker->record_history_op( + TrackedOpRef(this, /* add_ref = */ false)); + } break; case STATE_HISTORY: @@ -322,6 +333,8 @@ public: default: ceph_abort(); } + } else if (!nref.compare_exchange_weak(nref_snap, nref_snap - 1)) { + goto again; } } -- 2.39.5