Before the commit following race can happen:
```
A : OpTracker::visit_ops_in_flight(..., callable leaking TrackedOpRef outside)
A : Mutex::Locker::Locker(sdata->ops_in_flight_lock_sharded)
A with lock : (nref > 0) == true
B : TrackedOp::put(), nref := 0 // updating the counter is done without the lock
B : OpTracker::unregister_inflight_op()
B : Mutex::Locker::Locker(sdata->ops_in_flight_lock_sharded)
A with lock : visit() -> TrackedOp::get(), nref := 1
A with lock : Mutex::Locker::~Locker()
B with lock : boost::intrusive::list::iterator_to(op)
B with lock : boost::intrusive::list::erase(iter)
B with lock : Mutex::Locker::~Locker()
A : TrackedOp::put(), nref := 0
A : OpTracker::unregister_inflight_op()
A : Mutex::Locker::Locker(sdata->ops_in_flight_lock_sharded)
A with lock : boost::intrusive::list::iterator_to(op) // oops as op doesn't belong to the list anymore
```
Fixes: https://tracker.ceph.com/issues/24664
Related-To: https://tracker.ceph.com/issues/24037
Signed-off-by: Radoslaw Zarzynski <rzarzyns@redhat.com>
(cherry picked from commit
871cbf0f6114c301acc749ae184dbbac32baacf2)
return true;
}
-void OpTracker::unregister_inflight_op(TrackedOp *i)
+void OpTracker::unregister_inflight_op(TrackedOp* const i)
{
// caller checks;
assert(i->state);
auto p = sdata->ops_in_flight_sharded.iterator_to(*i);
sdata->ops_in_flight_sharded.erase(p);
}
- i->_unregistered();
+}
- if (!tracking_enabled)
- delete i;
- else {
- RWLock::RLocker l(lock);
- i->state = TrackedOp::STATE_HISTORY;
- utime_t now = ceph_clock_now();
- history.insert(now, TrackedOpRef(i));
- }
+void OpTracker::record_history_op(TrackedOpRef&& i)
+{
+ RWLock::RLocker l(lock);
+ history.insert(ceph_clock_now(), std::move(i));
}
bool OpTracker::visit_ops_in_flight(utime_t* oldest_secs,
assert(NULL != sdata);
Mutex::Locker locker(sdata->ops_in_flight_lock_sharded);
for (auto& op : sdata->ops_in_flight_sharded) {
- if (op.nref > 0 && !visit(op))
+ if (!visit(op))
break;
}
}
bool dump_historic_slow_ops(Formatter *f, set<string> filters = {""});
bool register_inflight_op(TrackedOp *i);
void unregister_inflight_op(TrackedOp *i);
+ void record_history_op(TrackedOpRef&& i);
void get_age_ms_histogram(pow2_hist_t *h);
++nref;
}
void put() {
- if (--nref == 0) {
+ again:
+ auto nref_snap = nref.load();
+ if (nref_snap == 1) {
switch (state.load()) {
case STATE_UNTRACKED:
_unregistered();
case STATE_LIVE:
mark_event("done");
tracker->unregister_inflight_op(this);
+ _unregistered();
+ if (!tracker->is_tracking()) {
+ delete this;
+ } else {
+ state = TrackedOp::STATE_HISTORY;
+ tracker->record_history_op(
+ TrackedOpRef(this, /* add_ref = */ false));
+ }
break;
case STATE_HISTORY:
default:
ceph_abort();
}
+ } else if (!nref.compare_exchange_weak(nref_snap, nref_snap - 1)) {
+ goto again;
}
}