From: Sage Weil
Date: Tue, 12 Dec 2017 22:38:14 +0000 (-0600)
Subject: osd/PG: wait for commit *and* readable before deleting more of PG
X-Git-Tag: v13.0.2~510^2~13
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=24fd1e0087d26fb4de7673f8b788fb50f13d8dcc;p=ceph.git

osd/PG: wait for commit *and* readable before deleting more of PG

For filestore, waiting for onreadable ensures that (1) the backend has done
(all) of the deletion work (we are throttled) and (2) that the flush() will
not block. So, all good.

For bluestore, onreadable happens at queue time, so the flush() was needed
to throttle progress. However, we don't want to block the op thread on
flush. And waiting for commit isn't sufficient because that would not
capture the filestore apply work.

Fix by waiting for both commit and readable before doing more deletion
work.

Signed-off-by: Sage Weil
---

diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 87fcace0db9b..aa4b5555b5d7 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -6079,9 +6079,15 @@ void PG::_delete_some()
     dout(20) << __func__ << " deleting " << num << " objects" << dendl;
     struct C_DeleteMore : public Context {
       PGRef pg;
+      int count = 2;
       epoch_t epoch;
       C_DeleteMore(PG *p, epoch_t e) : pg(p), epoch(e) {}
-      void finish(int r) {
+      void complete(int r) {
+        // complete will be called exactly count times; only the last time will actually
+        // complete.
+        if (--count) {
+          return;
+        }
         if (r >= 0) {
           pg->lock();
           if (!pg->pg_has_reset_since(epoch)) {
@@ -6089,12 +6095,15 @@ void PG::_delete_some()
           }
           pg->unlock();
         }
+        delete this;
       }
     };
+    Context *fin = new C_DeleteMore(this, e);
     osd->store->queue_transaction(
       osr.get(),
       std::move(t),
-      new C_DeleteMore(this, e));
+      fin,
+      fin);
   } else {
     dout(20) << __func__ << " finished" << dendl;
     if (cct->_conf->osd_inject_failure_on_pg_removal) {