From 70aacb4a6e58e38ee026417ad1f0326f1e4fb491 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 12 Dec 2017 18:35:55 -0600 Subject: [PATCH] osd/PG: start delete after initial delete start info is applied This allows us to eliminate the flush in _delete_some(). Signed-off-by: Sage Weil --- src/osd/PG.cc | 56 +++++++++++++++++++++++++++------------------------ src/osd/PG.h | 1 + 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index aa4b5555b5d..9517f77046e 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -6037,15 +6037,37 @@ void PG::update_store_on_load() } } +struct C_DeleteMore : public Context { + PGRef pg; + epoch_t epoch; + int count = 2; + C_DeleteMore(PG *p, epoch_t e) : pg(p), epoch(e) {} + void finish(int r) override { + ceph_abort(); + } + void complete(int r) override { + assert(r == 0); + // complete will be called exactly count times; only the last time will actually + // complete. + if (--count) { + return; + } + pg->lock(); + if (!pg->pg_has_reset_since(epoch)) { + pg->osd->queue_for_pg_delete(pg->get_pgid(), epoch); + } + pg->unlock(); + delete this; + } +}; void PG::_delete_some() { dout(10) << __func__ << dendl; - // this ensures we get a valid result. it *also* serves to throttle - // us a bit (on filestore) because we won't delete more until the - // previous deletions are applied. - osr->flush(); + // we do not need to flush here because (1) we only start deleting after + // the initial metadata changes are applied and committed, and (2) we do not + // process the next chunk until we have applied and committed our work. 
vector<ghobject_t> olist; ObjectStore::Transaction t; @@ -6077,27 +6099,6 @@ void PG::_delete_some() epoch_t e = get_osdmap()->get_epoch(); if (num) { dout(20) << __func__ << " deleting " << num << " objects" << dendl; - struct C_DeleteMore : public Context { - PGRef pg; - int count = 2; - epoch_t epoch; - C_DeleteMore(PG *p, epoch_t e) : pg(p), epoch(e) {} - void complete(int r) { - // complete will be called exactly count times; only the last time will actualy - // complete. - if (--count) { - return; - } - if (r >= 0) { - pg->lock(); - if (!pg->pg_has_reset_since(epoch)) { - pg->osd->queue_for_pg_delete(pg->get_pgid(), epoch); - } - pg->unlock(); - } - delete this; - } - }; Context *fin = new C_DeleteMore(this, e); osd->store->queue_transaction( osr.get(), @@ -7974,7 +7975,10 @@ PG::RecoveryState::Deleting::Deleting(my_context ctx) ObjectStore::Transaction* t = context().get_cur_transaction(); pg->on_removal(t); pg->osd->logger->inc(l_osd_pg_removing); - pg->osd->queue_for_pg_delete(pg->get_pgid(), pg->get_osdmap()->get_epoch()); + RecoveryCtx *rctx = context().get_recovery_ctx(); + Context *fin = new C_DeleteMore(pg, pg->get_osdmap()->get_epoch()); + rctx->on_applied->contexts.push_back(fin); + rctx->on_safe->contexts.push_back(fin); } boost::statechart::result PG::RecoveryState::Deleting::react(const DeleteSome& evt) diff --git a/src/osd/PG.h b/src/osd/PG.h index 4bf83bc2b27..8fe5da84af3 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1421,6 +1421,7 @@ protected: virtual void _split_into(pg_t child_pgid, PG *child, unsigned split_bits) = 0; friend class C_OSD_RepModify_Commit; + friend class C_DeleteMore; // -- backoff -- Mutex backoff_lock; // orders inside Backoff::lock -- 2.39.5