From: Samuel Just <sjust@redhat.com>
Date: Thu, 6 Apr 2023 07:04:05 +0000 (-0700)
Subject: osd/: differentiate priority for PGRecovery[Context]
X-Git-Tag: v17.2.7~418^2~9
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=474022959ff2f9d9d74e829112fa0ba113542f31;p=ceph.git

osd/: differentiate priority for PGRecovery[Context]

PGs with degraded objects should be higher priority.

Signed-off-by: Samuel Just <sjust@redhat.com>
---

diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index de9ff3e5fa42a..68531133203c8 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -1640,7 +1640,8 @@ void OSDService::enqueue_front(OpSchedulerItem&& qi)
 void OSDService::queue_recovery_context(
   PG *pg,
   GenContext<ThreadPool::TPHandle&> *c,
-  uint64_t cost)
+  uint64_t cost,
+  int priority)
 {
   epoch_t e = get_osdmap_epoch();
 
@@ -1660,7 +1661,7 @@ void OSDService::queue_recovery_context(
   enqueue_back(
     OpSchedulerItem(
       unique_ptr<OpSchedulerItem::OpQueueable>(
-	new PGRecoveryContext(pg->get_pgid(), c, e)),
+	new PGRecoveryContext(pg->get_pgid(), c, e, priority)),
       cost_for_queue,
       cct->_conf->osd_recovery_priority,
       ceph_clock_now(),
@@ -2005,7 +2006,8 @@ void OSDService::_queue_for_recovery(
 	new PGRecovery(
 	  p.pg->get_pgid(),
 	  p.epoch_queued,
-	  reserved_pushes)),
+	  reserved_pushes,
+	  p.priority)),
       cost_for_queue,
       cct->_conf->osd_recovery_priority,
       ceph_clock_now(),
@@ -9670,7 +9672,7 @@ unsigned OSDService::get_target_pg_log_entries() const
 }
 
 void OSD::do_recovery(
-  PG *pg, epoch_t queued, uint64_t reserved_pushes,
+  PG *pg, epoch_t queued, uint64_t reserved_pushes, int priority,
   ThreadPool::TPHandle &handle)
 {
   uint64_t started = 0;
@@ -9687,13 +9689,14 @@ void OSD::do_recovery(
     std::lock_guard l(service.sleep_lock);
     if (recovery_sleep > 0 && service.recovery_needs_sleep) {
       PGRef pgref(pg);
-      auto recovery_requeue_callback = new LambdaContext([this, pgref, queued, reserved_pushes](int r) {
+      auto recovery_requeue_callback = new LambdaContext(
+	[this, pgref, queued, reserved_pushes, priority](int r) {
 	dout(20) << "do_recovery wake up at " << ceph_clock_now() << ", re-queuing recovery" << dendl;
 	std::lock_guard l(service.sleep_lock);
 	service.recovery_needs_sleep = false;
-	service.queue_recovery_after_sleep(pgref.get(), queued, reserved_pushes);
+	service.queue_recovery_after_sleep(pgref.get(), queued, reserved_pushes, priority);
       });
 
     // This is true for the first recovery op and when the previous recovery op
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index 8f205e5a55f72..a1bb82f07b701 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -538,7 +538,8 @@ public:
   AsyncReserver<spg_t, Finisher> snap_reserver;
   void queue_recovery_context(PG *pg,
                               GenContext<ThreadPool::TPHandle&> *c,
-                              uint64_t cost);
+                              uint64_t cost,
+                              int priority);
   void queue_for_snap_trim(PG *pg);
   void queue_for_scrub(PG* pg, Scrub::scrub_prio_t with_priority);
 
@@ -618,6 +619,7 @@ private:
     const epoch_t epoch_queued;
     PGRef pg;
     const uint64_t cost_per_object;
+    const int priority;
   };
   std::list<pg_awaiting_throttle_t> awaiting_throttle;
 
@@ -680,25 +682,31 @@ public:
   unsigned get_target_pg_log_entries() const;
 
   // delayed pg activation
-  void queue_for_recovery(PG *pg, uint64_t cost_per_object) {
+  void queue_for_recovery(
+    PG *pg, uint64_t cost_per_object,
+    int priority) {
     std::lock_guard l(recovery_lock);
 
     if (pg->is_forced_recovery_or_backfill()) {
       awaiting_throttle.emplace_front(
 	pg_awaiting_throttle_t{
-	  pg->get_osdmap()->get_epoch(), pg, cost_per_object});
+	  pg->get_osdmap()->get_epoch(), pg, cost_per_object, priority});
     } else {
       awaiting_throttle.emplace_back(
 	pg_awaiting_throttle_t{
-	  pg->get_osdmap()->get_epoch(), pg, cost_per_object});
+	  pg->get_osdmap()->get_epoch(), pg, cost_per_object, priority});
     }
     _maybe_queue_recovery();
   }
-  void queue_recovery_after_sleep(PG *pg, epoch_t queued, uint64_t reserved_pushes) {
+  void queue_recovery_after_sleep(
+    PG *pg, epoch_t queued, uint64_t reserved_pushes,
+    int priority) {
     std::lock_guard l(recovery_lock);
     // Send cost as 1 in pg_awaiting_throttle_t below. The cost is ignored
     // as this path is only applicable for WeightedPriorityQueue scheduler.
-    _queue_for_recovery(pg_awaiting_throttle_t{queued, pg, 1}, reserved_pushes);
+    _queue_for_recovery(
+      pg_awaiting_throttle_t{queued, pg, 1, priority},
+      reserved_pushes);
   }
 
   void queue_check_readable(spg_t spgid,
@@ -1941,6 +1949,7 @@ protected:
 
   // -- pg recovery --
   void do_recovery(PG *pg, epoch_t epoch_queued, uint64_t pushes_reserved,
+		   int priority,
 		   ThreadPool::TPHandle &handle);
 
 
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 667e9c106b28c..3b3df466f7c9f 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -429,7 +429,9 @@ void PG::queue_recovery()
 	1, // ensure objects is non-negative and non-zero
 	info.stats.stats.sum.num_objects));
     uint64_t cost_per_object = std::max<uint64_t>(num_bytes / num_objects, 1);
-    osd->queue_for_recovery(this, cost_per_object);
+    osd->queue_for_recovery(
+      this, cost_per_object, recovery_state.get_recovery_op_priority()
+    );
   }
 }
 
diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc
index d1f6a4c52b336..63757e232d8b2 100644
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -528,7 +528,9 @@ void PrimaryLogPG::schedule_recovery_work(
   GenContext<ThreadPool::TPHandle&> *c,
   uint64_t cost)
 {
-  osd->queue_recovery_context(this, c, cost);
+  osd->queue_recovery_context(
+    this, c, cost,
+    recovery_state.get_recovery_op_priority());
 }
 
 void PrimaryLogPG::replica_clear_repop_obc(
diff --git a/src/osd/scheduler/OpSchedulerItem.cc b/src/osd/scheduler/OpSchedulerItem.cc
index 9f834e9077812..4eab93bd9c350 100644
--- a/src/osd/scheduler/OpSchedulerItem.cc
+++ b/src/osd/scheduler/OpSchedulerItem.cc
@@ -225,7 +225,7 @@ void PGRecovery::run(
   PGRef& pg,
   ThreadPool::TPHandle &handle)
 {
-  osd->do_recovery(pg.get(), epoch_queued, reserved_pushes, handle);
+  osd->do_recovery(pg.get(), epoch_queued, reserved_pushes, priority, handle);
   pg->unlock();
 }
 
diff --git a/src/osd/scheduler/OpSchedulerItem.h b/src/osd/scheduler/OpSchedulerItem.h
index dff3acb608b81..2bc251c4633a4 100644
--- a/src/osd/scheduler/OpSchedulerItem.h
+++ b/src/osd/scheduler/OpSchedulerItem.h
@@ -502,14 +502,17 @@ class PGScrubChunkIsFree : public PGScrubItem {
 class PGRecovery : public PGOpQueueable {
   epoch_t epoch_queued;
   uint64_t reserved_pushes;
+  int priority;
 public:
   PGRecovery(
     spg_t pg,
     epoch_t epoch_queued,
-    uint64_t reserved_pushes)
+    uint64_t reserved_pushes,
+    int priority)
     : PGOpQueueable(pg),
       epoch_queued(epoch_queued),
-      reserved_pushes(reserved_pushes) {}
+      reserved_pushes(reserved_pushes),
+      priority(priority) {}
   std::ostream &print(std::ostream &rhs) const final {
     return rhs << "PGRecovery(pgid=" << get_pgid()
 	       << " epoch_queued=" << epoch_queued
@@ -522,18 +525,20 @@ public:
   void run(
     OSD *osd, OSDShard *sdata, PGRef& pg, ThreadPool::TPHandle &handle) final;
   op_scheduler_class get_scheduler_class() const final {
-    return op_scheduler_class::background_recovery;
+    return priority_to_scheduler_class(priority);
   }
 };
 
 class PGRecoveryContext : public PGOpQueueable {
   std::unique_ptr<GenContext<ThreadPool::TPHandle&>> c;
   epoch_t epoch;
+  int priority;
 public:
   PGRecoveryContext(spg_t pgid,
-		    GenContext<ThreadPool::TPHandle&> *c, epoch_t epoch)
+		    GenContext<ThreadPool::TPHandle&> *c, epoch_t epoch,
+		    int priority)
     : PGOpQueueable(pgid),
-      c(c), epoch(epoch) {}
+      c(c), epoch(epoch), priority(priority) {}
   std::ostream &print(std::ostream &rhs) const final {
     return rhs << "PGRecoveryContext(pgid=" << get_pgid()
 	       << " c=" << c.get() << " epoch=" << epoch
@@ -542,7 +547,7 @@ public:
   void run(
     OSD *osd, OSDShard *sdata, PGRef& pg, ThreadPool::TPHandle &handle) final;
   op_scheduler_class get_scheduler_class() const final {
-    return op_scheduler_class::background_recovery;
+    return priority_to_scheduler_class(priority);
   }
 };