From: Samuel Just
Date: Thu, 6 Apr 2023 07:04:05 +0000 (-0700)
Subject: osd/: differentiate priority for PGRecovery[Context]
X-Git-Tag: v18.1.0~122^2~9
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=606f9350b8add94a0093da24999df41bdfa1c926;p=ceph-ci.git

osd/: differentiate priority for PGRecovery[Context]

PGs with degraded objects should be higher priority.

Signed-off-by: Samuel Just
---

diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index bcc85145fc7..91c6522d231 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -1697,7 +1697,8 @@ void OSDService::enqueue_front(OpSchedulerItem&& qi)
 void OSDService::queue_recovery_context(
   PG *pg,
   GenContext<ThreadPool::TPHandle&> *c,
-  uint64_t cost)
+  uint64_t cost,
+  int priority)
 {
   epoch_t e = get_osdmap_epoch();
 
@@ -1717,7 +1718,7 @@ void OSDService::queue_recovery_context(
   enqueue_back(
     OpSchedulerItem(
       unique_ptr<OpSchedulerItem::OpQueueable>(
-        new PGRecoveryContext(pg->get_pgid(), c, e)),
+        new PGRecoveryContext(pg->get_pgid(), c, e, priority)),
       cost_for_queue,
       cct->_conf->osd_recovery_priority,
       ceph_clock_now(),
@@ -2063,7 +2064,8 @@ void OSDService::_queue_for_recovery(
       new PGRecovery(
         p.pg->get_pgid(),
         p.epoch_queued,
-        reserved_pushes)),
+        reserved_pushes,
+        p.priority)),
       cost_for_queue,
       cct->_conf->osd_recovery_priority,
       ceph_clock_now(),
@@ -9398,7 +9400,7 @@ unsigned OSDService::get_target_pg_log_entries() const
 }
 
 void OSD::do_recovery(
-  PG *pg, epoch_t queued, uint64_t reserved_pushes,
+  PG *pg, epoch_t queued, uint64_t reserved_pushes, int priority,
   ThreadPool::TPHandle &handle)
 {
   uint64_t started = 0;
@@ -9415,13 +9417,14 @@ void OSD::do_recovery(
     std::lock_guard l(service.sleep_lock);
     if (recovery_sleep > 0 && service.recovery_needs_sleep) {
       PGRef pgref(pg);
-      auto recovery_requeue_callback = new LambdaContext([this, pgref, queued, reserved_pushes](int r) {
+      auto recovery_requeue_callback = new LambdaContext(
+        [this, pgref, queued, reserved_pushes, priority](int r) {
         dout(20) << "do_recovery wake up at " << ceph_clock_now()
                  << ", re-queuing recovery" << dendl;
         std::lock_guard l(service.sleep_lock);
         service.recovery_needs_sleep = false;
-        service.queue_recovery_after_sleep(pgref.get(), queued, reserved_pushes);
+        service.queue_recovery_after_sleep(pgref.get(), queued, reserved_pushes, priority);
       });
 
     // This is true for the first recovery op and when the previous recovery op
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index ad000a0d1d8..96393b0863d 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -509,7 +509,8 @@ public:
   AsyncReserver<spg_t, Finisher> snap_reserver;
   void queue_recovery_context(PG *pg, GenContext<ThreadPool::TPHandle&> *c,
-                              uint64_t cost);
+                              uint64_t cost,
+                              int priority);
   void queue_for_snap_trim(PG *pg);
 
   void queue_for_scrub(PG* pg, Scrub::scrub_prio_t with_priority);
@@ -589,6 +590,7 @@ private:
     const epoch_t epoch_queued;
     PGRef pg;
     const uint64_t cost_per_object;
+    const int priority;
   };
   std::list<pg_awaiting_throttle_t> awaiting_throttle;
 
@@ -651,25 +653,31 @@ public:
   unsigned get_target_pg_log_entries() const;
 
   // delayed pg activation
-  void queue_for_recovery(PG *pg, uint64_t cost_per_object) {
+  void queue_for_recovery(
+    PG *pg, uint64_t cost_per_object,
+    int priority) {
     std::lock_guard l(recovery_lock);
     if (pg->is_forced_recovery_or_backfill()) {
       awaiting_throttle.emplace_front(
         pg_awaiting_throttle_t{
-          pg->get_osdmap()->get_epoch(), pg, cost_per_object});
+          pg->get_osdmap()->get_epoch(), pg, cost_per_object, priority});
     } else {
       awaiting_throttle.emplace_back(
         pg_awaiting_throttle_t{
-          pg->get_osdmap()->get_epoch(), pg, cost_per_object});
+          pg->get_osdmap()->get_epoch(), pg, cost_per_object, priority});
     }
     _maybe_queue_recovery();
   }
 
-  void queue_recovery_after_sleep(PG *pg, epoch_t queued, uint64_t reserved_pushes) {
+  void queue_recovery_after_sleep(
+    PG *pg, epoch_t queued, uint64_t reserved_pushes,
+    int priority) {
     std::lock_guard l(recovery_lock);
     // Send cost as 1 in pg_awaiting_throttle_t below. The cost is ignored
     // as this path is only applicable for WeightedPriorityQueue scheduler.
-    _queue_for_recovery(pg_awaiting_throttle_t{queued, pg, 1}, reserved_pushes);
+    _queue_for_recovery(
+      pg_awaiting_throttle_t{queued, pg, 1, priority},
+      reserved_pushes);
   }
 
   void queue_check_readable(spg_t spgid,
@@ -1875,6 +1883,7 @@ protected:
 
   // -- pg recovery --
   void do_recovery(PG *pg, epoch_t epoch_queued, uint64_t pushes_reserved,
+                   int priority,
                    ThreadPool::TPHandle &handle);
 
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index e8c2e2b8ad7..fa49038ed27 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -429,7 +429,9 @@ void PG::queue_recovery()
         1, // ensure objects is non-negative and non-zero
         info.stats.stats.sum.num_objects));
     uint64_t cost_per_object = std::max<uint64_t>(num_bytes / num_objects, 1);
-    osd->queue_for_recovery(this, cost_per_object);
+    osd->queue_for_recovery(
+      this, cost_per_object, recovery_state.get_recovery_op_priority()
+    );
   }
 }
 
diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc
index 0d8d5cf3856..6dea32b60f3 100644
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -526,7 +526,9 @@ void PrimaryLogPG::schedule_recovery_work(
   GenContext<ThreadPool::TPHandle&> *c,
   uint64_t cost)
 {
-  osd->queue_recovery_context(this, c, cost);
+  osd->queue_recovery_context(
+    this, c, cost,
+    recovery_state.get_recovery_op_priority());
 }
 
 void PrimaryLogPG::replica_clear_repop_obc(
diff --git a/src/osd/scheduler/OpSchedulerItem.cc b/src/osd/scheduler/OpSchedulerItem.cc
index 194e17f301c..4a08bca4e4f 100644
--- a/src/osd/scheduler/OpSchedulerItem.cc
+++ b/src/osd/scheduler/OpSchedulerItem.cc
@@ -224,7 +224,7 @@ void PGRecovery::run(
   PGRef& pg,
   ThreadPool::TPHandle &handle)
 {
-  osd->do_recovery(pg.get(), epoch_queued, reserved_pushes, handle);
+  osd->do_recovery(pg.get(), epoch_queued, reserved_pushes, priority, handle);
   pg->unlock();
 }
 
diff --git a/src/osd/scheduler/OpSchedulerItem.h b/src/osd/scheduler/OpSchedulerItem.h
index 00389c4b637..f41e66880e2 100644
--- a/src/osd/scheduler/OpSchedulerItem.h
+++ b/src/osd/scheduler/OpSchedulerItem.h
@@ -502,14 +502,17 @@ class PGScrubChunkIsFree : public PGScrubItem {
 class PGRecovery : public PGOpQueueable {
   epoch_t epoch_queued;
   uint64_t reserved_pushes;
+  int priority;
 public:
   PGRecovery(
     spg_t pg,
     epoch_t epoch_queued,
-    uint64_t reserved_pushes)
+    uint64_t reserved_pushes,
+    int priority)
     : PGOpQueueable(pg),
       epoch_queued(epoch_queued),
-      reserved_pushes(reserved_pushes) {}
+      reserved_pushes(reserved_pushes),
+      priority(priority) {}
   std::ostream &print(std::ostream &rhs) const final {
     return rhs << "PGRecovery(pgid=" << get_pgid()
                << " epoch_queued=" << epoch_queued
@@ -522,18 +525,20 @@ public:
   void run(
     OSD *osd, OSDShard *sdata, PGRef& pg, ThreadPool::TPHandle &handle) final;
   op_scheduler_class get_scheduler_class() const final {
-    return op_scheduler_class::background_recovery;
+    return priority_to_scheduler_class(priority);
   }
 };
 
 class PGRecoveryContext : public PGOpQueueable {
   std::unique_ptr<GenContext<ThreadPool::TPHandle&>> c;
   epoch_t epoch;
+  int priority;
 public:
   PGRecoveryContext(spg_t pgid,
-                    GenContext<ThreadPool::TPHandle&> *c, epoch_t epoch)
+                    GenContext<ThreadPool::TPHandle&> *c, epoch_t epoch,
+                    int priority)
     : PGOpQueueable(pgid),
-      c(c), epoch(epoch) {}
+      c(c), epoch(epoch), priority(priority) {}
   std::ostream &print(std::ostream &rhs) const final {
     return rhs << "PGRecoveryContext(pgid=" << get_pgid()
                << " c=" << c.get() << " epoch=" << epoch
@@ -542,7 +547,7 @@ public:
   void run(
     OSD *osd, OSDShard *sdata, PGRef& pg, ThreadPool::TPHandle &handle) final;
   op_scheduler_class get_scheduler_class() const final {
-    return op_scheduler_class::background_recovery;
+    return priority_to_scheduler_class(priority);
   }
 };
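Note on the effect of the change: instead of always returning op_scheduler_class::background_recovery, PGRecovery and PGRecoveryContext now derive their scheduler class from the recovery op priority supplied by recovery_state.get_recovery_op_priority(), via priority_to_scheduler_class(). The standalone sketch below only illustrates that idea; the pick_class() helper, the DEGRADED_CUTOFF threshold, and the enum ordering are assumptions made for the example and are not the mapping added by this series.

// Illustrative sketch: map an integer recovery priority to a scheduler
// class so that PGs with degraded objects (queued at a higher priority)
// land in a more urgent class than routine background recovery work.
// pick_class(), DEGRADED_CUTOFF and the enum ordering are assumptions
// for this example, not the real priority_to_scheduler_class().
#include <iostream>

enum class op_scheduler_class {
  background_recovery,
  background_best_effort,
  client,
  immediate
};

constexpr int DEGRADED_CUTOFF = 180;  // hypothetical threshold

op_scheduler_class pick_class(int priority) {
  return priority >= DEGRADED_CUTOFF
      ? op_scheduler_class::background_recovery       // degraded PGs: more urgent
      : op_scheduler_class::background_best_effort;   // routine recovery/backfill
}

int main() {
  for (int priority : {210, 140}) {
    std::cout << "priority " << priority << " -> "
              << (pick_class(priority) == op_scheduler_class::background_recovery
                      ? "background_recovery"
                      : "background_best_effort")
              << "\n";
  }
}

With a mapping of this shape the op scheduler can keep recovery for PGs with degraded objects ahead of best-effort background work, while the cost values passed by queue_for_recovery() and queue_recovery_context() are left unchanged by this patch.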