From d77dd6352ee4cbc9f7c6c145567aa24cf952f2f2 Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Sat, 9 Dec 2017 14:42:10 -0600
Subject: [PATCH] osd: leave PG registered (and stray) during delete; reimplement pg deletion

A lot of awkward complexity is implemented in OSD to handle PGs that
aren't in pg_map and are in the process of being deleted. This is hard
because if the PG is recreated (or split, or whatever) then we need to
stop the deletion and create a fresh PG referencing the same data.

Instead, leave deleting PGs registered and Stray, with a new peering
state Stray/Deleting. Let them continue to process OSDMaps, splits,
peering intervals, and so on. If they are not fully deleted, they'll go
back to Reset -> Stray and so on and the new primary will get the notify
and decide what to do with them (usually instruct them to delete again).

This (1) streamlines and cleans up the code structure, and also (2) gets
rid of the special purpose RemoveWQ and moves the delete work into the
main op_wq where it can get throttled and so on.

Signed-off-by: Sage Weil --- src/common/legacy_config_opts.h | 6 + src/common/options.cc | 68 +++++++++++ src/osd/OSD.cc | 202 +++++++++++++++++++++----------- src/osd/OSD.h | 10 +- src/osd/OpQueueItem.cc | 7 ++ src/osd/OpQueueItem.h | 24 +++- src/osd/PG.cc | 128 +++++++++++++++++++- src/osd/PG.h | 38 ++++-- src/osd/PrimaryLogPG.cc | 6 +- src/osd/mClockOpClassSupport.cc | 3 + src/osd/mClockOpClassSupport.h | 8 +- 11 files changed, 414 insertions(+), 86 deletions(-) diff --git a/src/common/legacy_config_opts.h b/src/common/legacy_config_opts.h index f2b8196ef8e2d..231a24396196a 100644 --- a/src/common/legacy_config_opts.h +++ b/src/common/legacy_config_opts.h @@ -653,6 +653,9 @@ OPTION(osd_op_queue_mclock_recov_lim, OPT_DOUBLE) OPTION(osd_op_queue_mclock_scrub_res, OPT_DOUBLE) OPTION(osd_op_queue_mclock_scrub_wgt, OPT_DOUBLE) OPTION(osd_op_queue_mclock_scrub_lim, OPT_DOUBLE) +OPTION(osd_op_queue_mclock_pg_delete_res, OPT_DOUBLE) +OPTION(osd_op_queue_mclock_pg_delete_wgt, OPT_DOUBLE) +OPTION(osd_op_queue_mclock_pg_delete_lim, OPT_DOUBLE) OPTION(osd_op_queue_mclock_peering_event_res, OPT_DOUBLE) OPTION(osd_op_queue_mclock_peering_event_wgt, OPT_DOUBLE) OPTION(osd_op_queue_mclock_peering_event_lim, OPT_DOUBLE) @@ -870,6 +873,9 @@ OPTION(osd_scrub_cost, OPT_U32) // requested scrubs jump the queue of scheduled scrubs OPTION(osd_requested_scrub_priority, OPT_U32) +OPTION(osd_pg_delete_priority, OPT_U32) +OPTION(osd_pg_delete_cost, OPT_U32) // set default cost equal to 1MB io + OPTION(osd_recovery_priority, OPT_U32) // set default cost equal to 20MB io OPTION(osd_recovery_cost, OPT_U32) diff --git a/src/common/options.cc b/src/common/options.cc index e7d767bb9eaae..12b7e18ff1b8c 100644 --- a/src/common/options.cc +++ b/src/common/options.cc @@ -2357,6 +2357,66 @@ std::vector