]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/PG: use local_reserver to schedule delete
author Sage Weil <sage@redhat.com>
Tue, 19 Dec 2017 17:48:26 +0000 (11:48 -0600)
committer Sage Weil <sage@redhat.com>
Thu, 11 Jan 2018 23:07:00 +0000 (17:07 -0600)
Use the reserver so that delete competes for the same slot(s) as recovery
and such.

The delete priority is normally below recovery, unless the OSD is getting
fullish, in which case we set a very high priority.  We have to be careful
here because backfill will back off when the OSD gets full(ish) but log
recovery does not.

Signed-off-by: Sage Weil <sage@redhat.com>
src/osd/PG.cc
src/osd/PG.h
src/osd/osd_types.h

index 9517f77046e0a7632b8a5bc29c122a852f195c76..9f5b1b4c4a70cb1d25e3a0503ad374dbea53dd77 100644 (file)
@@ -2211,6 +2211,19 @@ unsigned PG::get_backfill_priority()
   return static_cast<unsigned>(ret);
 }
 
+unsigned PG::get_delete_priority()  // reservation priority for deleting this PG, from this OSD's fullness flags
+{
+  auto state = get_osdmap()->get_state(osd->whoami);
+  if (state & (CEPH_OSD_BACKFILLFULL |  // backfillfull or full: most urgent
+              CEPH_OSD_FULL)) {
+    return OSD_DELETE_PRIORITY_FULL;
+  } else if (state & CEPH_OSD_NEARFULL) {  // only approaching full: elevated
+    return OSD_DELETE_PRIORITY_FULLISH;
+  } else {
+    return OSD_DELETE_PRIORITY_NORMAL;
+  }
+}
+
 void PG::finish_recovery(list<Context*>& tfin)
 {
   dout(10) << "finish_recovery" << dendl;
@@ -6125,6 +6138,10 @@ void PG::_delete_some()
 
     osd->finish_pg_delete(this);
     deleted = true;
+
+    // cancel the reservation here, since the PG is about to get deleted and
+    // the exit() methods don't run when that happens.
+    osd->local_reserver.cancel_reservation(info.pgid);
   }
 }
 
@@ -7964,24 +7981,80 @@ void PG::RecoveryState::Stray::exit()
 }
 
 
-/*--------Deleting----------*/
+/*--------ToDelete----------*/
+PG::RecoveryState::ToDelete::ToDelete(my_context ctx)
+  : my_base(ctx),
+    NamedState(context< RecoveryMachine >().pg, "Started/ToDelete")
+{
+  context< RecoveryMachine >().log_enter(state_name);
+  PG *pg = context< RecoveryMachine >().pg;
+  pg->osd->logger->inc(l_osd_pg_removing);  // balanced by the dec() in ToDelete::exit()
+}
+
+void PG::RecoveryState::ToDelete::exit()
+{
+  context< RecoveryMachine >().log_exit(state_name, enter_time);
+  PG *pg = context< RecoveryMachine >().pg;
+  pg->osd->logger->dec(l_osd_pg_removing);  // undo the inc() done when ToDelete was entered
+  pg->osd->local_reserver.cancel_reservation(pg->info.pgid);  // drop the reservation requested by WaitDeleteReserved
+}
+
+/*----WaitDeleteReserved----*/
+PG::RecoveryState::WaitDeleteReserved::WaitDeleteReserved(my_context ctx)
+  : my_base(ctx),
+    NamedState(context< RecoveryMachine >().pg,
+              "Started/ToDelete/WaitDeleteReseved")
+{
+  context< RecoveryMachine >().log_enter(state_name);
+  PG *pg = context< RecoveryMachine >().pg;
+  context<ToDelete>().priority = pg->get_delete_priority();
+  pg->osd->local_reserver.cancel_reservation(pg->info.pgid);
+  pg->osd->local_reserver.request_reservation(
+    pg->info.pgid,
+    new QueuePeeringEvt<DeleteReserved>(
+      pg, pg->get_osdmap()->get_epoch(),
+      DeleteReserved()),
+    context<ToDelete>().priority,
+    new QueuePeeringEvt<DeleteInterrupted>(
+      pg, pg->get_osdmap()->get_epoch(),
+      DeleteInterrupted()));
+}
+
+boost::statechart::result PG::RecoveryState::ToDelete::react(
+  const ActMap& evt)
+{
+  PG *pg = context< RecoveryMachine >().pg;
+  if (pg->get_delete_priority() != priority) {  // priority was recorded by WaitDeleteReserved on entry
+    ldout(pg->cct,10) << __func__ << " delete priority changed, resetting"
+                     << dendl;
+    return transit<ToDelete>();  // re-enter so WaitDeleteReserved re-requests at the new priority
+  }
+  return discard_event();
+}
+
+void PG::RecoveryState::WaitDeleteReserved::exit()
+{
+  context< RecoveryMachine >().log_exit(state_name, enter_time);  // logging only; no reservation is released here
+}
+
+/*----Deleting-----*/
 PG::RecoveryState::Deleting::Deleting(my_context ctx)
   : my_base(ctx),
-    NamedState(context< RecoveryMachine >().pg, "Started/Deleting")
+    NamedState(context< RecoveryMachine >().pg, "Started/ToDelete/Deleting")
 {
   context< RecoveryMachine >().log_enter(state_name);
   PG *pg = context< RecoveryMachine >().pg;
   pg->deleting = true;
   ObjectStore::Transaction* t = context<RecoveryMachine>().get_cur_transaction();
   pg->on_removal(t);
-  pg->osd->logger->inc(l_osd_pg_removing);
   RecoveryCtx *rctx = context<RecoveryMachine>().get_recovery_ctx();
   Context *fin = new C_DeleteMore(pg, pg->get_osdmap()->get_epoch());
   rctx->on_applied->contexts.push_back(fin);
   rctx->on_safe->contexts.push_back(fin);
 }
 
-boost::statechart::result PG::RecoveryState::Deleting::react(const DeleteSome& evt)
+boost::statechart::result PG::RecoveryState::Deleting::react(
+  const DeleteSome& evt)
 {
   PG *pg = context< RecoveryMachine >().pg;
   pg->_delete_some();
@@ -7993,7 +8066,7 @@ void PG::RecoveryState::Deleting::exit()
   context< RecoveryMachine >().log_exit(state_name, enter_time);
   PG *pg = context< RecoveryMachine >().pg;
   pg->deleting = false;
-  pg->osd->logger->dec(l_osd_pg_removing);
+  pg->osd->local_reserver.cancel_reservation(pg->info.pgid);
 }
 
 /*--------GetInfo---------*/
index 8fe5da84af3f7db2ccef60ca61fd18f751b9da7e..0a708a7a0b0fa4895ac70780ee1b504c5beba348 100644 (file)
@@ -1231,6 +1231,8 @@ protected:
   unsigned get_recovery_priority();
   /// get backfill reservation priority
   unsigned get_backfill_priority();
+  /// get priority for pg deletion
+  unsigned get_delete_priority();
 
   void mark_clean();  ///< mark an active pg clean
 
@@ -1870,6 +1872,8 @@ protected:
 
   TrivialEvent(DeleteStart)
   TrivialEvent(DeleteSome)
+  TrivialEvent(DeleteReserved)
+  TrivialEvent(DeleteInterrupted)
 
   /* Encapsulates PG recovery process */
   class RecoveryState {
@@ -1981,7 +1985,9 @@ protected:
     //       RepWaitBackfillReserved
     //       RepWaitRecoveryReserved
     //     Stray
-    //     Deleting
+    //     ToDelete
+    //       WaitDeleteReserved
+    //       Deleting
     // Crashed
 
     struct Crashed : boost::statechart::state< Crashed, RecoveryMachine >, NamedState {
@@ -2280,7 +2286,7 @@ protected:
       void exit();
     };
 
-    struct Deleting;
+    struct ToDelete;
     struct RepNotRecovering;
     struct ReplicaActive : boost::statechart::state< ReplicaActive, Started, RepNotRecovering >, NamedState {
       explicit ReplicaActive(my_context ctx);
@@ -2299,7 +2305,7 @@ protected:
        boost::statechart::custom_reaction< UnfoundBackfill >,
        boost::statechart::custom_reaction< RemoteBackfillPreempted >,
        boost::statechart::custom_reaction< RemoteRecoveryPreempted >,
-       boost::statechart::transition<DeleteStart, Deleting>
+       boost::statechart::transition<DeleteStart, ToDelete>
        > reactions;
       boost::statechart::result react(const QueryState& q);
       boost::statechart::result react(const MInfoRec& infoevt);
@@ -2461,7 +2467,7 @@ protected:
        boost::statechart::custom_reaction< MInfoRec >,
        boost::statechart::custom_reaction< ActMap >,
        boost::statechart::custom_reaction< RecoveryDone >,
-       boost::statechart::transition<DeleteStart, Deleting>
+       boost::statechart::transition<DeleteStart, ToDelete>
        > reactions;
       boost::statechart::result react(const MQuery& query);
       boost::statechart::result react(const MLogRec& logevt);
@@ -2472,15 +2478,39 @@ protected:
       }
     };
 
-    struct Deleting : boost::statechart::state<Deleting, Started>, NamedState {
+    struct WaitDeleteReserved;
+    struct ToDelete : boost::statechart::state<ToDelete, Started, WaitDeleteReserved>, NamedState {
+      unsigned priority = 0;
       typedef boost::mpl::list <
        boost::statechart::custom_reaction< ActMap >,
        boost::statechart::custom_reaction< DeleteSome >
        > reactions;
-      explicit Deleting(my_context ctx);
-      boost::statechart::result react(const ActMap &evt) {
+      explicit ToDelete(my_context ctx);
+      boost::statechart::result react(const ActMap &evt);
+      boost::statechart::result react(const DeleteSome &evt) {
+       // happens if we drop out of Deleting due to reprioritization etc.
        return discard_event();
       }
+      void exit();
+    };
+
+    struct Deleting;
+    struct WaitDeleteReserved : boost::statechart::state<WaitDeleteReserved,
+                                                        ToDelete>, NamedState {
+      typedef boost::mpl::list <
+       boost::statechart::transition<DeleteReserved, Deleting>  // DeleteReserved is queued by the reserver callback set up in the ctor
+       > reactions;
+      explicit WaitDeleteReserved(my_context ctx);
+      void exit();
+    };
+
+    struct Deleting : boost::statechart::state<Deleting,
+                                              ToDelete>, NamedState {
+      typedef boost::mpl::list <
+       boost::statechart::custom_reaction< DeleteSome >,
+       boost::statechart::transition<DeleteInterrupted, WaitDeleteReserved>  // go back and request a reservation again
+       > reactions;
+      explicit Deleting(my_context ctx);
+      boost::statechart::result react(const DeleteSome &evt);
+      void exit();
+    };
index 2eb347eabaae7e08dd788a76bf9e8fa2cc9d654e..bdab163e9021aa60fc251e905e18ed17c312edfb 100644 (file)
 /// max recovery priority for MBackfillReserve, only when forced manually
 #define OSD_RECOVERY_PRIORITY_FORCED 255
 
+/// priority for pg deletion when osd is not fullish
+#define OSD_DELETE_PRIORITY_NORMAL 179
+
+/// priority for pg deletion when osd is approaching full
+#define OSD_DELETE_PRIORITY_FULLISH 219
+
+/// priority for pg deletion when osd is fuller still (same value as OSD_RECOVERY_PRIORITY_FORCED)
+#define OSD_DELETE_PRIORITY_FULL 255
+
 
 typedef hobject_t collection_list_handle_t;