git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
PrimaryLogPG: reimplement osd_snap_trim_sleep within the state machine
author Greg Farnum <gfarnum@redhat.com>
Fri, 7 Apr 2017 22:45:12 +0000 (15:45 -0700)
committer Greg Farnum <gfarnum@redhat.com>
Wed, 26 Apr 2017 18:25:42 +0000 (11:25 -0700)
Rather than blocking the main op queue, just pause for that amount of
time between state machine cycles.

Also, add osd_snap_trim_sleep to a few of the thrasher yamls.

Signed-off-by: Samuel Just <sjust@redhat.com>
(cherry picked from commit 2ed7759cfeb03e71f0fbd98fe7c2db2bb741861c)

Conflicts:
src/osd/PrimaryLogPG.cc

Signed-off-by: Greg Farnum <gfarnum@redhat.com>
qa/suites/rados/thrash/thrashers/default.yaml
qa/suites/rados/thrash/thrashers/pggrow.yaml
src/common/config_opts.h
src/osd/OSD.cc
src/osd/OSD.h
src/osd/PrimaryLogPG.cc
src/osd/PrimaryLogPG.h

index f5e432d3fca050413d63a5bc67df887cf964a068..5df84bedabeb1768632e78891e7e8fdf0800cc3c 100644 (file)
@@ -10,6 +10,7 @@ tasks:
         osd scrub min interval: 60
         osd scrub max interval: 120
         osd max backfills: 3
+        osd snap trim sleep: 2
 - thrashosds:
     timeout: 1200
     chance_pgnum_grow: 1
index d381026af3b6f74b53d4d5987f09c6f4218bb398..30d8957c77b4a91bf3d00db65e75c8daa63e185f 100644 (file)
@@ -10,6 +10,7 @@ tasks:
         osd scrub max interval: 120
         filestore odsync write: true
         osd max backfills: 2
+        osd snap trim sleep: .5
 - thrashosds:
     timeout: 1200
     chance_pgnum_grow: 2
index 1a851c5f0f4891d14a171b64e60d975b31ec4766..70ee16b433ce39f3bcec7f3279baac2b5a24d73e 100644 (file)
@@ -747,7 +747,7 @@ OPTION(osd_op_thread_suicide_timeout, OPT_INT, 150)
 OPTION(osd_recovery_thread_timeout, OPT_INT, 30)
 OPTION(osd_recovery_thread_suicide_timeout, OPT_INT, 300)
 OPTION(osd_recovery_sleep, OPT_FLOAT, 0)         // seconds to sleep between recovery ops
-OPTION(osd_snap_trim_sleep, OPT_FLOAT, 0)
+OPTION(osd_snap_trim_sleep, OPT_DOUBLE, 0)
 OPTION(osd_scrub_invalid_stats, OPT_BOOL, true)
 OPTION(osd_remove_thread_timeout, OPT_INT, 60*60)
 OPTION(osd_remove_thread_suicide_timeout, OPT_INT, 10*60*60)
index d11099943a4b9c4db11bae058bebc0fc19bcdcda..7def76cf49426d97ec5e430fe87a3f3bafa82f70 100644 (file)
@@ -257,6 +257,9 @@ OSDService::OSDService(OSD *osd) :
   remote_reserver(&reserver_finisher, cct->_conf->osd_max_backfills,
                  cct->_conf->osd_min_recovery_priority),
   pg_temp_lock("OSDService::pg_temp_lock"),
+  snap_sleep_lock("OSDService::snap_sleep_lock"),
+  snap_sleep_timer(
+    osd->client_messenger->cct, snap_sleep_lock, false /* relax locking */),
   snap_reserver(&reserver_finisher,
                cct->_conf->osd_max_trimming_pgs),
   recovery_lock("OSDService::recovery_lock"),
@@ -489,6 +492,12 @@ void OSDService::shutdown()
     Mutex::Locker l(backfill_request_lock);
     backfill_request_timer.shutdown();
   }
+
+  {
+    Mutex::Locker l(snap_sleep_lock);
+    snap_sleep_timer.shutdown();
+  }
+
   osdmap = OSDMapRef();
   next_osdmap = OSDMapRef();
 }
@@ -500,6 +509,7 @@ void OSDService::init()
   objecter->set_client_incarnation(0);
   watch_timer.init();
   agent_timer.init();
+  snap_sleep_timer.init();
 
   agent_thread.create("osd_srv_agent");
 
index 08b4edd8b1b0a757a17624b2fd734f8f6f6f32af..fc5cbdb6bbd9794d731472820ed4e79162ad9c92 100644 (file)
@@ -890,6 +890,9 @@ public:
 
   void queue_for_peering(PG *pg);
 
+  Mutex snap_sleep_lock;
+  SafeTimer snap_sleep_timer;
+
   AsyncReserver<spg_t> snap_reserver;
   void queue_for_snap_trim(PG *pg);
 
index 7ae0e9be2c7bade9513a0e6848adb5d9e3d2ab00..2bf70bf2eb2643e5e275aea4442dc5f23872d0b0 100644 (file)
@@ -3831,14 +3831,6 @@ void PrimaryLogPG::snap_trimmer(epoch_t queued)
   if (deleting || pg_has_reset_since(queued)) {
     return;
   }
-  if (g_conf->osd_snap_trim_sleep > 0) {
-    unlock();
-    utime_t t;
-    t.set_from_double(g_conf->osd_snap_trim_sleep);
-    t.sleep();
-    lock();
-    dout(20) << __func__ << " slept for " << t << dendl;
-  }
 
   assert(is_primary());
 
index b55907edf6be43415bca633b4a23c5b0ba63407d..c33ef21e9a84d8902ca74e2b25f573cc2640d0bd 100644 (file)
@@ -1443,6 +1443,9 @@ private:
   struct SnapTrimReserved : boost::statechart::event< SnapTrimReserved > {
     SnapTrimReserved() : boost::statechart::event< SnapTrimReserved >() {}
   };
+  struct SnapTrimTimerReady : boost::statechart::event< SnapTrimTimerReady > {
+    SnapTrimTimerReady() : boost::statechart::event< SnapTrimTimerReady >() {}
+  };
 
   struct NotTrimming;
   struct SnapTrimmer : public boost::statechart::state_machine< SnapTrimmer, NotTrimming > {
@@ -1485,6 +1488,57 @@ private:
   };
 
   /* SnapTrimmerStates */
+  struct WaitTrimTimer : boost::statechart::state< WaitTrimTimer, Trimming >, NamedState {
+    typedef boost::mpl::list <
+      boost::statechart::custom_reaction< SnapTrimTimerReady >
+      > reactions;
+    Context *wakeup = nullptr;
+    explicit WaitTrimTimer(my_context ctx)
+      : my_base(ctx),
+       NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitTrimTimer") {
+      context< SnapTrimmer >().log_enter(state_name);
+      assert(context<Trimming>().in_flight.empty());
+      struct OnTimer : Context {
+       PrimaryLogPGRef pg;
+       epoch_t epoch;
+       OnTimer(PrimaryLogPGRef pg, epoch_t epoch) : pg(pg), epoch(epoch) {}
+       void finish(int) override {
+         pg->lock();
+         if (!pg->pg_has_reset_since(epoch))
+           pg->snap_trimmer_machine.process_event(SnapTrimTimerReady());
+         pg->unlock();
+       }
+      };
+      auto *pg = context< SnapTrimmer >().pg;
+      if (pg->cct->_conf->osd_snap_trim_sleep > 0) {
+       wakeup = new OnTimer{pg, pg->get_osdmap()->get_epoch()};
+       Mutex::Locker l(pg->osd->snap_sleep_lock);
+       pg->osd->snap_sleep_timer.add_event_after(
+         pg->cct->_conf->osd_snap_trim_sleep, wakeup);
+      } else {
+       post_event(SnapTrimTimerReady());
+      }
+    }
+    void exit() {
+      context< SnapTrimmer >().log_exit(state_name, enter_time);
+      auto *pg = context< SnapTrimmer >().pg;
+      if (wakeup) {
+       Mutex::Locker l(pg->osd->snap_sleep_lock);
+       pg->osd->snap_sleep_timer.cancel_event(wakeup);
+       wakeup = nullptr;
+      }
+    }
+    boost::statechart::result react(const SnapTrimTimerReady &) {
+      wakeup = nullptr;
+      if (!context< SnapTrimmer >().can_trim()) {
+       post_event(KickTrim());
+       return transit< NotTrimming >();
+      } else {
+       return transit< AwaitAsyncWork >();
+      }
+    }
+  };
+
   struct WaitRWLock : boost::statechart::state< WaitRWLock, Trimming >, NamedState {
     typedef boost::mpl::list <
       boost::statechart::custom_reaction< TrimWriteUnblocked >
@@ -1526,7 +1580,7 @@ private:
        post_event(KickTrim());
        return transit< NotTrimming >();
       } else {
-       return transit< AwaitAsyncWork >();
+       return transit< WaitTrimTimer >();
       }
     }
   };