git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mon/OSDMonitor: do clean_pg_upmaps the parallel way if necessary
author xie xingguo <xie.xingguo@zte.com.cn>
Mon, 3 Jun 2019 08:10:22 +0000 (16:10 +0800)
committer Nathan Cutler <ncutler@suse.com>
Wed, 26 Jun 2019 15:25:12 +0000 (17:25 +0200)
There are certainly some cases in which we could reliably
skip this kind of checking, but there is no easy way to separate
those out.
However, this is clearly the general way to do the massive pg
upmap clean-up job more efficiently and hence should make sense
in all cases.

Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
(cherry picked from commit c395f45f1f4d6f5e2b538a34730d9c92d8f9ae8b)

src/common/legacy_config_opts.h
src/common/options.cc
src/mon/OSDMonitor.cc
src/mon/OSDMonitor.h

index b1efe6eac6600f4db4fbda3be87a7fdf304a6c17..91e72336111d85b327027dc35cc25ed22f254134 100644 (file)
@@ -202,6 +202,7 @@ OPTION(mon_osd_cache_size, OPT_INT)  // the size of osdmaps cache, not to rely o
 
 OPTION(mon_cpu_threads, OPT_INT)
 OPTION(mon_osd_mapping_pgs_per_chunk, OPT_INT)
+OPTION(mon_clean_pg_upmaps_per_chunk, OPT_INT)
 OPTION(mon_osd_max_creating_pgs, OPT_INT)
 OPTION(mon_tick_interval, OPT_INT)
 OPTION(mon_session_timeout, OPT_INT)    // must send keepalive or subscribe
index fa1e800735295ea65fc97d914f4a054f0f2f9597..a093f9b17e6c073e062de46c111ed07881cc96cd 100644 (file)
@@ -1375,6 +1375,11 @@ std::vector<Option> get_global_options() {
     .add_service("mon")
     .set_description("granularity of PG placement calculation background work"),
 
+    Option("mon_clean_pg_upmaps_per_chunk", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(256)
+    .add_service("mon")
+    .set_description("granularity of PG upmap validation background work"),
+
     Option("mon_osd_max_creating_pgs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
     .set_default(1024)
     .add_service("mon")
index e7f347750173b9f6f03e96f866c8891306a14411..e2b47bd228d08d92e58bb2b4d701fd352d3e3619 100644 (file)
@@ -1095,7 +1095,21 @@ void OSDMonitor::encode_pending(MonitorDBStore::TransactionRef t)
     OSDMap::clean_temps(cct, osdmap, tmp, &pending_inc);
 
     // clean inappropriate pg_upmap/pg_upmap_items (if any)
-    tmp.clean_pg_upmaps(cct, &pending_inc);
+    {
+      // check every upmapped pg for now
+      // until we could reliably identify certain cases to ignore,
+      // which is obviously the hard part TBD..
+      vector<pg_t> pgs_to_check;
+      tmp.get_upmap_pgs(&pgs_to_check);
+      if (pgs_to_check.size() < g_conf()->mon_clean_pg_upmaps_per_chunk * 2) {
+        // not enough pgs, do it inline
+        tmp.clean_pg_upmaps(cct, &pending_inc);
+      } else {
+        CleanUpmapJob job(cct, tmp, pending_inc);
+        mapper.queue(&job, g_conf()->mon_clean_pg_upmaps_per_chunk, pgs_to_check);
+        job.wait();
+      }
+    }
 
     // update creating pgs first so that we can remove the created pgid and
     // process the pool flag removal below in the same osdmap epoch.
index 1cb9f5262c2878275f2cddbbd84df4048d02e0d7..eb1f4aed6592d8c6ee009e1b950f357c93ae3482 100644 (file)
@@ -249,6 +249,35 @@ public:
     FAST_READ_DEFAULT
   };
 
+  struct CleanUpmapJob : public ParallelPGMapper::Job {
+    CephContext *cct;
+    const OSDMap& osdmap;              // map that check_pg_upmaps() is run against
+    OSDMap::Incremental& pending_inc;  // receives the cancel/remap results
+    // lock to protect pending_inc from changing
+    // while the results of a check are being applied to it
+    Mutex pending_inc_lock = {"CleanUpmapJob::pending_inc_lock"};
+
+    CleanUpmapJob(CephContext *cct, const OSDMap& om, OSDMap::Incremental& pi)
+      : ParallelPGMapper::Job(&om),
+        cct(cct),
+        osdmap(om),
+        pending_inc(pi) {}
+
+    void process(const vector<pg_t>& to_check) override {  // validate one batch of pgs
+      vector<pg_t> to_cancel;
+      map<pg_t, mempool::osdmap::vector<pair<int,int>>> to_remap;
+      osdmap.check_pg_upmaps(cct, to_check, &to_cancel, &to_remap);
+      // don't bother taking the lock if nothing changes
+      if (!to_cancel.empty() || !to_remap.empty()) {
+        std::lock_guard l(pending_inc_lock);
+        osdmap.clean_pg_upmaps(cct, &pending_inc, to_cancel, to_remap);
+      }
+    }
+
+    void process(int64_t poolid, unsigned ps_begin, unsigned ps_end) override {}  // range-based overload: no-op here
+    void complete() override {}  // nothing to do on completion
+  }; // public as this will need to be accessible from TestOSDMap.cc
+
   // svc
 public:
   void create_initial() override;