]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mon/OSDMonitor: do clean_pg_upmaps the parallel way if necessary
authorxie xingguo <xie.xingguo@zte.com.cn>
Mon, 3 Jun 2019 08:10:22 +0000 (16:10 +0800)
committerxie xingguo <xie.xingguo@zte.com.cn>
Thu, 6 Jun 2019 02:10:45 +0000 (10:10 +0800)
There could definitely be some certain cases we could reliably
skip this kind of checking, but there is no easy way to separate
those out.
However, this is clearly the general way to do the massive pg
upmap clean-up job more efficiently and hence should make sense
in all cases.

Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
src/common/legacy_config_opts.h
src/common/options.cc
src/mon/OSDMonitor.cc
src/mon/OSDMonitor.h

index 305ec25ed3f8e4d976eb339f4f6931c1638a4af5..f6b42d641709ceea9dea0fc7825ff0f7ca9aff17 100644 (file)
@@ -187,6 +187,7 @@ OPTION(mon_osd_cache_size, OPT_INT)  // the size of osdmaps cache, not to rely o
 
 OPTION(mon_cpu_threads, OPT_INT)
 OPTION(mon_osd_mapping_pgs_per_chunk, OPT_INT)
+OPTION(mon_clean_pg_upmaps_per_chunk, OPT_INT)
 OPTION(mon_osd_max_creating_pgs, OPT_INT)
 OPTION(mon_tick_interval, OPT_INT)
 OPTION(mon_session_timeout, OPT_INT)    // must send keepalive or subscribe
index d4720f133c858ced340c7eb1d6a82ecc8cfbd11a..b1d6b2b7c737e7022585366588174e50fbd48d52 100644 (file)
@@ -1320,6 +1320,11 @@ std::vector<Option> get_global_options() {
     .add_service("mon")
     .set_description("granularity of PG placement calculation background work"),
 
+    Option("mon_clean_pg_upmaps_per_chunk", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(256)
+    .add_service("mon")
+    .set_description("granularity of PG upmap validation background work"),
+
     Option("mon_osd_max_creating_pgs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
     .set_default(1024)
     .add_service("mon")
index 3ab79076a3b9b6925fbaa9ec893a7e1365402b01..14119d0354f8f66484de45586635c83d2777a067 100644 (file)
@@ -1186,7 +1186,21 @@ void OSDMonitor::encode_pending(MonitorDBStore::TransactionRef t)
     OSDMap::clean_temps(cct, osdmap, tmp, &pending_inc);
 
     // clean inappropriate pg_upmap/pg_upmap_items (if any)
-    tmp.clean_pg_upmaps(cct, &pending_inc);
+    {
+      // check every upmapped pg for now
+      // until we could reliably identify certain cases to ignore,
+      // which is obviously the hard part TBD..
+      vector<pg_t> pgs_to_check;
+      tmp.get_upmap_pgs(&pgs_to_check);
+      if (pgs_to_check.size() < g_conf()->mon_clean_pg_upmaps_per_chunk * 2) {
+        // not enough pgs, do it inline
+        tmp.clean_pg_upmaps(cct, &pending_inc);
+      } else {
+        CleanUpmapJob job(cct, tmp, pending_inc);
+        mapper.queue(&job, g_conf()->mon_clean_pg_upmaps_per_chunk, pgs_to_check);
+        job.wait();
+      }
+    }
 
     // update creating pgs first so that we can remove the created pgid and
     // process the pool flag removal below in the same osdmap epoch.
index 1cb9f5262c2878275f2cddbbd84df4048d02e0d7..eb1f4aed6592d8c6ee009e1b950f357c93ae3482 100644 (file)
@@ -249,6 +249,35 @@ public:
     FAST_READ_DEFAULT
   };
 
+  struct CleanUpmapJob : public ParallelPGMapper::Job {
+    CephContext *cct;
+    const OSDMap& osdmap;
+    OSDMap::Incremental& pending_inc;
+    // lock to protect pending_inc form changing
+    // when checking is done
+    Mutex pending_inc_lock = {"CleanUpmapJob::pending_inc_lock"};
+
+    CleanUpmapJob(CephContext *cct, const OSDMap& om, OSDMap::Incremental& pi)
+      : ParallelPGMapper::Job(&om),
+        cct(cct),
+        osdmap(om),
+        pending_inc(pi) {}
+
+    void process(const vector<pg_t>& to_check) override {
+      vector<pg_t> to_cancel;
+      map<pg_t, mempool::osdmap::vector<pair<int,int>>> to_remap;
+      osdmap.check_pg_upmaps(cct, to_check, &to_cancel, &to_remap);
+      // don't bother taking lock if nothing changes
+      if (!to_cancel.empty() || !to_remap.empty()) {
+        std::lock_guard l(pending_inc_lock);
+        osdmap.clean_pg_upmaps(cct, &pending_inc, to_cancel, to_remap);
+      }
+    }
+
+    void process(int64_t poolid, unsigned ps_begin, unsigned ps_end) override {}
+    void complete() override {}
+  }; // public as this will need to be accessible from TestTestOSDMap.cc
+
   // svc
 public:
   void create_initial() override;