]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mon/OSDMonitor: do clean_pg_upmaps the parallel way if necessary
authorxie xingguo <xie.xingguo@zte.com.cn>
Mon, 3 Jun 2019 08:10:22 +0000 (16:10 +0800)
committerxie xingguo <xie.xingguo@zte.com.cn>
Tue, 18 Jun 2019 02:15:50 +0000 (10:15 +0800)
There could definitely be some certain cases we could reliably
skip this kind of checking, but there is no easy way to separate
those out.
However, this is clearly the general way to do the massive pg
upmap clean-up job more efficiently and hence should make sense
in all cases.

Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
(cherry picked from commit c395f45f1f4d6f5e2b538a34730d9c92d8f9ae8b)

src/common/legacy_config_opts.h
src/common/options.cc
src/mon/OSDMonitor.cc
src/mon/OSDMonitor.h

index 4727f36bbbbca3695e46abe7ad23b9fe4ee85a26..c0c00640ebbdcde63a7bfaa10e990e903c423b8d 100644 (file)
@@ -197,6 +197,7 @@ OPTION(mon_osd_cache_size, OPT_INT)  // the size of osdmaps cache, not to rely o
 
 OPTION(mon_cpu_threads, OPT_INT)
 OPTION(mon_osd_mapping_pgs_per_chunk, OPT_INT)
+OPTION(mon_clean_pg_upmaps_per_chunk, OPT_INT)
 OPTION(mon_osd_max_creating_pgs, OPT_INT)
 OPTION(mon_tick_interval, OPT_INT)
 OPTION(mon_session_timeout, OPT_INT)    // must send keepalive or subscribe
index 8710b57672e60581c7ee7aa28b7402bdb8270b4c..17dbf9fad7729d60a03d285dddb354391c0440ae 100644 (file)
@@ -1204,6 +1204,11 @@ std::vector<Option> get_global_options() {
     .set_default(4096)
     .set_description(""),
 
+    Option("mon_clean_pg_upmaps_per_chunk", Option::TYPE_INT, Option::LEVEL_DEV)
+    .set_default(256)
+    .add_service("mon")
+    .set_description("granularity of PG upmap validation background work"),
+
     Option("mon_osd_max_creating_pgs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
     .set_default(1024)
     .set_description(""),
index fb665f00c4606e53c44b5694632b6e071548e2d2..0c99b03871b73f6e822814b4926c071d54992af9 100644 (file)
@@ -1062,7 +1062,21 @@ void OSDMonitor::encode_pending(MonitorDBStore::TransactionRef t)
     tmp.apply_incremental(pending_inc);
 
     // clean inappropriate pg_upmap/pg_upmap_items (if any)
-    tmp.clean_pg_upmaps(cct, &pending_inc);
+    {
+      // check every upmapped pg for now
+      // until we could reliably identify certain cases to ignore,
+      // which is obviously the hard part TBD..
+      vector<pg_t> pgs_to_check;
+      tmp.get_upmap_pgs(&pgs_to_check);
+      if (pgs_to_check.size() < g_conf->mon_clean_pg_upmaps_per_chunk * 2) {
+        // not enough pgs, do it inline
+        tmp.clean_pg_upmaps(cct, &pending_inc);
+      } else {
+        CleanUpmapJob job(cct, tmp, pending_inc);
+        mapper.queue(&job, g_conf->mon_clean_pg_upmaps_per_chunk, pgs_to_check);
+        job.wait();
+      }
+    }
 
     // remove any legacy osdmap nearfull/full flags
     {
index 117cf2217442792a283b14ec69da7bcc468a1b0c..bc03065aee7cb75a7e6334772017519340a6d974 100644 (file)
@@ -248,6 +248,35 @@ public:
     FAST_READ_DEFAULT
   };
 
+  struct CleanUpmapJob : public ParallelPGMapper::Job {
+    CephContext *cct;
+    const OSDMap& osdmap;
+    OSDMap::Incremental& pending_inc;
+    // lock to protect pending_inc form changing
+    // when checking is done
+    Mutex pending_inc_lock = {"CleanUpmapJob::pending_inc_lock"};
+
+    CleanUpmapJob(CephContext *cct, const OSDMap& om, OSDMap::Incremental& pi)
+      : ParallelPGMapper::Job(&om),
+        cct(cct),
+        osdmap(om),
+        pending_inc(pi) {}
+
+    void process(const vector<pg_t>& to_check) override {
+      vector<pg_t> to_cancel;
+      map<pg_t, mempool::osdmap::vector<pair<int,int>>> to_remap;
+      osdmap.check_pg_upmaps(cct, to_check, &to_cancel, &to_remap);
+      // don't bother taking lock if nothing changes
+      if (!to_cancel.empty() || !to_remap.empty()) {
+        Mutex::Locker l(pending_inc_lock);
+        osdmap.clean_pg_upmaps(cct, &pending_inc, to_cancel, to_remap);
+      }
+    }
+
+    void process(int64_t poolid, unsigned ps_begin, unsigned ps_end) override {}
+    void complete() override {}
+  }; // public as this will need to be accessible from TestTestOSDMap.cc
+
   // svc
 public:
   void create_initial() override;