There could definitely be certain cases in which we could reliably
skip this kind of checking, but there is no easy way to separate
those out.
However, this is clearly the general way to do the massive pg
upmap clean-up job more efficiently and hence should make sense
in all cases.
Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
(cherry picked from commit c395f45f1f4d6f5e2b538a34730d9c92d8f9ae8b)
OPTION(mon_cpu_threads, OPT_INT)
OPTION(mon_osd_mapping_pgs_per_chunk, OPT_INT)
+OPTION(mon_clean_pg_upmaps_per_chunk, OPT_INT)
OPTION(mon_osd_max_creating_pgs, OPT_INT)
OPTION(mon_tick_interval, OPT_INT)
OPTION(mon_session_timeout, OPT_INT) // must send keepalive or subscribe
.set_default(4096)
.set_description(""),
+ Option("mon_clean_pg_upmaps_per_chunk", Option::TYPE_INT, Option::LEVEL_DEV)
+ .set_default(256)
+ .add_service("mon")
+ .set_description("granularity of PG upmap validation background work"),
+
Option("mon_osd_max_creating_pgs", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(1024)
.set_description(""),
tmp.apply_incremental(pending_inc);
// clean inappropriate pg_upmap/pg_upmap_items (if any)
- tmp.clean_pg_upmaps(cct, &pending_inc);
+ {
+ // Check every upmapped pg for now; until we can reliably identify
+ // certain cases that are safe to ignore (which is obviously the
+ // hard part, TBD..) we have to validate them all.
+ vector<pg_t> pgs_to_check;
+ tmp.get_upmap_pgs(&pgs_to_check);
+ if (pgs_to_check.size() < g_conf->mon_clean_pg_upmaps_per_chunk * 2) {
+ // not enough pgs to be worth fanning out; do it inline
+ tmp.clean_pg_upmaps(cct, &pending_inc);
+ } else {
+ // spread validation across the ParallelPGMapper worker pool in
+ // chunks of mon_clean_pg_upmaps_per_chunk pgs, then block until
+ // all chunks have been processed
+ CleanUpmapJob job(cct, tmp, pending_inc);
+ mapper.queue(&job, g_conf->mon_clean_pg_upmaps_per_chunk, pgs_to_check);
+ job.wait();
+ }
+ }
// remove any legacy osdmap nearfull/full flags
{
FAST_READ_DEFAULT
};
+ // Background job that validates pg_upmap/pg_upmap_items entries in
+ // parallel chunks via ParallelPGMapper, recording any needed cleanups
+ // into the shared pending_inc.
+ struct CleanUpmapJob : public ParallelPGMapper::Job {
+ CephContext *cct;
+ const OSDMap& osdmap; // map whose upmap entries are being validated
+ OSDMap::Incremental& pending_inc;
+ // lock to protect pending_inc from concurrent modification while
+ // the parallel checking work records its results
+ Mutex pending_inc_lock = {"CleanUpmapJob::pending_inc_lock"};
+
+ CleanUpmapJob(CephContext *cct, const OSDMap& om, OSDMap::Incremental& pi)
+ : ParallelPGMapper::Job(&om),
+ cct(cct),
+ osdmap(om),
+ pending_inc(pi) {}
+
+ // Validate one chunk of pgs; invoked on a mapper worker thread, so
+ // any mutation of pending_inc must hold pending_inc_lock.
+ void process(const vector<pg_t>& to_check) override {
+ vector<pg_t> to_cancel;
+ map<pg_t, mempool::osdmap::vector<pair<int,int>>> to_remap;
+ osdmap.check_pg_upmaps(cct, to_check, &to_cancel, &to_remap);
+ // don't bother taking lock if nothing changes
+ if (!to_cancel.empty() || !to_remap.empty()) {
+ Mutex::Locker l(pending_inc_lock);
+ osdmap.clean_pg_upmaps(cct, &pending_inc, to_cancel, to_remap);
+ }
+ }
+
+ // pool/ps-range variant of the Job interface is unused by this job
+ void process(int64_t poolid, unsigned ps_begin, unsigned ps_end) override {}
+ void complete() override {}
+ }; // public as this will need to be accessible from TestOSDMap.cc
+
// svc
public:
void create_initial() override;