]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd,mon: include more pg merge metadata in pg_pool_t
authorSage Weil <sage@redhat.com>
Mon, 11 Mar 2019 22:31:54 +0000 (17:31 -0500)
committerSage Weil <sage@redhat.com>
Mon, 11 Mar 2019 22:31:54 +0000 (17:31 -0500)
The ones we need are source_version and target_version.  Include it in a
nice containing structure to keep things tidy.

Signed-off-by: Sage Weil <sage@redhat.com>
src/messages/MOSDPGReadyToMerge.h
src/mon/OSDMonitor.cc
src/osd/OSD.cc
src/osd/OSD.h
src/osd/PG.cc
src/osd/PG.h
src/osd/osd_types.cc
src/osd/osd_types.h

index cbe22af39b022c2af7342a87746ca6bd5663629c..b8c180956758ee5e9b7203269efff70bcb3a9973 100644 (file)
@@ -7,6 +7,7 @@ class MOSDPGReadyToMerge
   : public MessageInstance<MOSDPGReadyToMerge, PaxosServiceMessage> {
 public:
   pg_t pgid;
+  eversion_t source_version, target_version;
   epoch_t last_epoch_started = 0;
   epoch_t last_epoch_clean = 0;
   bool ready = true;
@@ -14,9 +15,12 @@ public:
   MOSDPGReadyToMerge()
     : MessageInstance(MSG_OSD_PG_READY_TO_MERGE, 0)
   {}
-  MOSDPGReadyToMerge(pg_t p, epoch_t les, epoch_t lec, bool r, epoch_t v)
+  MOSDPGReadyToMerge(pg_t p, eversion_t sv, eversion_t tv,
+                    epoch_t les, epoch_t lec, bool r, epoch_t v)
     : MessageInstance(MSG_OSD_PG_READY_TO_MERGE, v),
       pgid(p),
+      source_version(sv),
+      target_version(tv),
       last_epoch_started(les),
       last_epoch_clean(lec),
       ready(r)
@@ -25,6 +29,8 @@ public:
     using ceph::encode;
     paxos_encode();
     encode(pgid, payload);
+    encode(source_version, payload);
+    encode(target_version, payload);
     encode(last_epoch_started, payload);
     encode(last_epoch_clean, payload);
     encode(ready, payload);
@@ -33,6 +39,8 @@ public:
     bufferlist::const_iterator p = payload.begin();
     paxos_decode(p);
     decode(pgid, p);
+    decode(source_version, p);
+    decode(target_version, p);
     decode(last_epoch_started, p);
     decode(last_epoch_clean, p);
     decode(ready, p);
@@ -41,6 +49,8 @@ public:
   void print(ostream &out) const {
     out << get_type_name()
         << "(" << pgid
+       << " sv " << source_version
+       << " tv " << target_version
        << " les/c " << last_epoch_started << "/" << last_epoch_clean
        << (ready ? " ready" : " NOT READY")
         << " v" << version << ")";
index efbc8c1257c66ac8873272d2f09756d311258d17..0929d904f7844faf311cd6ff3b51ec8ec1217d10 100644 (file)
@@ -3318,7 +3318,12 @@ bool OSDMonitor::prepare_pg_ready_to_merge(MonOpRequestRef op)
   }
 
   if (m->ready) {
-    p.dec_pg_num(m->last_epoch_started, m->last_epoch_clean);
+    p.dec_pg_num(m->pgid,
+                pending_inc.epoch,
+                m->source_version,
+                m->target_version,
+                m->last_epoch_started,
+                m->last_epoch_clean);
     p.last_change = pending_inc.epoch;
   } else {
     // back off the merge attempt!
index 3b5f4e80bfffcd7ec6bee17f90f394d780c45099..2d7945b1e88fbdc524ada7efee716cc49b8a5e2a 100644 (file)
@@ -1857,24 +1857,26 @@ bool OSDService::try_finish_pg_delete(PG *pg, unsigned old_pg_num)
 
 // ---
 
-void OSDService::set_ready_to_merge_source(PG *pg)
+void OSDService::set_ready_to_merge_source(PG *pg, eversion_t version)
 {
   std::lock_guard l(merge_lock);
   dout(10) << __func__ << " " << pg->pg_id << dendl;
-  ready_to_merge_source.insert(pg->pg_id.pgid);
+  ready_to_merge_source[pg->pg_id.pgid] = version;
   assert(not_ready_to_merge_source.count(pg->pg_id.pgid) == 0);
   _send_ready_to_merge();
 }
 
 void OSDService::set_ready_to_merge_target(PG *pg,
+                                          eversion_t version,
                                           epoch_t last_epoch_started,
                                           epoch_t last_epoch_clean)
 {
   std::lock_guard l(merge_lock);
   dout(10) << __func__ << " " << pg->pg_id << dendl;
   ready_to_merge_target.insert(make_pair(pg->pg_id.pgid,
-                                        make_pair(last_epoch_started,
-                                                  last_epoch_clean)));
+                                        make_tuple(version,
+                                                   last_epoch_started,
+                                                   last_epoch_clean)));
   assert(not_ready_to_merge_target.count(pg->pg_id.pgid) == 0);
   _send_ready_to_merge();
 }
@@ -1916,8 +1918,7 @@ void OSDService::_send_ready_to_merge()
     if (sent_ready_to_merge_source.count(src) == 0) {
       monc->send_mon_message(new MOSDPGReadyToMerge(
                               src,
-                              0,
-                              0,
+                              {}, {}, 0, 0,
                               false,
                               osdmap->get_epoch()));
       sent_ready_to_merge_source.insert(src);
@@ -1927,28 +1928,29 @@ void OSDService::_send_ready_to_merge()
     if (sent_ready_to_merge_source.count(p.second) == 0) {
       monc->send_mon_message(new MOSDPGReadyToMerge(
                               p.second,
-                              0,
-                              0,
+                              {}, {}, 0, 0,
                               false,
                               osdmap->get_epoch()));
       sent_ready_to_merge_source.insert(p.second);
     }
   }
   for (auto src : ready_to_merge_source) {
-    if (not_ready_to_merge_source.count(src) ||
-       not_ready_to_merge_target.count(src.get_parent())) {
+    if (not_ready_to_merge_source.count(src.first) ||
+       not_ready_to_merge_target.count(src.first.get_parent())) {
       continue;
     }
-    auto p = ready_to_merge_target.find(src.get_parent());
+    auto p = ready_to_merge_target.find(src.first.get_parent());
     if (p != ready_to_merge_target.end() &&
-       sent_ready_to_merge_source.count(src) == 0) {
+       sent_ready_to_merge_source.count(src.first) == 0) {
       monc->send_mon_message(new MOSDPGReadyToMerge(
-                              src,
-                              p->second.first,   // PG's last_epoch_started
-                              p->second.second,  // PG's last_epoch_clean
+                              src.first,           // source pgid
+                              src.second,          // src version
+                              std::get<0>(p->second), // target version
+                              std::get<1>(p->second), // PG's last_epoch_started
+                              std::get<2>(p->second), // PG's last_epoch_clean
                               true,
                               osdmap->get_epoch()));
-      sent_ready_to_merge_source.insert(src);
+      sent_ready_to_merge_source.insert(src.first);
     }
   }
 }
@@ -8613,9 +8615,7 @@ bool OSD::advance_pg(
            pg->merge_from(
              sources, rctx, split_bits,
              nextmap->get_pg_pool(
-               pg->pg_id.pool())->get_pg_num_dec_last_epoch_started(),
-             nextmap->get_pg_pool(
-               pg->pg_id.pool())->get_pg_num_dec_last_epoch_clean());
+               pg->pg_id.pool())->last_pg_merge_meta);
            pg->pg_slot->waiting_for_merge_epoch = 0;
          } else {
            dout(20) << __func__ << " not ready to merge yet" << dendl;
index bf054d7011250dc000528fa61b6f969752be670e..25a5bcc7dbf91e57474ad0c76f7b2436a88f46fc 100644 (file)
@@ -711,14 +711,16 @@ public:
 
   // -- pg merge --
   Mutex merge_lock = {"OSD::merge_lock"};
-  set<pg_t> ready_to_merge_source;
-  map<pg_t,pair<epoch_t,epoch_t>> ready_to_merge_target;  // pg -> (les,lec)
+  map<pg_t,eversion_t> ready_to_merge_source;   // pg -> version
+  map<pg_t,std::tuple<eversion_t,epoch_t,epoch_t>> ready_to_merge_target;  // pg -> (version,les,lec)
   set<pg_t> not_ready_to_merge_source;
   map<pg_t,pg_t> not_ready_to_merge_target;
   set<pg_t> sent_ready_to_merge_source;
 
-  void set_ready_to_merge_source(PG *pg);
+  void set_ready_to_merge_source(PG *pg,
+                                eversion_t version);
   void set_ready_to_merge_target(PG *pg,
+                                eversion_t version,
                                 epoch_t last_epoch_started,
                                 epoch_t last_epoch_clean);
   void set_not_ready_to_merge_source(pg_t source);
index f7aeabea198174cb467b0d5dc90240b3706a7d9e..0fce841bafdd8ea2432a78ff790d9b4631316b97 100644 (file)
@@ -2378,11 +2378,12 @@ void PG::try_mark_clean()
        if (target) {
          ldout(cct, 10) << "ready to merge (target)" << dendl;
          osd->set_ready_to_merge_target(this,
+                                        info.last_update,
                                         info.history.last_epoch_started,
                                         info.history.last_epoch_clean);
        } else {
          ldout(cct, 10) << "ready to merge (source)" << dendl;
-         osd->set_ready_to_merge_source(this);
+         osd->set_ready_to_merge_source(this, info.last_update);
        }
       }
     } else {
@@ -2706,8 +2707,7 @@ void PG::finish_split_stats(const object_stat_sum_t& stats, ObjectStore::Transac
 
 void PG::merge_from(map<spg_t,PGRef>& sources, RecoveryCtx *rctx,
                    unsigned split_bits,
-                   epoch_t dec_last_epoch_started,
-                   epoch_t dec_last_epoch_clean)
+                   const pg_merge_meta_t& last_pg_merge_meta)
 {
   dout(10) << __func__ << " from " << sources << " split_bits " << split_bits
           << dendl;
@@ -2792,15 +2792,15 @@ void PG::merge_from(map<spg_t,PGRef>& sources, RecoveryCtx *rctx,
     // remapped in concert with each other...
     info.history = sources.begin()->second->info.history;
 
-    // we use the pg_num_dec_last_epoch_{started,clean} we got from
+    // we use the last_epoch_{started,clean} we got from
     // the caller, which are the epochs that were reported by the PGs were
     // found to be ready for merge.
-    info.history.last_epoch_clean = dec_last_epoch_clean;
-    info.history.last_epoch_started = dec_last_epoch_started;
-    info.last_epoch_started = dec_last_epoch_started;
+    info.history.last_epoch_clean = last_pg_merge_meta.last_epoch_clean;
+    info.history.last_epoch_started = last_pg_merge_meta.last_epoch_started;
+    info.last_epoch_started = last_pg_merge_meta.last_epoch_started;
     dout(10) << __func__
-            << " set les/c to " << dec_last_epoch_started << "/"
-            << dec_last_epoch_clean
+            << " set les/c to " << last_pg_merge_meta.last_epoch_started << "/"
+            << last_pg_merge_meta.last_epoch_clean
             << " from pool last_dec_*, source pg history was "
             << sources.begin()->second->info.history
             << dendl;
index b1f3db006504a24285b10223a1c3ea16f54b9166..52fba8b5b0bb559f498e3d900c07eacc5696dd84 100644 (file)
@@ -414,8 +414,7 @@ public:
   void split_into(pg_t child_pgid, PG *child, unsigned split_bits);
   void merge_from(map<spg_t,PGRef>& sources, RecoveryCtx *rctx,
                  unsigned split_bits,
-                 epoch_t dec_last_epoch_started,
-                 epoch_t dec_last_epoch_clean);
+                 const pg_merge_meta_t& last_pg_merge_meta);
   void finish_split_stats(const object_stat_sum_t& stats, ObjectStore::Transaction *t);
 
   void scrub(epoch_t queued, ThreadPool::TPHandle &handle);
index fce9cbee788ac21afa5f57a8140684f7d4cdcc80..9aeda158a7933a06a173095706a6ae325049385a 100644 (file)
@@ -1337,10 +1337,7 @@ void pg_pool_t::dump(Formatter *f) const
   f->dump_unsigned("pg_placement_num_target", get_pgp_num_target());
   f->dump_unsigned("pg_num_target", get_pg_num_target());
   f->dump_unsigned("pg_num_pending", get_pg_num_pending());
-  f->dump_unsigned("pg_num_dec_last_epoch_started",
-                  get_pg_num_dec_last_epoch_started());
-  f->dump_unsigned("pg_num_dec_last_epoch_clean",
-                  get_pg_num_dec_last_epoch_clean());
+  f->dump_object("last_pg_merge_meta", last_pg_merge_meta);
   f->dump_stream("last_change") << get_last_change();
   f->dump_stream("last_force_op_resend") << get_last_force_op_resend();
   f->dump_stream("last_force_op_resend_prenautilus")
@@ -1747,7 +1744,7 @@ void pg_pool_t::encode(bufferlist& bl, uint64_t features) const
     return;
   }
 
-  uint8_t v = 28;
+  uint8_t v = 29;
   // NOTE: any new encoding dependencies must be reflected by
   // SIGNIFICANT_FEATURES
   if (!(features & CEPH_FEATURE_NEW_OSDOP_ENCODING)) {
@@ -1842,17 +1839,20 @@ void pg_pool_t::encode(bufferlist& bl, uint64_t features) const
     encode(pg_num_target, bl);
     encode(pgp_num_target, bl);
     encode(pg_num_pending, bl);
-    encode(pg_num_dec_last_epoch_started, bl);
-    encode(pg_num_dec_last_epoch_clean, bl);
+    encode((epoch_t)0, bl);  // pg_num_dec_last_epoch_started from 14.1.[01]
+    encode((epoch_t)0, bl);  // pg_num_dec_last_epoch_clean from 14.1.[01]
     encode(last_force_op_resend, bl);
     encode(pg_autoscale_mode, bl);
   }
+  if (v >= 29) {
+    encode(last_pg_merge_meta, bl);
+  }
   ENCODE_FINISH(bl);
 }
 
 void pg_pool_t::decode(bufferlist::const_iterator& bl)
 {
-  DECODE_START_LEGACY_COMPAT_LEN(28, 5, 5, bl);
+  DECODE_START_LEGACY_COMPAT_LEN(29, 5, 5, bl);
   decode(type, bl);
   decode(size, bl);
   decode(crush_rule, bl);
@@ -2009,10 +2009,14 @@ void pg_pool_t::decode(bufferlist::const_iterator& bl)
     decode(pg_num_target, bl);
     decode(pgp_num_target, bl);
     decode(pg_num_pending, bl);
-    decode(pg_num_dec_last_epoch_started, bl);
-    decode(pg_num_dec_last_epoch_clean, bl);
+    epoch_t e;
+    decode(e, bl);
+    decode(e, bl);
     decode(last_force_op_resend, bl);
     decode(pg_autoscale_mode, bl);
+    if (struct_v >= 29) {
+      decode(last_pg_merge_meta, bl);
+    }
   } else {
     pg_num_target = pg_num;
     pgp_num_target = pgp_num;
@@ -2040,8 +2044,8 @@ void pg_pool_t::generate_test_instances(list<pg_pool_t*>& o)
   a.pgp_num_target = 4;
   a.pg_num_target = 5;
   a.pg_num_pending = 5;
-  a.pg_num_dec_last_epoch_started = 2;
-  a.pg_num_dec_last_epoch_clean = 3;
+  a.last_pg_merge_meta.last_epoch_started = 2;
+  a.last_pg_merge_meta.last_epoch_clean = 2;
   a.last_change = 9;
   a.last_force_op_resend = 123823;
   a.last_force_op_resend_preluminous = 123824;
@@ -2112,10 +2116,6 @@ ostream& operator<<(ostream& out, const pg_pool_t& p)
   }
   if (p.get_pg_num_pending() != p.get_pg_num()) {
     out << " pg_num_pending " << p.get_pg_num_pending();
-    if (p.get_pg_num_dec_last_epoch_started() ||
-       p.get_pg_num_dec_last_epoch_clean())
-      out << " dles/c " << p.get_pg_num_dec_last_epoch_started()
-         << "/" << p.get_pg_num_dec_last_epoch_clean();
   }
   if (p.pg_autoscale_mode) {
     out << " autoscale_mode " << p.get_pg_autoscale_mode_name(p.pg_autoscale_mode);
index bfde9710e0b56d7dcbe308b39d434a9aaf8e55c9..2edd539507e0f47d4f70a668cce7b0bdb2d0accd 100644 (file)
@@ -1080,6 +1080,45 @@ private:
 };
 WRITE_CLASS_ENCODER_FEATURES(pool_opts_t)
 
+struct pg_merge_meta_t {
+  pg_t source_pgid;
+  epoch_t ready_epoch = 0;
+  epoch_t last_epoch_started = 0;
+  epoch_t last_epoch_clean = 0;
+  eversion_t source_version;
+  eversion_t target_version;
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    encode(source_pgid, bl);
+    encode(ready_epoch, bl);
+    encode(last_epoch_started, bl);
+    encode(last_epoch_clean, bl);
+    encode(source_version, bl);
+    encode(target_version, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::const_iterator& p) {
+    DECODE_START(1, p);
+    decode(source_pgid, p);
+    decode(ready_epoch, p);
+    decode(last_epoch_started, p);
+    decode(last_epoch_clean, p);
+    decode(source_version, p);
+    decode(target_version, p);
+    DECODE_FINISH(p);
+  }
+  void dump(Formatter *f) const {
+    f->dump_stream("source_pgid") << source_pgid;
+    f->dump_unsigned("ready_epoch", ready_epoch);
+    f->dump_unsigned("last_epoch_started", last_epoch_started);
+    f->dump_unsigned("last_epoch_clean", last_epoch_clean);
+    f->dump_stream("source_version") << source_version;
+    f->dump_stream("target_version") << target_version;
+  }
+};
+WRITE_CLASS_ENCODER(pg_merge_meta_t)
+
 /*
  * pg_pool
  */
@@ -1306,10 +1345,9 @@ public:
   /// last epoch that forced clients to resend (pre-luminous clients only)
   epoch_t last_force_op_resend_preluminous = 0;
 
-  /// last_epoch_started preceding pg_num decrement request
-  epoch_t pg_num_dec_last_epoch_started = 0;
-  /// last_epoch_clean preceding pg_num decrement request
-  epoch_t pg_num_dec_last_epoch_clean = 0;
+  /// metadata for the most recent PG merge
+  pg_merge_meta_t last_pg_merge_meta;
+  
   snapid_t snap_seq;        ///< seq for per-pool snapshot
   epoch_t snap_epoch;       ///< osdmap epoch of last snap
   uint64_t auid;            ///< who owns the pg
@@ -1565,13 +1603,6 @@ public:
   // pool size that it represents.
   unsigned get_pg_num_divisor(pg_t pgid) const;
 
-  epoch_t get_pg_num_dec_last_epoch_started() const {
-    return pg_num_dec_last_epoch_started;
-  }
-  epoch_t get_pg_num_dec_last_epoch_clean() const {
-    return pg_num_dec_last_epoch_clean;
-  }
-
   bool is_pending_merge(pg_t pgid, bool *target) const;
 
   void set_pg_num(int p) {
@@ -1593,11 +1624,19 @@ public:
   void set_pgp_num_target(int p) {
     pgp_num_target = p;
   }
-  void dec_pg_num(epoch_t last_epoch_started,
+  void dec_pg_num(pg_t source_pgid,
+                 epoch_t ready_epoch,
+                 eversion_t source_version,
+                 eversion_t target_version,
+                 epoch_t last_epoch_started,
                  epoch_t last_epoch_clean) {
     --pg_num;
-    pg_num_dec_last_epoch_started = last_epoch_started;
-    pg_num_dec_last_epoch_clean = last_epoch_clean;
+    last_pg_merge_meta.source_pgid = source_pgid;
+    last_pg_merge_meta.ready_epoch = ready_epoch;
+    last_pg_merge_meta.source_version = source_version;
+    last_pg_merge_meta.target_version = target_version;
+    last_pg_merge_meta.last_epoch_started = last_epoch_started;
+    last_pg_merge_meta.last_epoch_clean = last_epoch_clean;
     calc_pg_masks();
   }