git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: pglog: with config, don't assert in the presence of stale divergent_priors 14648/head
author Greg Farnum <gfarnum@redhat.com>
Fri, 7 Apr 2017 21:33:20 +0000 (14:33 -0700)
committer Greg Farnum <gfarnum@redhat.com>
Thu, 20 Apr 2017 00:57:52 +0000 (17:57 -0700)
Fixes: http://tracker.ceph.com/issues/17916
Signed-off-by: Greg Farnum <gfarnum@redhat.com>
src/common/config_opts.h
src/osd/PG.cc
src/osd/PGLog.h
src/tools/ceph_objectstore_tool.cc

index 2c8f947795b2fc09d9236baf22493276516bc2e6..bf65e4ffd0d5d2a079dcb71663914fc23d4804dc 100644 (file)
@@ -751,6 +751,8 @@ OPTION(osd_op_num_shards, OPT_INT, 5)
 OPTION(osd_op_queue, OPT_STR, "wpq") // PrioritzedQueue (prio), Weighted Priority Queue (wpq), or debug_random
 OPTION(osd_op_queue_cut_off, OPT_STR, "low") // Min priority to go to strict queue. (low, high, debug_random)
 
+OPTION(osd_ignore_stale_divergent_priors, OPT_BOOL, false) // do not assert on divergent_prior entries which aren't in the log and whose on-disk objects are newer
+
 // Set to true for testing.  Users should NOT set this.
 // If set to true even after reading enough shards to
 // decode the object, any error will be reported.
index aabb1290a722d127c98c0de13100f7f77afa584c..f8c14131f6dbe1040552c1e2696ae231ca743822 100644 (file)
@@ -3289,6 +3289,7 @@ void PG::read_state(ObjectStore *store, bufferlist &bl)
     ghobject_t(info_struct_v < 8 ? OSD::make_pg_log_oid(pg_id) : pgmeta_oid),
     info,
     oss,
+    cct->_conf->osd_ignore_stale_divergent_priors,
     cct->_conf->osd_debug_verify_missing_on_start);
   if (oss.tellp())
     osd->clog->error() << oss.rdbuf();
index 5096e203179cc922b3c974ed11dd6da1fd0618b1..e4d0fa304ab326489342d6c72f900608ac6705c9 100644 (file)
@@ -1098,11 +1098,13 @@ public:
     coll_t log_coll, ghobject_t log_oid,
     const pg_info_t &info,
     ostringstream &oss,
+    bool tolerate_divergent_missing_log,
     bool debug_verify_stored_missing = false
     ) {
     return read_log_and_missing(
       store, pg_coll, log_coll, log_oid, info,
       log, missing, oss,
+      tolerate_divergent_missing_log,
       &clear_divergent_priors,
       this,
       (pg_log_debug ? &log_keys_debug : 0),
@@ -1115,6 +1117,7 @@ public:
     const pg_info_t &info,
     IndexedLog &log,
     missing_type &missing, ostringstream &oss,
+    bool tolerate_divergent_missing_log,
     bool *clear_divergent_priors = NULL,
     const DoutPrefixProvider *dpp = NULL,
     set<string> *log_keys_debug = 0,
@@ -1290,7 +1293,20 @@ public:
                 * version would not have been recovered, and a newer version
                 * would show up in the log above.
                 */
-             assert(oi.version == i->first);
+               /**
+                * Unfortunately the assessment above is incorrect because of
+                * http://tracker.ceph.com/issues/17916 (we were incorrectly
+                * not removing the divergent_priors set from disk state!),
+                * so let's check that.
+                */
+             if (oi.version > i->first && tolerate_divergent_missing_log) {
+               ldpp_dout(dpp, 0) << "read_log divergent_priors entry (" << *i
+                                 << ") inconsistent with disk state (" <<  oi
+                                 << "), assuming it is tracker.ceph.com/issues/17916"
+                                 << dendl;
+             } else {
+               assert(oi.version == i->first);
+             }
            } else {
              ldpp_dout(dpp, 15) << "read_log_and_missing  missing " << *i << dendl;
              missing.add(i->second, i->first, eversion_t());
index f9bf85a5d63a848ff0f517efddb0ab0aefb8f0e8..c4897805c382da86feb749eb35bd1702682bcada 100644 (file)
@@ -328,7 +328,8 @@ int get_log(ObjectStore *fs, __u8 struct_ver,
     PGLog::read_log_and_missing(fs, coll,
                    struct_ver >= 8 ? coll : coll_t::meta(),
                    struct_ver >= 8 ? pgid.make_pgmeta_oid() : log_oid,
-                   info, log, missing, oss);
+                   info, log, missing, oss,
+                   g_ceph_context->_conf->osd_ignore_stale_divergent_priors);
     if (debug && oss.str().size())
       cerr << oss.str() << std::endl;
   }