git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: pglog: with config, don't assert in the presence of stale divergent_priors 14646/head
author: Greg Farnum <gfarnum@redhat.com>
Fri, 7 Apr 2017 21:33:20 +0000 (14:33 -0700)
committer: Greg Farnum <gfarnum@redhat.com>
Thu, 20 Apr 2017 00:59:33 +0000 (17:59 -0700)
Fixes: http://tracker.ceph.com/issues/17916
Signed-off-by: Greg Farnum <gfarnum@redhat.com>
src/common/config_opts.h
src/osd/PG.cc
src/osd/PGLog.h
src/tools/ceph_objectstore_tool.cc

index 673da99a8acf0846acd0be9255f11dcb4cd0dfb0..b8ec0f2590d29f286c605d4fe82889927d23e02e 100644 (file)
@@ -731,6 +731,8 @@ OPTION(osd_op_num_shards, OPT_INT, 5)
 OPTION(osd_op_queue, OPT_STR, "wpq") // PrioritzedQueue (prio), Weighted Priority Queue (wpq), or debug_random
 OPTION(osd_op_queue_cut_off, OPT_STR, "low") // Min priority to go to strict queue. (low, high, debug_random)
 
+OPTION(osd_ignore_stale_divergent_priors, OPT_BOOL, false) // do not assert on divergent_prior entries which aren't in the log and whose on-disk objects are newer
+
 // Set to true for testing.  Users should NOT set this.
 // If set to true even after reading enough shards to
 // decode the object, any error will be reported.
index 068d337fe58b015e87c1698b4d10ef09d15dbb80..a63583e543c80e1fcc38a6eec3ffe7d52de93748 100644 (file)
@@ -3251,6 +3251,7 @@ void PG::read_state(ObjectStore *store, bufferlist &bl)
     ghobject_t(info_struct_v < 8 ? OSD::make_pg_log_oid(pg_id) : pgmeta_oid),
     info,
     oss,
+    cct->_conf->osd_ignore_stale_divergent_priors,
     cct->_conf->osd_debug_verify_missing_on_start);
   if (oss.tellp())
     osd->clog->error() << oss.rdbuf();
index 8c1ce7ee997eaeea76e94c34a08326b419409b3c..a2e9e696179f7b9d155cce43fbf09debbeb22a84 100644 (file)
@@ -1097,11 +1097,13 @@ public:
     coll_t log_coll, ghobject_t log_oid,
     const pg_info_t &info,
     ostringstream &oss,
+    bool tolerate_divergent_missing_log,
     bool debug_verify_stored_missing = false
     ) {
     return read_log_and_missing(
       store, pg_coll, log_coll, log_oid, info,
       log, missing, oss,
+      tolerate_divergent_missing_log,
       &clear_divergent_priors,
       this,
       (pg_log_debug ? &log_keys_debug : 0),
@@ -1114,6 +1116,7 @@ public:
     const pg_info_t &info,
     IndexedLog &log,
     missing_type &missing, ostringstream &oss,
+    bool tolerate_divergent_missing_log,
     bool *clear_divergent_priors = NULL,
     const DoutPrefixProvider *dpp = NULL,
     set<string> *log_keys_debug = 0,
@@ -1289,7 +1292,20 @@ public:
                 * version would not have been recovered, and a newer version
                 * would show up in the log above.
                 */
-             assert(oi.version == i->first);
+               /**
+                * Unfortunately the assessment above is incorrect because of
+                * http://tracker.ceph.com/issues/17916 (we were incorrectly
+                * not removing the divergent_priors set from disk state!),
+                * so let's check that.
+                */
+             if (oi.version > i->first && tolerate_divergent_missing_log) {
+               ldpp_dout(dpp, 0) << "read_log divergent_priors entry (" << *i
+                                 << ") inconsistent with disk state (" <<  oi
+                                 << "), assuming it is tracker.ceph.com/issues/17916"
+                                 << dendl;
+             } else {
+               assert(oi.version == i->first);
+             }
            } else {
              ldpp_dout(dpp, 15) << "read_log_and_missing  missing " << *i << dendl;
              missing.add(i->second, i->first, eversion_t());
index d5ce63a98c1bb83be410926ad1a6d31546d843e2..0d5eb87c7faba7b57e625e05b07db4f1abbd37a0 100644 (file)
@@ -329,7 +329,8 @@ int get_log(ObjectStore *fs, __u8 struct_ver,
     PGLog::read_log_and_missing(fs, coll,
                    struct_ver >= 8 ? coll : coll_t::meta(),
                    struct_ver >= 8 ? pgid.make_pgmeta_oid() : log_oid,
-                   info, log, missing, oss);
+                   info, log, missing, oss,
+                   g_ceph_context->_conf->osd_ignore_stale_divergent_priors);
     if (debug && oss.str().size())
       cerr << oss.str() << std::endl;
   }