git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Handle repair when no object_info_t so eversion_t not known at primary
author David Zafman <dzafman@redhat.com>
Fri, 5 May 2017 03:45:55 +0000 (20:45 -0700)
committer David Zafman <dzafman@redhat.com>
Fri, 23 Jun 2017 15:02:51 +0000 (08:02 -0700)
Signed-off-by: David Zafman <dzafman@redhat.com>
src/osd/PrimaryLogPG.cc
src/osd/ReplicatedBackend.cc

index f8a4e910b9647f48eb15c07f40bac5bb71e1e989..6d726afccf072c33e1394d731d540ed597c3a523 100644 (file)
@@ -13877,21 +13877,25 @@ int PrimaryLogPG::rep_repair_primary_object(const hobject_t& soid, OpRequestRef
 
   assert(!pg_log.get_missing().is_missing(soid));
   bufferlist bv;
-  int r = get_pgbackend()->objects_get_attr(soid, OI_ATTR, &bv);
-  if (r < 0)
-    return r;
   object_info_t oi;
-  try {
+  eversion_t v;
+  int r = get_pgbackend()->objects_get_attr(soid, OI_ATTR, &bv);
+  if (r < 0) {
+    // Leave v and try to repair without a version, getting attr failed
+    dout(0) << __func__ << ": Need version of replica, objects_get_attr failed: "
+           << soid << " error=" << r << dendl;
+  } else try {
     bufferlist::iterator bliter = bv.begin();
     ::decode(oi, bliter);
+    v = oi.version;
   } catch (...) {
-    dout(0) << __func__ << ":  bad object_info_t: " << soid << dendl;
-    // XXX: Too bad I can't get the version to recover, so can't repair
-    return -EIO;
+    // Leave v as default constructed. This will fail when sent to older OSDs, but
+    // not much worse than failing here.
+    dout(0) << __func__ << ": Need version of replica, bad object_info_t: " << soid << dendl;
   }
 
-  missing_loc.add_missing(soid, oi.version, eversion_t());
-  if (primary_error(soid, oi.version)) {
+  missing_loc.add_missing(soid, v, eversion_t());
+  if (primary_error(soid, v)) {
     dout(0) << __func__ << " No other replicas available for " << soid << dendl;
     // XXX: If we knew that there is no down osd which could include this
     // object, it would be nice if we could return EIO here.
index 9bb7abc34bee5ee9c2bb687c38ed8071c6c79f14..12010168bcbc48c8bc4b2862bed753465144f6d8 100644 (file)
@@ -1750,6 +1750,10 @@ bool ReplicatedBackend::handle_pull_response(
     pi.recovery_info.copy_subset.intersection_of(
       pop.recovery_info.copy_subset);
   }
+  // If primary doesn't have object info and didn't know version
+  if (pi.recovery_info.version == eversion_t()) {
+    pi.recovery_info.version = pop.version;
+  }
 
   bool first = pi.recovery_progress.first;
   if (first) {
@@ -1931,6 +1935,7 @@ int ReplicatedBackend::build_push_op(const ObjectRecoveryInfo &recovery_info,
          << " recovery_info: " << recovery_info
           << dendl;
 
+  eversion_t v  = recovery_info.version;
   if (progress.first) {
     int r = store->omap_get_header(coll, ghobject_t(recovery_info.soid), &out_op->omap_header);
     if(r < 0) {
@@ -1945,9 +1950,19 @@ int ReplicatedBackend::build_push_op(const ObjectRecoveryInfo &recovery_info,
 
     // Debug
     bufferlist bv = out_op->attrset[OI_ATTR];
-    object_info_t oi(bv);
+    object_info_t oi;
+    try {
+     bufferlist::iterator bliter = bv.begin();
+     ::decode(oi, bliter);
+    } catch (...) {
+      dout(0) << __func__ << ": bad object_info_t: " << recovery_info.soid << dendl;
+      return -EINVAL;
+    }
 
-    if (oi.version != recovery_info.version) {
+    // If requestor didn't know the version, use ours
+    if (v == eversion_t()) {
+      v = oi.version;
+    } else if (oi.version != v) {
       get_parent()->clog_error() << get_info().pgid << " push "
                                 << recovery_info.soid << " v "
                                 << recovery_info.version
@@ -1958,6 +1973,9 @@ int ReplicatedBackend::build_push_op(const ObjectRecoveryInfo &recovery_info,
 
     new_progress.first = false;
   }
+  // Once we provide the version subsequent requests will have it, so
+  // at this point it must be known.
+  assert(v != eversion_t());
 
   uint64_t available = cct->_conf->osd_recovery_max_chunk;
   if (!progress.omap_complete) {
@@ -2057,7 +2075,7 @@ int ReplicatedBackend::build_push_op(const ObjectRecoveryInfo &recovery_info,
   get_parent()->get_logger()->inc(l_osd_push_outb, out_op->data.length());
 
   // send
-  out_op->version = recovery_info.version;
+  out_op->version = v;
   out_op->soid = recovery_info.soid;
   out_op->recovery_info = recovery_info;
   out_op->after_progress = new_progress;