From: Sage Weil Date: Tue, 7 Sep 2010 17:01:58 +0000 (-0700) Subject: osd: log error instead of crashing on failed pull attempt X-Git-Tag: v0.22~202 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=7f9bab9ee7346e7b1aa2edd9dbe0e52084776b30;p=ceph.git osd: log error instead of crashing on failed pull attempt If peering screws up and the primary mistakenly tries to pull an object from us that we don't have, log an error instead of crashing. This will still throw off recovery (it will hang), but that's better than crashing outright. --- diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 5c3e51b6a87c..d95b2e84a9c9 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -3018,14 +3018,24 @@ void ReplicatedPG::sub_op_pull(MOSDSubOp *op) struct stat st; int r = osd->store->stat(coll_t::build_pg_coll(info.pgid), soid, &st); - assert(r == 0); - uint64_t size = st.st_size; + if (r != 0) { + stringstream ss; + char buf[80]; + ss << op->get_source() << " tried to pull " << soid << " in " << info.pgid + << " but got " << strerror_r(-r, buf, sizeof(buf)); + osd->logclient.log(LOG_ERROR, ss); + + // FIXME: do something more intelligent.. mark the pg as needing repair? + + } else { + uint64_t size = st.st_size; - bool complete = false; - if (!op->data_subset.empty() && op->data_subset.end() >= size) - complete = true; + bool complete = false; + if (!op->data_subset.empty() && op->data_subset.end() >= size) + complete = true; - send_push_op(soid, op->get_source().num(), size, op->first, complete, op->data_subset, op->clone_subsets); + send_push_op(soid, op->get_source().num(), size, op->first, complete, op->data_subset, op->clone_subsets); + } op->put(); }