From 47f2efb2c36925df644277b22412ad948fdc9e39 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 7 Sep 2010 10:01:58 -0700 Subject: [PATCH] osd: log error instead of crashing on failed pull attempt If peering screws up and the primary mistakenly tries to pull an object from us that we don't have, log an error instead of crashing. This will still throw off recovery (it will hang), but that's better than crashing outright. --- src/osd/ReplicatedPG.cc | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index aba9a0c64ee71..b99476a4b7a80 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -3377,14 +3377,24 @@ void ReplicatedPG::sub_op_pull(MOSDSubOp *op) struct stat st; int r = osd->store->stat(coll_t::build_pg_coll(info.pgid), soid, &st); - assert(r == 0); - uint64_t size = st.st_size; + if (r != 0) { + stringstream ss; + char buf[80]; + ss << op->get_source() << " tried to pull " << soid << " in " << info.pgid + << " but got " << strerror_r(-r, buf, sizeof(buf)); + osd->logclient.log(LOG_ERROR, ss); + + // FIXME: do something more intelligent.. mark the pg as needing repair? + + } else { + uint64_t size = st.st_size; - bool complete = false; - if (!op->data_subset.empty() && op->data_subset.end() >= size) - complete = true; + bool complete = false; + if (!op->data_subset.empty() && op->data_subset.end() >= size) + complete = true; - send_push_op(soid, op->get_source().num(), size, op->first, complete, op->data_subset, op->clone_subsets); + send_push_op(soid, op->get_source().num(), size, op->first, complete, op->data_subset, op->clone_subsets); + } op->put(); } -- 2.39.5