From: David Zafman Date: Thu, 15 Oct 2015 03:42:48 +0000 (-0700) Subject: osd: Fix trim_object() to not crash on corrupt snapset X-Git-Tag: v10.0.0~30^2~6 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=eb0ca424815e94c78a2d09dbf787d102172f4ddf;p=ceph.git osd: Fix trim_object() to not crash on corrupt snapset Add test case to check for trim_object() osd crash Signed-off-by: David Zafman --- diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 5b11786af20f..e69c05a773ec 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -3175,13 +3175,19 @@ ReplicatedPG::RepGather *ReplicatedPG::trim_object(const hobject_t &coid) object_info_t &coi = obc->obs.oi; set<snapid_t> old_snaps(coi.snaps.begin(), coi.snaps.end()); - assert(old_snaps.size()); + if (old_snaps.empty()) { + osd->clog->error() << __func__ << " No object info snaps for " << coid << "\n"; + return NULL; + } SnapSet& snapset = obc->ssc->snapset; dout(10) << coid << " old_snaps " << old_snaps << " old snapset " << snapset << dendl; - assert(snapset.seq); + if (snapset.seq == 0) { + osd->clog->error() << __func__ << " No snapset.seq for " << coid << "\n"; + return NULL; + } RepGather *repop = simple_repop_create(obc); OpContext *ctx = repop->ctx; @@ -3210,7 +3216,11 @@ ReplicatedPG::RepGather *ReplicatedPG::trim_object(const hobject_t &coid) for (p = snapset.clones.begin(); p != snapset.clones.end(); ++p) if (*p == last) break; - assert(p != snapset.clones.end()); + if (p == snapset.clones.end()) { + osd->clog->error() << __func__ << " Snap " << coid.snap << " not in clones" << "\n"; + return NULL; + } + ctx->delta_stats.num_bytes -= snapset.get_clone_bytes(last); if (p != snapset.clones.begin()) { diff --git a/src/test/osd/osd-scrub-snaps.sh b/src/test/osd/osd-scrub-snaps.sh index fbc93be8f446..569a332c9822 100755 --- a/src/test/osd/osd-scrub-snaps.sh +++ b/src/test/osd/osd-scrub-snaps.sh @@ -132,10 +132,23 @@ sleep 5 ./ceph pg scrub 1.0 timeout 30 ./ceph -w 
-./stop.sh +for i in `seq 1 7` +do + ./rados -p test rmsnap snap$i +done + +sleep 10 ERRORS=0 +if ! killall ceph-osd +then + echo "OSD crash occurred" + ERRORS=$(expr $ERRORS + 1) +fi + +./stop.sh + declare -a err_strings err_strings[0]="log_channel[(]cluster[)] log [[]ERR[]] : scrub 1.0 1/2acecc8b/obj10/1 is missing in clone_overlap" err_strings[1]="log_channel[(]cluster[)] log [[]ERR[]] : scrub 1.0 1/666934a3/obj5/7 no '_' attr"