From: David Zafman Date: Thu, 8 Jun 2017 19:48:02 +0000 (-0700) Subject: osd: On errors during snaptrim stop so pg can be repaired X-Git-Tag: v12.1.2~1^2~6^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=658a2f63b98bab9f23ae31c23b22866e5c92d55d;p=ceph.git osd: On errors during snaptrim stop so pg can be repaired Fix get_object_context() to return errors for trim_object() Have trim_object() return errors to trimming caller Add pg state snaptrim_error to indicate that a trim has been stopped Remove queue_snap_trim bool, just check snap_trimq after scrub finishes Catch errors from snapset decoding in get_snapset_context() Fixes: http://tracker.ceph.com/issues/13837 Signed-off-by: David Zafman --- diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index c0277b7a518a..8c74bb80d86f 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -2589,6 +2589,8 @@ void PGMap::get_health( note["backfill_toofull"] += p->second; if (p->first & PG_STATE_RECOVERY_TOOFULL) note["recovery_toofull"] += p->second; + if (p->first & PG_STATE_SNAPTRIM_ERROR) + note["snaptrim_error"] += p->second; } mempool::pgmap::unordered_map stuck_pgs; diff --git a/src/osd/PG.cc b/src/osd/PG.cc index fbc5fdfd3d5b..e8841a396825 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -994,7 +994,7 @@ void PG::clear_primary_state() PG::Scrubber::Scrubber() : reserved(false), reserve_failed(false), epoch_start(0), - active(false), queue_snap_trim(false), + active(false), waiting_on(0), shallow_errors(0), deep_errors(0), fixed(0), must_scrub(false), must_deep_scrub(false), must_repair(false), auto_repair(false), @@ -4610,6 +4610,11 @@ void PG::chunky_scrub(ThreadPool::TPHandle &handle) scrubber.state = PG::Scrubber::INACTIVE; done = true; + if (!snap_trimq.empty()) { + dout(10) << "scrub finished, requeuing snap_trimmer" << dendl; + snap_trimmer_scrub_complete(); + } + break; default: @@ -4634,11 +4639,6 @@ void PG::scrub_clear_state() requeue_ops(waiting_for_scrub); - if (scrubber.queue_snap_trim) { - dout(10) << "scrub finished, requeuing snap_trimmer" << dendl; - snap_trimmer_scrub_complete(); - } - scrubber.reset(); // type-specific state clear diff --git a/src/osd/PG.h b/src/osd/PG.h index e93549e9f83f..6fe9ba9feb28 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1190,7 +1190,6 @@ public: // common to both scrubs bool active; - bool queue_snap_trim; int waiting_on; set waiting_on_whom; int shallow_errors; @@ -1292,7 +1291,6 @@ public: // clear all state void reset() { active = false; - queue_snap_trim = false; waiting_on = 0; waiting_on_whom.clear(); if (active_rep_scrub) { diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 3f5adc1f33d0..fc89d8eb11f7 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -3612,24 +3612,29 @@ void PrimaryLogPG::do_backfill_remove(OpRequestRef op) assert(r == 0); } -PrimaryLogPG::OpContextUPtr PrimaryLogPG::trim_object( - bool first, const hobject_t &coid) +int PrimaryLogPG::trim_object( + bool first, const hobject_t &coid, PrimaryLogPG::OpContextUPtr *ctxp) { + *ctxp = NULL; // load clone info bufferlist bl; ObjectContextRef obc = get_object_context(coid, false, NULL); - if (!obc) { - derr << __func__ << " could not find coid " << coid << dendl; - ceph_abort(); + if (!obc || !obc->ssc || !obc->ssc->exists) { + osd->clog->error() << __func__ << ": Can not trim " << coid + << " repair needed " << (obc ? "(no obc->ssc or !exists)" : "(no obc)"); + return -ENOENT; } - assert(obc->ssc); hobject_t snapoid( coid.oid, coid.get_key(), obc->ssc->snapset.head_exists ? CEPH_NOSNAP:CEPH_SNAPDIR, coid.get_hash(), info.pgid.pool(), coid.get_namespace()); ObjectContextRef snapset_obc = get_object_context(snapoid, false); - assert(snapset_obc); + if (!snapset_obc) { + osd->clog->error() << __func__ << ": Can not trim " << coid + << " repair needed, no snapset obc for " << snapoid; + return -ENOENT; + } SnapSet& snapset = obc->ssc->snapset; @@ -3645,21 +3650,21 @@ PrimaryLogPG::OpContextUPtr PrimaryLogPG::trim_object( if (p == snapset.clone_snaps.end()) { osd->clog->error() << __func__ << " No clone_snaps in snapset " << snapset << " for " << coid << "\n"; - return NULL; + return -ENOENT; } old_snaps.insert(snapset.clone_snaps[coid.snap].begin(), snapset.clone_snaps[coid.snap].end()); } if (old_snaps.empty()) { osd->clog->error() << __func__ << " No object info snaps for " << coid; - return NULL; + return -ENOENT; } dout(10) << coid << " old_snaps " << old_snaps << " old snapset " << snapset << dendl; if (snapset.seq == 0) { osd->clog->error() << __func__ << " No snapset.seq for " << coid; - return NULL; + return -ENOENT; } set new_snaps; @@ -3676,7 +3681,7 @@ PrimaryLogPG::OpContextUPtr PrimaryLogPG::trim_object( p = std::find(snapset.clones.begin(), snapset.clones.end(), coid.snap); if (p == snapset.clones.end()) { osd->clog->error() << __func__ << " Snap " << coid.snap << " not in clones"; - return NULL; + return -ENOENT; } } @@ -3689,7 +3694,7 @@ PrimaryLogPG::OpContextUPtr PrimaryLogPG::trim_object( first)) { close_op_ctx(ctx.release()); dout(10) << __func__ << ": Unable to get a wlock on " << coid << dendl; - return NULL; + return -ENOLCK; } if (!ctx->lock_manager.get_snaptrimmer_write( @@ -3698,7 +3703,7 @@ PrimaryLogPG::OpContextUPtr PrimaryLogPG::trim_object( first)) { close_op_ctx(ctx.release()); dout(10) << __func__ << ": Unable to get a wlock on " << snapoid << dendl; - return NULL; + return -ENOLCK; } ctx->at_version = get_next_version(); @@ -3885,7 +3890,8 @@ PrimaryLogPG::OpContextUPtr PrimaryLogPG::trim_object( t->setattrs(snapoid, attrs); } - return ctx; + *ctxp = std::move(ctx); + return 0; } void PrimaryLogPG::kick_snap_trim() @@ -9581,6 +9587,7 @@ ObjectContextRef PrimaryLogPG::get_object_context( object_info_t oi(soid); SnapSetContext *ssc = get_snapset_context( soid, true, 0, false); + assert(ssc); obc = create_object_context(oi, ssc); dout(10) << __func__ << ": " << obc << " " << soid << " " << obc->rwstate @@ -9628,7 +9635,13 @@ ObjectContextRef PrimaryLogPG::get_object_context( dout(10) << __func__ << ": creating obc from disk: " << obc << dendl; } - assert(obc->ssc); + + // XXX: Caller doesn't expect this + if (obc->ssc == NULL) { + derr << __func__ << ": obc->ssc not available, not returning context" << dendl; + return ObjectContextRef(); // -ENOENT! + } + dout(10) << __func__ << ": " << obc << " " << soid << " " << obc->rwstate << " oi: " << obc->obs.oi @@ -10005,7 +10018,12 @@ SnapSetContext *PrimaryLogPG::get_snapset_context( _register_snapset_context(ssc); if (bv.length()) { bufferlist::iterator bvp = bv.begin(); - ssc->snapset.decode(bvp); + try { + ssc->snapset.decode(bvp); + } catch (buffer::error& e) { + dout(0) << __func__ << " Can't decode snapset: " << e << dendl; + return NULL; + } ssc->exists = true; } else { ssc->exists = false; @@ -13883,7 +13901,6 @@ boost::statechart::result PrimaryLogPG::NotTrimming::react(const KickTrim&) } if (pg->scrubber.active) { ldout(pg->cct, 10) << " scrubbing, will requeue snap_trimmer after" << dendl; - pg->scrubber.queue_snap_trim = true; return transit< WaitScrub >(); } else { return transit< Trimming >(); @@ -13917,6 +13934,7 @@ PrimaryLogPG::AwaitAsyncWork::AwaitAsyncWork(my_context ctx) context< SnapTrimmer >().log_enter(state_name); context< SnapTrimmer >().pg->osd->queue_for_snap_trim(pg); pg->state_set(PG_STATE_SNAPTRIM); + pg->state_clear(PG_STATE_SNAPTRIM_ERROR); pg->publish_stats_to_osd(); } @@ -13975,18 +13993,26 @@ boost::statechart::result PrimaryLogPG::AwaitAsyncWork::react(const DoSnapWork&) for (auto &&object: to_trim) { // Get next ldout(pg->cct, 10) << "AwaitAsyncWork react trimming " << object << dendl; - OpContextUPtr ctx = pg->trim_object(in_flight.empty(), object); - if (!ctx) { - ldout(pg->cct, 10) << "could not get write lock on obj " - << object << dendl; - if (in_flight.empty()) { + OpContextUPtr ctx; + int error = pg->trim_object(in_flight.empty(), object, &ctx); + if (error) { + if (error == -ENOLCK) { + ldout(pg->cct, 10) << "could not get write lock on obj " + << object << dendl; + } else { + pg->state_set(PG_STATE_SNAPTRIM_ERROR); + ldout(pg->cct, 10) << "Snaptrim error=" << error << dendl; + } + if (!in_flight.empty()) { + ldout(pg->cct, 10) << "letting the ones we already started finish" << dendl; + return transit< WaitRepops >(); + } + if (error == -ENOLCK) { ldout(pg->cct, 10) << "waiting for it to clear" << dendl; return transit< WaitRWLock >(); - } else { - ldout(pg->cct, 10) << "letting the ones we already started finish" << dendl; - return transit< WaitRepops >(); + return transit< NotTrimming >(); } } @@ -13995,8 +14021,13 @@ boost::statechart::result PrimaryLogPG::AwaitAsyncWork::react(const DoSnapWork&) [pg, object, &in_flight]() { assert(in_flight.find(object) != in_flight.end()); in_flight.erase(object); - if (in_flight.empty()) - pg->snap_trimmer_machine.process_event(RepopsComplete()); + if (in_flight.empty()) { + if (pg->state_test(PG_STATE_SNAPTRIM_ERROR)) { + pg->snap_trimmer_machine.process_event(Reset()); + } else { + pg->snap_trimmer_machine.process_event(RepopsComplete()); + } + } }); pg->simple_opc_submit(std::move(ctx)); diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h index f6c1b97d8a36..779b0eb05b7f 100644 --- a/src/osd/PrimaryLogPG.h +++ b/src/osd/PrimaryLogPG.h @@ -1386,7 +1386,7 @@ public: void handle_backoff(OpRequestRef& op); - OpContextUPtr trim_object(bool first, const hobject_t &coid); + int trim_object(bool first, const hobject_t &coid, OpContextUPtr *ctxp); void snap_trimmer(epoch_t e) override; void kick_snap_trim() override; void snap_trimmer_scrub_complete() override; @@ -1672,6 +1672,7 @@ private: pending = nullptr; auto *pg = context< SnapTrimmer >().pg; pg->state_clear(PG_STATE_SNAPTRIM_WAIT); + pg->state_clear(PG_STATE_SNAPTRIM_ERROR); pg->publish_stats_to_osd(); } }; diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 40d3138f459c..25d615325bda 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -834,6 +834,8 @@ std::string pg_state_string(int state) oss << "snaptrim+"; if (state & PG_STATE_SNAPTRIM_WAIT) oss << "snaptrim_wait+"; + if (state & PG_STATE_SNAPTRIM_ERROR) + oss << "snaptrim_error+"; string ret(oss.str()); if (ret.length() > 0) ret.resize(ret.length() - 1); @@ -891,6 +893,8 @@ int pg_string_state(const std::string& state) type = PG_STATE_SNAPTRIM; else if (state == "snaptrim_wait") type = PG_STATE_SNAPTRIM_WAIT; + else if (state == "snaptrim_error") + type = PG_STATE_SNAPTRIM_ERROR; else type = -1; return type; diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 92cc7c569876..f73d01528293 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -981,6 +981,7 @@ inline ostream& operator<<(ostream& out, const osd_stat_t& s) { #define PG_STATE_SNAPTRIM (1<<26) // trimming snaps #define PG_STATE_SNAPTRIM_WAIT (1<<27) // queued to trim snaps #define PG_STATE_RECOVERY_TOOFULL (1<<28) // recovery can't proceed: too full +#define PG_STATE_SNAPTRIM_ERROR (1<<29) // error stopped trimming snaps std::string pg_state_string(int state); std::string pg_vector_string(const vector &a);