From: Kefu Chai Date: Sat, 30 Jan 2016 07:22:49 +0000 (+0800) Subject: osd: persist inconsistent snapsets using omap X-Git-Tag: v10.1.0~303^2~5 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=2009ed274d4a02a779ec5dc873c3a8ea46e56f1f;p=ceph.git osd: persist inconsistent snapsets using omap The inconsistent snapsets are identified in ReplicatedPG::_scrub() after we have compared the authoritative objects with their replicas/shards. This inconsistency information is stored in the omap, under keys with the prefix "SCRUB_SS_". Signed-off-by: Kefu Chai --- diff --git a/src/common/scrub_types.cc b/src/common/scrub_types.cc index 04a5a3f855b3..91ac141b847a 100644 --- a/src/common/scrub_types.cc +++ b/src/common/scrub_types.cc @@ -141,3 +141,69 @@ void inconsistent_obj_wrapper::decode(bufferlist::iterator& bp) ::decode(shards, bp); DECODE_FINISH(bp); } + +inconsistent_snapset_wrapper::inconsistent_snapset_wrapper(const hobject_t& hoid) + : inconsistent_snapset_t{object_id_t{hoid.oid.name, + hoid.nspace, + hoid.get_key(), + hoid.snap}} +{} + +using inc_snapset_t = inconsistent_snapset_t; + +void inconsistent_snapset_wrapper::set_headless() +{ + errors |= inc_snapset_t::HEADLESS_CLONE; +} + +void inconsistent_snapset_wrapper::set_ss_attr_missing() +{ + errors |= inc_snapset_t::ATTR_MISSING; +} + +void inconsistent_snapset_wrapper::set_ss_attr_corrupted() +{ + errors |= inc_snapset_t::ATTR_CORRUPTED; +} + +void inconsistent_snapset_wrapper::set_clone_missing(snapid_t snap) +{ + errors |= inc_snapset_t::CLONE_MISSING; + missing.push_back(snap); +} + +void inconsistent_snapset_wrapper::set_snapset_mismatch() +{ + errors |= inc_snapset_t::SNAP_MISMATCH; +} + +void inconsistent_snapset_wrapper::set_head_mismatch() +{ + errors |= inc_snapset_t::HEAD_MISMATCH; +} + +void inconsistent_snapset_wrapper::set_size_mismatch() +{ + errors |= inc_snapset_t::SIZE_MISMATCH; +} + +void inconsistent_snapset_wrapper::encode(bufferlist& bl) const +{ + ENCODE_START(1, 1, bl); 
+ ::encode(errors, bl); + ::encode(object, bl); + ::encode(clones, bl); + ::encode(missing, bl); + ENCODE_FINISH(bl); +} + +void inconsistent_snapset_wrapper::decode(bufferlist::iterator& bp) +{ + DECODE_START(1, bp); + ::decode(errors, bp); + ::decode(object, bp); + ::decode(clones, bp); + ::decode(missing, bp); + DECODE_FINISH(bp); +} + diff --git a/src/common/scrub_types.h b/src/common/scrub_types.h index c4012f7a536f..df219e523ce3 100644 --- a/src/common/scrub_types.h +++ b/src/common/scrub_types.h @@ -89,4 +89,33 @@ inline void decode(librados::inconsistent_obj_t& obj, reinterpret_cast<inconsistent_obj_wrapper&>(obj).decode(bp); } +struct inconsistent_snapset_wrapper : public librados::inconsistent_snapset_t { + inconsistent_snapset_wrapper() = default; + inconsistent_snapset_wrapper(const hobject_t& head); + void set_headless(); + // soid claims that it is a head or a snapdir, but its SS_ATTR + // is missing. + void set_ss_attr_missing(); + void set_ss_attr_corrupted(); + // snapset with missing clone + void set_clone_missing(snapid_t); + // the snapset is not consistent with itself + void set_snapset_mismatch(); + // soid.snap inconsistent with snapset + void set_head_mismatch(); + void set_size_mismatch(); + + void encode(bufferlist& bl) const; + void decode(bufferlist::iterator& bp); +}; + +WRITE_CLASS_ENCODER(inconsistent_snapset_wrapper) + +namespace librados { + inline void decode(librados::inconsistent_snapset_t& snapset, + bufferlist::iterator& bp) { + reinterpret_cast<inconsistent_snapset_wrapper&>(snapset).decode(bp); + } +} + #endif diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 3b8fe54fd6d9..3ef2cde4ad3e 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -20,9 +20,11 @@ #include "ReplicatedPG.h" #include "OSD.h" #include "OpRequest.h" +#include "ScrubStore.h" #include "objclass/objclass.h" #include "common/errno.h" +#include "common/scrub_types.h" #include "common/perf_counters.h" #include "messages/MOSDOp.h" @@ -12194,7 +12196,8 @@ unsigned 
ReplicatedPG::process_clones_to(const boost::optional &head, const char *mode, bool allow_incomplete_clones, boost::optional target, - vector::reverse_iterator *curclone) + vector::reverse_iterator *curclone, + inconsistent_snapset_wrapper &e) { assert(head); assert(snapset); @@ -12211,6 +12214,7 @@ unsigned ReplicatedPG::process_clones_to(const boost::optional &head, clog->error() << mode << " " << pgid << " " << head.get() << " expected clone " << next_clone; ++scrubber.shallow_errors; + e.set_clone_missing(next_clone.snap); } // Clones are descending ++(*curclone); @@ -12260,12 +12264,14 @@ void ReplicatedPG::_scrub( boost::optional snapset; // If initialized so will head (above) vector::reverse_iterator curclone; // Defined only if snapset initialized unsigned missing = 0; + inconsistent_snapset_wrapper snap_error; bufferlist last_data; for (map::reverse_iterator p = scrubmap.objects.rbegin(); p != scrubmap.objects.rend(); ++p) { const hobject_t& soid = p->first; + snap_error = inconsistent_snapset_wrapper{soid}; object_stat_sum_t stat; boost::optional oi; @@ -12286,6 +12292,7 @@ void ReplicatedPG::_scrub( osd->clog->error() << mode << " " << info.pgid << " " << soid << " no '" << OI_ATTR << "' attr"; ++scrubber.shallow_errors; + snap_error.set_ss_attr_missing(); } else { bufferlist bv; bv.push_back(p->second.attrs[OI_ATTR]); @@ -12297,6 +12304,7 @@ void ReplicatedPG::_scrub( osd->clog->error() << mode << " " << info.pgid << " " << soid << " can't decode '" << OI_ATTR << "' attr " << e.what(); ++scrubber.shallow_errors; + snap_error.set_ss_attr_corrupted(); } } @@ -12308,6 +12316,7 @@ void ReplicatedPG::_scrub( << oi->size << ") adjusted for ondisk to (" << pgbackend->be_get_ondisk_size(oi->size) << ")"; + snap_error.set_size_mismatch(); ++scrubber.shallow_errors; } @@ -12350,7 +12359,8 @@ void ReplicatedPG::_scrub( // Log any clones we were expecting to be there up to target // This will set missing, but will be a no-op if snap.soid == *curclone. 
missing += process_clones_to(head, snapset, osd->clog, info.pgid, mode, - pool.info.allow_incomplete_clones(), target, &curclone); + pool.info.allow_incomplete_clones(), target, &curclone, + snap_error); } bool expected; // Check doing_clones() again in case we ran process_clones_to() @@ -12371,11 +12381,14 @@ void ReplicatedPG::_scrub( if (head && !snapset) { osd->clog->info() << mode << " " << info.pgid << " " << soid << " clone ignored due to missing snapset"; + scrubber.store->add_snap_error(pool.id, snap_error); continue; } osd->clog->error() << mode << " " << info.pgid << " " << soid << " is an unexpected clone"; ++scrubber.shallow_errors; + snap_error.set_headless(); + scrubber.store->add_snap_error(pool.id, snap_error); continue; } @@ -12385,11 +12398,13 @@ void ReplicatedPG::_scrub( if (missing) { log_missing(missing, head, osd->clog, info.pgid, __func__, mode, pool.info.allow_incomplete_clones()); + scrubber.store->add_snap_error(pool.id, snap_error); } // Set this as a new head object head = soid; missing = 0; + snap_error = inconsistent_snapset_wrapper{head.get()}; dout(20) << __func__ << " " << mode << " new head " << head << dendl; @@ -12398,6 +12413,7 @@ void ReplicatedPG::_scrub( << " no '" << SS_ATTR << "' attr"; ++scrubber.shallow_errors; snapset = boost::none; + snap_error.set_ss_attr_missing(); } else { bufferlist bl; bl.push_back(p->second.attrs[SS_ATTR]); @@ -12410,6 +12426,7 @@ void ReplicatedPG::_scrub( osd->clog->error() << mode << " " << info.pgid << " " << soid << " can't decode '" << SS_ATTR << "' attr " << e.what(); ++scrubber.shallow_errors; + snap_error.set_ss_attr_corrupted(); } } @@ -12423,6 +12440,7 @@ void ReplicatedPG::_scrub( osd->clog->error() << mode << " " << info.pgid << " " << soid << " snaps.seq not set"; ++scrubber.shallow_errors; + snap_error.set_snapset_mismatch(); } } @@ -12430,11 +12448,13 @@ void ReplicatedPG::_scrub( osd->clog->error() << mode << " " << info.pgid << " " << soid << " snapset.head_exists=false, but 
head exists"; ++scrubber.shallow_errors; + snap_error.set_head_mismatch(); } if (soid.is_snapdir() && snapset->head_exists) { osd->clog->error() << mode << " " << info.pgid << " " << soid << " snapset.head_exists=true, but snapdir exists"; ++scrubber.shallow_errors; + snap_error.set_head_mismatch(); } } } else { @@ -12449,19 +12469,22 @@ void ReplicatedPG::_scrub( osd->clog->error() << mode << " " << info.pgid << " " << soid << " is missing in clone_size"; ++scrubber.shallow_errors; + snap_error.set_size_mismatch(); } else { if (oi && oi->size != snapset->clone_size[soid.snap]) { osd->clog->error() << mode << " " << info.pgid << " " << soid << " size " << oi->size << " != clone_size " << snapset->clone_size[*curclone]; ++scrubber.shallow_errors; + snap_error.set_size_mismatch(); } if (snapset->clone_overlap.count(soid.snap) == 0) { osd->clog->error() << mode << " " << info.pgid << " " << soid << " is missing in clone_overlap"; ++scrubber.shallow_errors; - } else { + snap_error.set_size_mismatch(); + } else { // This checking is based on get_clone_bytes(). The first 2 asserts // can't happen because we know we have a clone_size and // a clone_overlap. Now we check that the interval_set won't @@ -12483,6 +12506,7 @@ void ReplicatedPG::_scrub( osd->clog->error() << mode << " " << info.pgid << " " << soid << " bad interval_set in clone_overlap"; ++scrubber.shallow_errors; + snap_error.set_size_mismatch(); } else { stat.num_bytes += snapset->get_clone_bytes(soid.snap); } @@ -12501,14 +12525,15 @@ void ReplicatedPG::_scrub( << " No more objects while processing " << head.get() << dendl; missing += process_clones_to(head, snapset, osd->clog, info.pgid, mode, - pool.info.allow_incomplete_clones(), all_clones, &curclone); - + pool.info.allow_incomplete_clones(), all_clones, &curclone, + snap_error); } // There could be missing found by the test above or even // before dropping out of the loop for the last head. 
if (missing) { log_missing(missing, head, osd->clog, info.pgid, __func__, mode, pool.info.allow_incomplete_clones()); + scrubber.store->add_snap_error(pool.id, snap_error); } for (map, hobject_t::BitwiseComparator>::const_iterator p = diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index 0ee87a7f5e66..bf95df0c46be 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -59,6 +59,8 @@ void put_with_id(ReplicatedPG *pg, uint64_t id); typedef boost::intrusive_ptr ReplicatedPGRef; #endif +struct inconsistent_snapset_wrapper; + class ReplicatedPG : public PG, public PGBackend::Listener { friend class OSD; friend class Watch; @@ -1509,7 +1511,8 @@ private: const char *mode, bool allow_incomplete_clones, boost::optional target, - vector::reverse_iterator *curclone); + vector::reverse_iterator *curclone, + inconsistent_snapset_wrapper &snap_error); public: coll_t get_coll() { diff --git a/src/osd/ScrubStore.cc b/src/osd/ScrubStore.cc index 32a7b40d7d7b..774569e0d702 100644 --- a/src/osd/ScrubStore.cc +++ b/src/osd/ScrubStore.cc @@ -50,6 +50,45 @@ string last_object_key(int64_t pool) hoid.build_hash_cache(); return "SCRUB_OBJ_" + hoid.to_str(); } + +string first_snap_key(int64_t pool) +{ + // scrub object is per spg_t object, so we can misuse the hash (pg.seed) for + // representing the minimal and maximum keys. This relies on how + // hobject_t::to_str() works: hex(pool).hex(revhash). 
+ auto hoid = hobject_t(object_t(), + "", + 0, + 0x00000000, + pool, + ""); + hoid.build_hash_cache(); + return "SCRUB_SS_" + hoid.to_str(); +} + +string to_snap_key(int64_t pool, const librados::object_id_t& oid) +{ + auto hoid = hobject_t(object_t(oid.name), + oid.locator, // key + oid.snap, + 0x77777777, // hash + pool, + oid.nspace); + hoid.build_hash_cache(); + return "SCRUB_SS_" + hoid.to_str(); +} + +string last_snap_key(int64_t pool) +{ + auto hoid = hobject_t(object_t(), + "", + 0, + 0xffffffff, + pool, + ""); + hoid.build_hash_cache(); + return "SCRUB_SS_" + hoid.to_str(); +} } namespace Scrub { @@ -87,6 +126,13 @@ void Store::add_object_error(int64_t pool, const inconsistent_obj_wrapper& e) results[to_object_key(pool, e.object)] = bl; } +void Store::add_snap_error(int64_t pool, const inconsistent_snapset_wrapper& e) +{ + bufferlist bl; + e.encode(bl); + results[to_snap_key(pool, e.object)] = bl; +} + bool Store::empty() const { return results.empty(); @@ -104,6 +150,18 @@ void Store::cleanup(ObjectStore::Transaction* t) t->remove(coll, hoid); } +std::vector +Store::get_snap_errors(ObjectStore* store, + int64_t pool, + const librados::object_id_t& start, + uint64_t max_return) +{ + const string begin = (start.name.empty() ? + first_snap_key(pool) : to_snap_key(pool, start)); + const string end = last_snap_key(pool); + return get_errors(store, begin, end, max_return); +} + std::vector Store::get_object_errors(ObjectStore* store, int64_t pool, @@ -132,4 +190,10 @@ Store::get_errors(ObjectStore* store, } return errors; } +string to_snap_key(int64_t pool, const librados::object_id_t& oid) +{ + return "SCRUB_SS_" + std::to_string(pool) + "." 
+ oid.name + oid.nspace; +} + + } // namespace Scrub diff --git a/src/osd/ScrubStore.h b/src/osd/ScrubStore.h index f14e15f56e3d..59c5d1f5f7b9 100644 --- a/src/osd/ScrubStore.h +++ b/src/osd/ScrubStore.h @@ -12,6 +12,7 @@ namespace librados { } struct inconsistent_obj_wrapper; +struct inconsistent_snapset_wrapper; namespace Scrub { @@ -23,9 +24,14 @@ public: const spg_t& pgid, const coll_t& coll); void add_object_error(int64_t pool, const inconsistent_obj_wrapper& e); + void add_snap_error(int64_t pool, const inconsistent_snapset_wrapper& e); bool empty() const; void flush(ObjectStore::Transaction *); void cleanup(ObjectStore::Transaction *); + std::vector get_snap_errors(ObjectStore* store, + int64_t pool, + const librados::object_id_t& start, + uint64_t max_return); std::vector get_object_errors(ObjectStore* store, int64_t pool, const librados::object_id_t& start,