From 9013efd3a3cf92d1ec8e2a39639214792067d0d2 Mon Sep 17 00:00:00 2001 From: Mike Ryan Date: Mon, 27 Aug 2012 11:16:17 -0700 Subject: [PATCH] osd: deep scrub, read file contents from disk and compare digest Deep scrub reads the contents of every file from the store and computes a crc32 digest. The primary compares the digest of all replicas and will mark the PG inconsistent if any don't match. OSDs that do not support deep scrub simply perform an ordinary chunky scrub. Any subset of OSDs that do support deep scrub will have their digests compared. Signed-off-by: Mike Ryan --- doc/control.rst | 5 +++ src/bash_completion/ceph | 2 +- src/common/config_opts.h | 2 + src/messages/MOSDRepScrub.h | 19 +++++++-- src/messages/MOSDScrub.h | 27 ++++++++---- src/mon/OSDMonitor.cc | 12 ++++-- src/mon/PGMap.cc | 3 +- src/mon/PGMonitor.cc | 7 +++- src/osd/OSD.cc | 4 ++ src/osd/PG.cc | 83 ++++++++++++++++++++++++++++++------- src/osd/PG.h | 15 +++++-- src/osd/ReplicatedPG.cc | 7 +++- src/osd/osd_types.cc | 46 ++++++++++++++++---- src/osd/osd_types.h | 17 +++++++- src/test/cli/ceph/help.t | 1 + src/tools/ceph.cc | 1 + 16 files changed, 204 insertions(+), 47 deletions(-) diff --git a/doc/control.rst b/doc/control.rst index 19c976e995a65..0e4cbdeacbfd2 100644 --- a/doc/control.rst +++ b/doc/control.rst @@ -260,6 +260,11 @@ Get the value of a pool setting. Valid fields are: Sends a scrub command to osdN. To send the command to all osds, use ``*``. TODO: what does this actually do :: + $ ceph osd deep-scrub N + +Sends a deep scrub command to osdN. A deep scrub compares both the +metadata and the contents of objects between replicas. + $ ceph osd repair N Sends a repair command to osdN. To send the command to all osds, use ``*``. diff --git a/src/bash_completion/ceph b/src/bash_completion/ceph index cec2b852e5b94..2ea53c603072e 100644 --- a/src/bash_completion/ceph +++ b/src/bash_completion/ceph @@ -36,7 +36,7 @@ _ceph() return 0 ;; pg) - COMPREPLY=( $(compgen -W "stat dump getmap map send_pg_creates scrub repair" -- ${cur}) ) + COMPREPLY=( $(compgen -W "stat dump getmap map send_pg_creates scrub deep-scrub repair" -- ${cur}) ) return 0 ;; osd) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 2ad3b0a23cadb..8e959f775ca06 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -322,6 +322,8 @@ OPTION(osd_max_scrubs, OPT_INT, 1) OPTION(osd_scrub_load_threshold, OPT_FLOAT, 0.5) OPTION(osd_scrub_min_interval, OPT_FLOAT, 300) OPTION(osd_scrub_max_interval, OPT_FLOAT, 60*60*24) // once a day +OPTION(osd_deep_scrub_interval, OPT_FLOAT, 60*60*24*7) // once a week +OPTION(osd_deep_scrub_stride, OPT_INT, 524288) OPTION(osd_auto_weight, OPT_BOOL, false) OPTION(osd_class_dir, OPT_STR, CEPH_LIBDIR "/rados-classes") // where rados plugins are stored OPTION(osd_check_for_log_corruption, OPT_BOOL, false) diff --git a/src/messages/MOSDRepScrub.h b/src/messages/MOSDRepScrub.h index 184d153bcc99f..2d3a66d96af17 100644 --- a/src/messages/MOSDRepScrub.h +++ b/src/messages/MOSDRepScrub.h @@ -24,7 +24,7 @@ struct MOSDRepScrub : public Message { - static const int HEAD_VERSION = 3; + static const int HEAD_VERSION = 4; static const int COMPAT_VERSION = 2; pg_t pgid; // PG to scrub @@ -34,6 +34,7 @@ struct MOSDRepScrub : public Message { bool chunky; // true for chunky scrubs hobject_t start; // lower bound of scrub, inclusive hobject_t end; // upper bound of scrub, exclusive + bool deep; // true if scrub should be deep MOSDRepScrub() : Message(MSG_OSD_REP_SCRUB, HEAD_VERSION, COMPAT_VERSION) { } MOSDRepScrub(pg_t pgid, eversion_t scrub_from, eversion_t scrub_to, @@ -43,17 +44,19 @@ struct MOSDRepScrub : public Message { scrub_from(scrub_from), scrub_to(scrub_to), map_epoch(map_epoch), - chunky(false) { } + chunky(false), + deep(false) { } MOSDRepScrub(pg_t pgid, eversion_t scrub_to, epoch_t map_epoch, - hobject_t start, hobject_t end) + hobject_t start, hobject_t end, bool deep) : Message(MSG_OSD_REP_SCRUB, HEAD_VERSION, COMPAT_VERSION), pgid(pgid), scrub_to(scrub_to), map_epoch(map_epoch), chunky(true), start(start), - end(end) { } + end(end), + deep(deep) { } private: @@ -66,6 +69,7 @@ public: out << pgid << ",from:" << scrub_from << ",to:" << scrub_to << ",epoch:" << map_epoch << ",start:" << start << ",end:" << end << ",chunky:" << chunky + << ",deep:" << deep << ",version:" << header.version; out << ")"; } @@ -78,6 +82,7 @@ public: ::encode(chunky, payload); ::encode(start, payload); ::encode(end, payload); + ::encode(deep, payload); } void decode_payload() { bufferlist::iterator p = payload.begin(); @@ -90,8 +95,14 @@ public: ::decode(chunky, p); ::decode(start, p); ::decode(end, p); + if (header.version >= 4) { + ::decode(deep, p); + } else { + deep = false; + } } else { // v2 scrub: non-chunky chunky = false; + deep = false; } } }; diff --git a/src/messages/MOSDScrub.h b/src/messages/MOSDScrub.h index e4c9bd158688b..72661f8959803 100644 --- a/src/messages/MOSDScrub.h +++ b/src/messages/MOSDScrub.h @@ -23,17 +23,22 @@ */ struct MOSDScrub : public Message { + + static const int HEAD_VERSION = 2; + static const int COMPAT_VERSION = 1; + uuid_d fsid; vector scrub_pgs; bool repair; + bool deep; - MOSDScrub() : Message(MSG_OSD_SCRUB) {} - MOSDScrub(const uuid_d& f, bool r) : - Message(MSG_OSD_SCRUB), - fsid(f), repair(r) {} - MOSDScrub(const uuid_d& f, vector& pgs, bool r) : - Message(MSG_OSD_SCRUB), - fsid(f), scrub_pgs(pgs), repair(r) {} + MOSDScrub() : Message(MSG_OSD_SCRUB, HEAD_VERSION, COMPAT_VERSION) {} + MOSDScrub(const uuid_d& f, bool r, bool d) : + Message(MSG_OSD_SCRUB, HEAD_VERSION, COMPAT_VERSION), + fsid(f), repair(r), deep(d) {} + MOSDScrub(const uuid_d& f, vector& pgs, bool r, bool d) : + Message(MSG_OSD_SCRUB, HEAD_VERSION, COMPAT_VERSION), + fsid(f), scrub_pgs(pgs), repair(r), deep(d) {} private: ~MOSDScrub() {} @@ -47,6 +52,8 @@ public: out << scrub_pgs; if (repair) out << " repair"; + if (deep) + out << " deep"; out << ")"; } @@ -54,12 +61,18 @@ public: ::encode(fsid, payload); ::encode(scrub_pgs, payload); ::encode(repair, payload); + ::encode(deep, payload); } void decode_payload() { bufferlist::iterator p = payload.begin(); ::decode(fsid, p); ::decode(scrub_pgs, p); ::decode(repair, p); + if (header.version >= 2) { + ::decode(deep, p); + } else { + deep = false; + } } }; diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 3e7604f9ef3eb..adfe86a0e35f2 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -1649,10 +1649,12 @@ bool OSDMonitor::preprocess_command(MMonCommand *m) r = 0; } } - else if ((m->cmd[1] == "scrub" || m->cmd[1] == "repair")) { + else if ((m->cmd[1] == "scrub" || + m->cmd[1] == "deep-scrub" || + m->cmd[1] == "repair")) { if (m->cmd.size() <= 2) { r = -EINVAL; - ss << "usage: osd [scrub|repair] "; + ss << "usage: osd [scrub|deep-scrub|repair] "; goto out; } if (m->cmd[2] == "*") { @@ -1662,7 +1664,8 @@ bool OSDMonitor::preprocess_command(MMonCommand *m) if (osdmap.is_up(i)) { ss << (c++ ? ",":"") << i; mon->try_send_message(new MOSDScrub(osdmap.get_fsid(), - m->cmd[1] == "repair"), + m->cmd[1] == "repair", + m->cmd[1] == "deep-scrub"), osdmap.get_inst(i)); } r = 0; @@ -1671,7 +1674,8 @@ bool OSDMonitor::preprocess_command(MMonCommand *m) long osd = strtol(m->cmd[2].c_str(), 0, 10); if (osdmap.is_up(osd)) { mon->try_send_message(new MOSDScrub(osdmap.get_fsid(), - m->cmd[1] == "repair"), + m->cmd[1] == "repair", + m->cmd[1] == "deep-scrub"), osdmap.get_inst(osd)); r = 0; ss << "osd." << osd << " instructed to " << m->cmd[1]; diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index accc1b73a200f..f056271e2274b 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -464,7 +464,7 @@ void PGMap::dump_osd_stats(Formatter *f) const void PGMap::dump_pg_stats_plain(ostream& ss, const hash_map& pg_stats) const { - ss << "pg_stat\tobjects\tmip\tdegr\tunf\tbytes\tlog\tdisklog\tstate\tstate_stamp\tv\treported\tup\tacting\tlast_scrub\tscrub_stamp" << std::endl; + ss << "pg_stat\tobjects\tmip\tdegr\tunf\tbytes\tlog\tdisklog\tstate\tstate_stamp\tv\treported\tup\tacting\tlast_scrub\tscrub_stamp\tlast_deep_scrub\tdeep_scrub_stamp" << std::endl; for (hash_map::const_iterator i = pg_stats.begin(); i != pg_stats.end(); ++i) { const pg_stat_t &st(i->second); @@ -484,6 +484,7 @@ void PGMap::dump_pg_stats_plain(ostream& ss, << "\t" << st.up << "\t" << st.acting << "\t" << st.last_scrub << "\t" << st.last_scrub_stamp + << "\t" << st.last_deep_scrub << "\t" << st.last_deep_scrub_stamp << std::endl; } } diff --git a/src/mon/PGMonitor.cc b/src/mon/PGMonitor.cc index d18aeaa226baf..a052bb85bd43b 100644 --- a/src/mon/PGMonitor.cc +++ b/src/mon/PGMonitor.cc @@ -997,7 +997,9 @@ bool PGMonitor::preprocess_command(MMonCommand *m) } else ss << "invalid pgid '" << m->cmd[2] << "'"; } - else if ((m->cmd[1] == "scrub" || m->cmd[1] == "repair") && m->cmd.size() == 3) { + else if ((m->cmd[1] == "scrub" || + m->cmd[1] == "deep-scrub" || + m->cmd[1] == "repair") && m->cmd.size() == 3) { pg_t pgid; r = -EINVAL; if (pgid.parse(m->cmd[2].c_str())) { @@ -1008,7 +1010,8 @@ bool PGMonitor::preprocess_command(MMonCommand *m) vector pgs(1); pgs[0] = pgid; mon->try_send_message(new MOSDScrub(mon->monmap->fsid, pgs, - m->cmd[1] == "repair"), + m->cmd[1] == "repair", + m->cmd[1] == "deep-scrub"), mon->osdmon()->osdmap.get_inst(osd)); ss << "instructing pg " << pgid << " on osd." << osd << " to " << m->cmd[1]; r = 0; diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 271ec8f2d938f..5add675055892 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -3257,6 +3257,8 @@ void OSD::handle_scrub(MOSDScrub *m) if (pg->is_primary()) { if (m->repair) pg->state_set(PG_STATE_REPAIR); + if (m->deep) + pg->state_set(PG_STATE_DEEP_SCRUB); if (pg->queue_scrub()) { dout(10) << "queueing " << *pg << " for scrub" << dendl; } @@ -3273,6 +3275,8 @@ void OSD::handle_scrub(MOSDScrub *m) if (pg->is_primary()) { if (m->repair) pg->state_set(PG_STATE_REPAIR); + if (m->deep) + pg->state_set(PG_STATE_DEEP_SCRUB); if (pg->queue_scrub()) { dout(10) << "queueing " << *pg << " for scrub" << dendl; } diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 3e8135b47507c..ff8555efc9415 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1926,6 +1926,8 @@ void PG::update_stats() info.stats.created = info.history.epoch_created; info.stats.last_scrub = info.history.last_scrub; info.stats.last_scrub_stamp = info.history.last_scrub_stamp; + info.stats.last_deep_scrub = info.history.last_deep_scrub; + info.stats.last_deep_scrub_stamp = info.history.last_deep_scrub_stamp; info.stats.last_epoch_clean = info.history.last_epoch_clean; utime_t now = ceph_clock_now(g_ceph_context); @@ -2712,6 +2714,11 @@ bool PG::sched_scrub() return true; } + if (ceph_clock_now(g_ceph_context) > info.history.last_deep_scrub_stamp + g_conf->osd_deep_scrub_interval) { + dout(10) << "sched_scrub: scrub will be deep" << dendl; + scrubber.deep = true; + } + bool ret = false; if (!scrubber.reserved) { assert(scrubber.reserved_peers.empty()); @@ -2804,9 +2811,10 @@ void PG::sub_op_scrub_map(OpRequestRef op) /* * pg lock may or may not be held */ -void PG::_scan_list(ScrubMap &map, vector &ls) +void PG::_scan_list(ScrubMap &map, vector &ls, bool deep) { - dout(10) << "_scan_list scanning " << ls.size() << " objects" << dendl; + dout(10) << "_scan_list scanning " << ls.size() << " objects" + << (deep ? " deeply" : "") << dendl; int i = 0; for (vector::iterator p = ls.begin(); p != ls.end(); @@ -2820,6 +2828,23 @@ void PG::_scan_list(ScrubMap &map, vector &ls) o.size = st.st_size; assert(!o.negative); osd->store->getattrs(coll, poid, o.attrs); + + // calculate the CRC32 on deep scrubs + if (deep) { + bufferhash h; + bufferlist bl; + int r; + __u64 pos = 0; + while ( (r = osd->store->read(coll, poid, pos, + g_conf->osd_deep_scrub_stride, bl)) > 0) { + h << bl; + pos += bl.length(); + bl.clear(); + } + o.digest = h.digest(); + o.digest_present = true; + } + dout(25) << "_scan_list " << poid << dendl; } else { dout(25) << "_scan_list " << poid << " got " << r << ", skipping" << dendl; @@ -2840,13 +2865,15 @@ void PG::_request_scrub_map_classic(int replica, eversion_t version) } // send scrub v3 messages (chunky scrub) -void PG::_request_scrub_map(int replica, eversion_t version, hobject_t start, hobject_t end) +void PG::_request_scrub_map(int replica, eversion_t version, + hobject_t start, hobject_t end, + bool deep) { assert(replica != osd->whoami); dout(10) << "scrub requesting scrubmap from osd." << replica << dendl; MOSDRepScrub *repscrubop = new MOSDRepScrub(info.pgid, version, get_osdmap()->get_epoch(), - start, end); + start, end, deep); osd->cluster_messenger->send_message(repscrubop, get_osdmap()->get_cluster_inst(replica)); } @@ -2977,7 +3004,8 @@ void PG::scrub_unreserve_replicas() * build a scrub map over a chunk without releasing the lock * only used by chunky scrub */ -int PG::build_scrub_map_chunk(ScrubMap &map, hobject_t start, hobject_t end) +int PG::build_scrub_map_chunk(ScrubMap &map, + hobject_t start, hobject_t end, bool deep) { dout(10) << "build_scrub_map" << dendl; dout(20) << "scrub_map_chunk [" << start << "," << end << ")" << dendl; @@ -2992,7 +3020,7 @@ int PG::build_scrub_map_chunk(ScrubMap &map, hobject_t start, hobject_t end) return ret; } - _scan_list(map, ls); + _scan_list(map, ls, deep); // pg attrs osd->store->collection_getattrs(coll, map.attrs); @@ -3025,7 +3053,7 @@ void PG::build_scrub_map(ScrubMap &map) vector ls; osd->store->collection_list(coll, ls); - _scan_list(map, ls); + _scan_list(map, ls, false); lock(); if (epoch != info.history.same_interval_since) { @@ -3073,7 +3101,7 @@ void PG::build_inc_scrub_map(ScrubMap &map, eversion_t v) } } - _scan_list(map, ls); + _scan_list(map, ls, false); // pg attrs osd->store->collection_getattrs(coll, map.attrs); @@ -3155,7 +3183,7 @@ void PG::replica_scrub(MOSDRepScrub *msg) return; } - build_scrub_map_chunk(map, msg->start, msg->end); + build_scrub_map_chunk(map, msg->start, msg->end, msg->deep); } else { if (msg->scrub_from > eversion_t()) { @@ -3216,7 +3244,7 @@ void PG::scrub() return; } - // when the scrub is not active, we need to determine which type of scrub to do + // when we're starting a scrub, we need to determine which type of scrub to do if (!scrubber.active) { OSDMapRef curmap = osd->get_osdmap(); scrubber.is_chunky = true; @@ -3231,6 +3259,12 @@ void PG::scrub() } } + if (scrubber.is_chunky) { + scrubber.deep = state_test(PG_STATE_DEEP_SCRUB); + } else { + state_clear(PG_STATE_DEEP_SCRUB); + } + dout(10) << "starting a new " << (scrubber.is_chunky ? "chunky" : "classic") << " scrub" << dendl; } @@ -3548,7 +3582,7 @@ void PG::chunky_scrub() { // request maps from replicas for (unsigned i=1; i= scrubber.subset_last_update); // build my own scrub map - ret = build_scrub_map_chunk(scrubber.primary_scrubmap, scrubber.start, scrubber.end); + ret = build_scrub_map_chunk(scrubber.primary_scrubmap, + scrubber.start, scrubber.end, + scrubber.deep); if (ret < 0) { dout(5) << "error building scrub map: " << ret << ", aborting" << dendl; scrub_clear_state(); @@ -3645,6 +3681,7 @@ void PG::scrub_clear_state() assert(_lock.is_locked()); state_clear(PG_STATE_SCRUBBING); state_clear(PG_STATE_REPAIR); + state_clear(PG_STATE_DEEP_SCRUB); update_stats(); // active -> nothing. @@ -3691,6 +3728,16 @@ bool PG::_compare_scrub_objects(ScrubMap::object &auth, errorstream << "size " << candidate.size << " != known size " << auth.size; } + if (auth.digest_present && candidate.digest_present) { + if (auth.digest != candidate.digest) { + if (!ok) + errorstream << ", "; + ok = false; + + errorstream << "digest " << candidate.digest + << " != known digest " << auth.digest; + } + } for (map::const_iterator i = auth.attrs.begin(); i != auth.attrs.end(); i++) { @@ -3755,6 +3802,7 @@ void PG::_compare_scrubmaps(const map &maps, j->second->objects[*k], ss)) { cur_inconsistent.insert(j->first); + ++scrubber.errors; errorstream << info.pgid << " osd." << acting[j->first] << ": soid " << *k << " " << ss.str() << std::endl; } @@ -3781,7 +3829,8 @@ void PG::_compare_scrubmaps(const map &maps, void PG::scrub_compare_maps() { dout(10) << "scrub_compare_maps has maps, analyzing" << dendl; bool repair = state_test(PG_STATE_REPAIR); - const char *mode = repair ? "repair":"scrub"; + bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB); + const char *mode = (repair ? "repair": (deep_scrub ? "deep-scrub" : "scrub")); if (acting.size() > 1) { dout(10) << "scrub comparing replica scrub maps" << dendl; @@ -3837,6 +3886,7 @@ void PG::scrub_compare_maps() { &maps[i->second]->objects[i->first], acting[*j], acting[i->second]); + ++scrubber.fixed; } } @@ -3883,7 +3933,8 @@ void PG::scrub_finalize() { // the part that actually finalizes a scrub void PG::scrub_finish() { bool repair = state_test(PG_STATE_REPAIR); - const char *mode = repair ? "repair":"scrub"; + bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB); + const char *mode = (repair ? "repair": (deep_scrub ? "deep-scrub" : "scrub")); // type-specific finish (can tally more errors) _scrub_finish(); @@ -3911,6 +3962,10 @@ void PG::scrub_finish() { osd->unreg_last_pg_scrub(info.pgid, info.history.last_scrub_stamp); info.history.last_scrub = info.last_update; info.history.last_scrub_stamp = ceph_clock_now(g_ceph_context); + if (scrubber.deep) { + info.history.last_deep_scrub = info.last_update; + info.history.last_deep_scrub_stamp = ceph_clock_now(g_ceph_context); + } osd->reg_last_pg_scrub(info.pgid, info.history.last_scrub_stamp); { diff --git a/src/osd/PG.h b/src/osd/PG.h index 5c39cd89f101c..0ad34514ea8a7 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -778,7 +778,8 @@ public: reserved(false), reserve_failed(false), block_writes(false), active(false), waiting_on(0), errors(0), fixed(0), active_rep_scrub(0), - finalizing(false), is_chunky(false), state(INACTIVE) + finalizing(false), is_chunky(false), state(INACTIVE), + deep(false) { } @@ -818,6 +819,9 @@ public: FINISH, } state; + // deep scrub + bool deep; + static const char *state_string(const PG::Scrubber::State& state) { const char *ret = NULL; switch( state ) @@ -855,6 +859,7 @@ public: subset_last_update = eversion_t(); errors = 0; fixed = 0; + deep = false; } } scrubber; @@ -878,10 +883,12 @@ public: void scrub_finish(); void scrub_clear_state(); bool scrub_gather_replica_maps(); - void _scan_list(ScrubMap &map, vector &ls); + void _scan_list(ScrubMap &map, vector &ls, bool deep); void _request_scrub_map_classic(int replica, eversion_t version); - void _request_scrub_map(int replica, eversion_t version, hobject_t start, hobject_t end); - int build_scrub_map_chunk(ScrubMap &map, hobject_t start, hobject_t end); + void _request_scrub_map(int replica, eversion_t version, + hobject_t start, hobject_t end, bool deep); + int build_scrub_map_chunk(ScrubMap &map, + hobject_t start, hobject_t end, bool deep); void build_scrub_map(ScrubMap &map); void build_inc_scrub_map(ScrubMap &map, eversion_t v); virtual void _scrub(ScrubMap &map) { } diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 01cab81e52b91..332e0501b9e5d 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -6596,7 +6596,8 @@ void ReplicatedPG::_scrub(ScrubMap& scrubmap) coll_t c(info.pgid); bool repair = state_test(PG_STATE_REPAIR); - const char *mode = repair ? "repair":"scrub"; + bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB); + const char *mode = (repair ? "repair": (deep_scrub ? "deep-scrub" : "scrub")); // traverse in reverse order. hobject_t head; @@ -6685,6 +6686,7 @@ void ReplicatedPG::_scrub(ScrubMap& scrubmap) //bufferlist data; //osd->store->read(c, poid, 0, 0, data); //assert(data.length() == p->size); + // if (soid.snap == CEPH_NOSNAP) { if (!snapset.head_exists) { @@ -6737,7 +6739,8 @@ void ReplicatedPG::_scrub_clear_state() void ReplicatedPG::_scrub_finish() { bool repair = state_test(PG_STATE_REPAIR); - const char *mode = repair ? "repair":"scrub"; + bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB); + const char *mode = (repair ? "repair": (deep_scrub ? "deep-scrub" : "scrub")); dout(10) << mode << " got " << scrub_cstat.sum.num_objects << "/" << info.stats.stats.sum.num_objects << " objects, " diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index cdbdfa957048c..a981d02f9006f 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -406,6 +406,8 @@ std::string pg_state_string(int state) oss << "remapped+"; if (state & PG_STATE_SCRUBBING) oss << "scrubbing+"; + if (state & PG_STATE_DEEP_SCRUB) + oss << "deep+"; if (state & PG_STATE_SCRUBQ) oss << "scrubq+"; if (state & PG_STATE_INCONSISTENT) @@ -971,6 +973,8 @@ void pg_stat_t::dump(Formatter *f) const f->dump_unsigned("parent_split_bits", parent_split_bits); f->dump_stream("last_scrub") << last_scrub; f->dump_stream("last_scrub_stamp") << last_scrub_stamp; + f->dump_stream("last_deep_scrub") << last_deep_scrub; + f->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp; f->dump_unsigned("log_size", log_size); f->dump_unsigned("ondisk_log_size", ondisk_log_size); stats.dump(f); @@ -986,7 +990,7 @@ void pg_stat_t::dump(Formatter *f) const void pg_stat_t::encode(bufferlist &bl) const { - ENCODE_START(9, 8, bl); + ENCODE_START(10, 8, bl); ::encode(version, bl); ::encode(reported, bl); ::encode(state, bl); @@ -1009,12 +1013,14 @@ void pg_stat_t::encode(bufferlist &bl) const ::encode(last_clean, bl); ::encode(last_unstale, bl); ::encode(mapping_epoch, bl); + ::encode(last_deep_scrub, bl); + ::encode(last_deep_scrub_stamp, bl); ENCODE_FINISH(bl); } void pg_stat_t::decode(bufferlist::iterator &bl) { - DECODE_START_LEGACY_COMPAT_LEN(9, 8, 8, bl); + DECODE_START_LEGACY_COMPAT_LEN(10, 8, 8, bl); ::decode(version, bl); ::decode(reported, bl); ::decode(state, bl); @@ -1072,6 +1078,10 @@ void pg_stat_t::decode(bufferlist::iterator &bl) ::decode(last_clean, bl); ::decode(last_unstale, bl); ::decode(mapping_epoch, bl); + if (struct_v >= 10) { + ::decode(last_deep_scrub, bl); + ::decode(last_deep_scrub_stamp, bl); + } } } DECODE_FINISH(bl); @@ -1099,6 +1109,8 @@ void pg_stat_t::generate_test_instances(list& o) a.parent_split_bits = 12; a.last_scrub = eversion_t(9, 10); a.last_scrub_stamp = utime_t(11, 12); + a.last_deep_scrub = eversion_t(13, 14); + a.last_deep_scrub_stamp = utime_t(15, 16); list l; object_stat_collection_t::generate_test_instances(l); a.stats = *l.back(); @@ -1177,7 +1189,7 @@ void pool_stat_t::generate_test_instances(list& o) void pg_history_t::encode(bufferlist &bl) const { - ENCODE_START(4, 4, bl); + ENCODE_START(5, 4, bl); ::encode(epoch_created, bl); ::encode(last_epoch_started, bl); ::encode(last_epoch_clean, bl); @@ -1187,12 +1199,14 @@ void pg_history_t::encode(bufferlist &bl) const ::encode(same_primary_since, bl); ::encode(last_scrub, bl); ::encode(last_scrub_stamp, bl); + ::encode(last_deep_scrub, bl); + ::encode(last_deep_scrub_stamp, bl); ENCODE_FINISH(bl); } void pg_history_t::decode(bufferlist::iterator &bl) { - DECODE_START_LEGACY_COMPAT_LEN(4, 4, 4, bl); + DECODE_START_LEGACY_COMPAT_LEN(5, 4, 4, bl); ::decode(epoch_created, bl); ::decode(last_epoch_started, bl); if (struct_v >= 3) @@ -1206,6 +1220,10 @@ void pg_history_t::decode(bufferlist::iterator &bl) if (struct_v >= 2) { ::decode(last_scrub, bl); ::decode(last_scrub_stamp, bl); + if (struct_v >= 5) { + ::decode(last_deep_scrub, bl); + ::decode(last_deep_scrub_stamp, bl); + } } DECODE_FINISH(bl); } @@ -1221,6 +1239,8 @@ void pg_history_t::dump(Formatter *f) const f->dump_int("same_primary_since", same_primary_since); f->dump_stream("last_scrub") << last_scrub; f->dump_stream("last_scrub_stamp") << last_scrub_stamp; + f->dump_stream("last_deep_scrub") << last_deep_scrub; + f->dump_stream("last_deep_scrub_stamp") << last_deep_scrub_stamp; } void pg_history_t::generate_test_instances(list& o) @@ -1235,7 +1255,9 @@ void pg_history_t::generate_test_instances(list& o) o.back()->same_interval_since = 6; o.back()->same_primary_since = 7; o.back()->last_scrub = eversion_t(8, 9); - o.back()->last_scrub_stamp = utime_t(10, 11); + o.back()->last_scrub_stamp = utime_t(10, 11); + o.back()->last_deep_scrub = eversion_t(12, 13); + o.back()->last_deep_scrub_stamp = utime_t(14, 15); } @@ -2591,19 +2613,29 @@ void ScrubMap::generate_test_instances(list& o) void ScrubMap::object::encode(bufferlist& bl) const { - ENCODE_START(2, 2, bl); + ENCODE_START(3, 2, bl); ::encode(size, bl); ::encode(negative, bl); ::encode(attrs, bl); + ::encode(digest, bl); + ::encode(digest_present, bl); ENCODE_FINISH(bl); } void ScrubMap::object::decode(bufferlist::iterator& bl) { - DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); + DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl); ::decode(size, bl); ::decode(negative, bl); ::decode(attrs, bl); + if (struct_v >= 3) { + ::decode(digest, bl); + ::decode(digest_present, bl); + } + else { + digest = 0; + digest_present = false; + } DECODE_FINISH(bl); } diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 7cb590bdea680..b2b59b33f181d 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -564,6 +564,7 @@ inline ostream& operator<<(ostream& out, const osd_stat_t& s) { #define PG_STATE_INCOMPLETE (1<<16) // incomplete content, peering failed. #define PG_STATE_STALE (1<<17) // our state for this pg is stale, unknown. #define PG_STATE_REMAPPED (1<<18) // pg is explicitly remapped to different OSDs than CRUSH +#define PG_STATE_DEEP_SCRUB (1<<19) // deep scrub: check CRC32 on files std::string pg_state_string(int state); @@ -862,7 +863,9 @@ struct pg_stat_t { __u32 parent_split_bits; eversion_t last_scrub; + eversion_t last_deep_scrub; utime_t last_scrub_stamp; + utime_t last_deep_scrub_stamp; object_stat_collection_t stats; @@ -951,7 +954,9 @@ struct pg_history_t { epoch_t same_primary_since; // same primary at least back through this epoch. eversion_t last_scrub; + eversion_t last_deep_scrub; utime_t last_scrub_stamp; + utime_t last_deep_scrub_stamp; pg_history_t() : epoch_created(0), @@ -985,6 +990,14 @@ struct pg_history_t { last_scrub_stamp = other.last_scrub_stamp; modified = true; } + if (other.last_deep_scrub > last_deep_scrub) { + last_deep_scrub = other.last_deep_scrub; + modified = true; + } + if (other.last_deep_scrub_stamp > last_deep_scrub_stamp) { + last_deep_scrub_stamp = other.last_deep_scrub_stamp; + modified = true; + } return modified; } @@ -1777,8 +1790,10 @@ struct ScrubMap { uint64_t size; bool negative; map attrs; + __u32 digest; + bool digest_present; - object(): size(0), negative(false) {} + object(): size(0), negative(false), digest(0), digest_present(false) {} void encode(bufferlist& bl) const; void decode(bufferlist::iterator& bl); diff --git a/src/test/cli/ceph/help.t b/src/test/cli/ceph/help.t index 47e5b3850089b..bbd7ff76d9a1d 100644 --- a/src/test/cli/ceph/help.t +++ b/src/test/cli/ceph/help.t @@ -56,6 +56,7 @@ ceph osd pool rename ceph osd pool set ceph osd scrub + ceph osd deep-scrub ceph osd repair ceph osd tell N bench [bytes per write] [total bytes] diff --git a/src/tools/ceph.cc b/src/tools/ceph.cc index b82be10b9bf78..278033c46cbee 100644 --- a/src/tools/ceph.cc +++ b/src/tools/ceph.cc @@ -99,6 +99,7 @@ static void usage() cout << " ceph osd pool rename \n"; cout << " ceph osd pool set \n"; cout << " ceph osd scrub \n"; + cout << " ceph osd deep-scrub \n"; cout << " ceph osd repair \n"; cout << " ceph osd tell N bench [bytes per write] [total bytes]\n"; cout << "\n"; -- 2.39.5