#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 (1ULL<<44)
#define CEPH_FEATURE_OSD_SET_ALLOC_HINT (1ULL<<45)
#define CEPH_FEATURE_OSD_FADVISE_FLAGS (1ULL<<46)
+#define CEPH_FEATURE_OSD_OBJECT_DIGEST (1ULL<<46) /* overlap with fadvise */
#define CEPH_FEATURE_MDS_QUOTA (1ULL<<47)
#define CEPH_FEATURE_RESERVED2 (1ULL<<61) /* slow down, we are almost out... */
CEPH_FEATURE_OSD_POOLRESEND | \
CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 | \
CEPH_FEATURE_OSD_SET_ALLOC_HINT | \
- CEPH_FEATURE_OSD_FADVISE_FLAGS | \
+ CEPH_FEATURE_OSD_FADVISE_FLAGS | \
+ CEPH_FEATURE_OSD_OBJECT_DIGEST | \
CEPH_FEATURE_MDS_QUOTA | \
0ULL)
struct MOSDRepScrub : public Message {
- static const int HEAD_VERSION = 5;
+ static const int HEAD_VERSION = 6;
static const int COMPAT_VERSION = 2;
spg_t pgid; // PG to scrub
hobject_t start; // lower bound of scrub, inclusive
hobject_t end; // upper bound of scrub, exclusive
bool deep; // true if scrub should be deep
+ uint32_t seed; // seed value for digest calculation
- MOSDRepScrub() : Message(MSG_OSD_REP_SCRUB, HEAD_VERSION, COMPAT_VERSION),
+ MOSDRepScrub()
+ : Message(MSG_OSD_REP_SCRUB, HEAD_VERSION, COMPAT_VERSION),
chunky(false),
- deep(false) { }
+ deep(false),
+ seed(0) { }
MOSDRepScrub(spg_t pgid, eversion_t scrub_from, eversion_t scrub_to,
epoch_t map_epoch)
scrub_to(scrub_to),
map_epoch(map_epoch),
chunky(false),
- deep(false) { }
+ deep(false),
+ seed(0) { }
MOSDRepScrub(spg_t pgid, eversion_t scrub_to, epoch_t map_epoch,
- hobject_t start, hobject_t end, bool deep)
+ hobject_t start, hobject_t end, bool deep, uint32_t seed)
: Message(MSG_OSD_REP_SCRUB, HEAD_VERSION, COMPAT_VERSION),
pgid(pgid),
scrub_to(scrub_to),
chunky(true),
start(start),
end(end),
- deep(deep) { }
+ deep(deep),
+ seed(seed) { }
private:
<< ",epoch:" << map_epoch << ",start:" << start << ",end:" << end
<< ",chunky:" << chunky
<< ",deep:" << deep
+ << ",seed:" << seed
<< ",version:" << header.version;
out << ")";
}
::encode(end, payload);
::encode(deep, payload);
::encode(pgid.shard, payload);
+ ::encode(seed, payload);
}
void decode_payload() {
bufferlist::iterator p = payload.begin();
} else {
pgid.shard = shard_id_t::NO_SHARD;
}
+ if (header.version >= 6) {
+ ::decode(seed, p);
+ } else {
+ seed = 0;
+ }
}
};
void ECBackend::be_deep_scrub(
const hobject_t &poid,
+ uint32_t seed,
ScrubMap::object &o,
ThreadPool::TPHandle &handle) {
- bufferhash h(-1);
+ bufferhash h(-1); // EC deep scrub has always hashed with a fixed -1 seed; the requested seed is deliberately not applied here
int r;
uint64_t stride = cct->_conf->osd_deep_scrub_stride;
if (stride % sinfo.get_chunk_size())
void be_deep_scrub(
const hobject_t &obj,
+ uint32_t seed,
ScrubMap::object &o,
ThreadPool::TPHandle &handle);
uint64_t be_get_ondisk_size(uint64_t logical_size) {
void PG::_request_scrub_map(
pg_shard_t replica, eversion_t version,
hobject_t start, hobject_t end,
- bool deep)
+ bool deep, uint32_t seed)
{
assert(replica != pg_whoami);
- dout(10) << "scrub requesting scrubmap from osd." << replica << dendl;
+ dout(10) << "scrub requesting scrubmap from osd." << replica
+ << " deep " << (int)deep << " seed " << seed << dendl;
MOSDRepScrub *repscrubop = new MOSDRepScrub(
spg_t(info.pgid.pgid, replica.shard), version,
get_osdmap()->get_epoch(),
- start, end, deep);
+ start, end, deep, seed);
osd->send_message_osd_cluster(
replica.osd, repscrubop, get_osdmap()->get_epoch());
}
*/
int PG::build_scrub_map_chunk(
ScrubMap &map,
- hobject_t start, hobject_t end, bool deep,
+ hobject_t start, hobject_t end, bool deep, uint32_t seed,
ThreadPool::TPHandle &handle)
{
- dout(10) << __func__ << " [" << start << "," << end << ")" << dendl;
+ dout(10) << __func__ << " [" << start << "," << end << ") "
+ << " seed " << seed << dendl;
map.valid_through = info.last_update;
}
- get_pgbackend()->be_scan_list(map, ls, deep, handle);
+ get_pgbackend()->be_scan_list(map, ls, deep, seed, handle);
_scan_rollback_obs(rollback_obs, handle);
_scan_snaps(map);
vector<hobject_t> ls;
osd->store->collection_list(coll, ls);
- get_pgbackend()->be_scan_list(map, ls, false, handle);
+ get_pgbackend()->be_scan_list(map, ls, false, 0, handle);
lock();
_scan_snaps(map);
}
}
- get_pgbackend()->be_scan_list(map, ls, false, handle);
+ get_pgbackend()->be_scan_list(map, ls, false, 0, handle);
}
void PG::repair_object(
}
build_scrub_map_chunk(
- map, msg->start, msg->end, msg->deep,
+ map, msg->start, msg->end, msg->deep, msg->seed,
handle);
vector<OSDOp> scrub(1);
oss << info.pgid.pgid << " " << mode << " starts" << std::endl;
osd->clog->info(oss);
}
+
+ if (peer_features & CEPH_FEATURE_OSD_OBJECT_DIGEST)
+ scrubber.seed = -1; // better, and enables oi digest checks
+ else
+ scrubber.seed = 0; // compat
+
break;
case PG::Scrubber::NEW_CHUNK:
++i) {
if (*i == pg_whoami) continue;
_request_scrub_map(*i, scrubber.subset_last_update,
- scrubber.start, scrubber.end, scrubber.deep);
+ scrubber.start, scrubber.end, scrubber.deep,
+ scrubber.seed);
scrubber.waiting_on_whom.insert(*i);
++scrubber.waiting_on;
}
// build my own scrub map
ret = build_scrub_map_chunk(scrubber.primary_scrubmap,
scrubber.start, scrubber.end,
- scrubber.deep,
+ scrubber.deep, scrubber.seed,
handle);
if (ret < 0) {
dout(5) << "error building scrub map: " << ret << ", aborting" << dendl;
active_rep_scrub(0),
must_scrub(false), must_deep_scrub(false), must_repair(false),
state(INACTIVE),
- deep(false)
+ deep(false),
+ seed(0)
{
}
// deep scrub
bool deep;
+ uint32_t seed;
list<Context*> callbacks;
void add_callback(Context *context) {
deep_errors = 0;
fixed = 0;
deep = false;
+ seed = 0;
run_callbacks();
inconsistent.clear();
missing.clear();
ThreadPool::TPHandle &handle);
void _request_scrub_map_classic(pg_shard_t replica, eversion_t version);
void _request_scrub_map(pg_shard_t replica, eversion_t version,
- hobject_t start, hobject_t end, bool deep);
+ hobject_t start, hobject_t end, bool deep,
+ uint32_t seed);
int build_scrub_map_chunk(
ScrubMap &map,
- hobject_t start, hobject_t end, bool deep,
+ hobject_t start, hobject_t end, bool deep, uint32_t seed,
ThreadPool::TPHandle &handle);
void build_scrub_map(ScrubMap &map, ThreadPool::TPHandle &handle);
void build_inc_scrub_map(
* pg lock may or may not be held
*/
void PGBackend::be_scan_list(
- ScrubMap &map, const vector<hobject_t> &ls, bool deep,
+ ScrubMap &map, const vector<hobject_t> &ls, bool deep, uint32_t seed,
ThreadPool::TPHandle &handle)
{
dout(10) << __func__ << " scanning " << ls.size() << " objects"
// calculate the CRC32 on deep scrubs
if (deep) {
- be_deep_scrub(*p, o, handle);
+ be_deep_scrub(*p, seed, o, handle);
}
dout(25) << __func__ << " " << poid << dendl;
virtual bool scrub_supported() { return false; }
void be_scan_list(
- ScrubMap &map, const vector<hobject_t> &ls, bool deep,
+ ScrubMap &map, const vector<hobject_t> &ls, bool deep, uint32_t seed,
ThreadPool::TPHandle &handle);
enum scrub_error_type be_compare_scrub_objects(
const ScrubMap::object &auth,
uint64_t logical_size) { assert(0); return 0; }
virtual void be_deep_scrub(
const hobject_t &poid,
+ uint32_t seed,
ScrubMap::object &o,
ThreadPool::TPHandle &handle) { assert(0); }
void ReplicatedBackend::be_deep_scrub(
const hobject_t &poid,
+ uint32_t seed,
ScrubMap::object &o,
ThreadPool::TPHandle &handle)
{
- bufferhash h, oh;
+ dout(10) << __func__ << " " << poid << " seed " << seed << dendl;
+ bufferhash h(seed), oh(seed);
bufferlist bl, hdrbl;
int r;
__u64 pos = 0;
ghobject_t(
poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
&hdrbl, true);
- if (r == 0) {
+ // NOTE: from bobtail through giant we crc'd the omap header as a
+ // length-prefixed bufferlist, i.e. (len, data); that legacy encoding
+ // changes at the same time we start using a non-zero seed.
+ if (r == 0 && hdrbl.length()) {
dout(25) << "CRC header " << string(hdrbl.c_str(), hdrbl.length())
<< dendl;
- ::encode(hdrbl, bl);
- oh << bl;
- bl.clear();
+ if (seed == 0) {
+ // legacy
+ bufferlist bl;
+ ::encode(hdrbl, bl);
+ oh << bl;
+ } else {
+ oh << hdrbl;
+ }
} else if (r == -EIO) {
dout(25) << __func__ << " " << poid << " got "
<< r << " on omap header read, read_error" << dendl;
void be_deep_scrub(
const hobject_t &obj,
+ uint32_t seed,
ScrubMap::object &o,
ThreadPool::TPHandle &handle);
uint64_t be_get_ondisk_size(uint64_t logical_size) { return logical_size; }