*/
+#include "common/errno.h"
#include "PGBackend.h"
#include "OSD.h"
t->remove(
coll, ghobject_t(hoid, old_version, get_parent()->whoami_shard().shard));
}
+
+/*
+ * Populate a ScrubMap from the local copies of a list of objects.
+ *
+ * Each object in ls is stat'ed in this shard's collection:
+ *  - on success its size and attrs are recorded in map.objects, and when
+ *    deep is set the be_deep_scrub() hook is run on the new entry;
+ *  - -ENOENT leaves the object out of the map;
+ *  - -EIO records an entry flagged with read_error;
+ *  - any other error is logged and treated as fatal.
+ *
+ * pg lock may or may not be held
+ */
+void PGBackend::be_scan_list(
+  ScrubMap &map, const vector<hobject_t> &ls, bool deep,
+  ThreadPool::TPHandle &handle)
+{
+  dout(10) << "_scan_list scanning " << ls.size() << " objects"
+           << (deep ? " deeply" : "") << dendl;
+  for (vector<hobject_t>::const_iterator p = ls.begin();
+       p != ls.end();
+       ++p) {
+    // called once per object; presumably keeps a long scan from tripping
+    // the thread pool timeout -- confirm against ThreadPool::TPHandle
+    handle.reset_tp_timeout();
+
+    // bind by reference and build the store-level id once: it is needed
+    // for both the stat and the attr fetch below
+    const hobject_t &poid = *p;
+    const ghobject_t goid(
+      poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard);
+
+    struct stat st;
+    int r = store->stat(coll, goid, &st, true);
+    if (r == 0) {
+      ScrubMap::object &o = map.objects[poid];
+      o.size = st.st_size;
+      assert(!o.negative);
+      // NOTE(review): the getattrs() result is not checked, so a failure
+      // here leaves o.attrs as the store left it -- confirm this is intended
+      store->getattrs(coll, goid, o.attrs);
+
+      // calculate the CRC32 on deep scrubs
+      if (deep) {
+        be_deep_scrub(poid, o, handle);
+      }
+
+      dout(25) << "_scan_list  " << poid << dendl;
+    } else if (r == -ENOENT) {
+      dout(25) << "_scan_list  " << poid << " got " << r << ", skipping" << dendl;
+    } else if (r == -EIO) {
+      dout(25) << "_scan_list  " << poid << " got " << r << ", read_error" << dendl;
+      ScrubMap::object &o = map.objects[poid];
+      o.read_error = true;
+    } else {
+      derr << "_scan_list got: " << cpp_strerror(r) << dendl;
+      assert(0);
+    }
+  }
+}
+
+/*
+ * Compare one shard's scrub entry (candidate) against the authoritative
+ * entry (auth) and describe every difference on errorstream as a
+ * ", "-separated list.
+ *
+ * Returns CLEAN when nothing differs.  A read error or a data/omap digest
+ * mismatch yields DEEP_ERROR; a size or attr difference yields
+ * SHALLOW_ERROR.  The shallow checks run last, so they decide the result
+ * whenever both kinds of problem are present.
+ */
+enum scrub_error_type PGBackend::be_compare_scrub_objects(
+  const ScrubMap::object &auth,
+  const ScrubMap::object &candidate,
+  ostream &errorstream)
+{
+  typedef map<string,bufferptr>::const_iterator attr_iter;
+  enum scrub_error_type result = CLEAN;
+
+  if (candidate.read_error) {
+    // A shallow scrub can hit this on stat(); the size is then invalid as
+    // well, so the size check further down replaces this result.
+    result = DEEP_ERROR;
+    errorstream << "candidate had a read error";
+  }
+
+  // digests are only comparable when both sides actually computed one
+  const bool data_digest_differs =
+    auth.digest_present && candidate.digest_present &&
+    auth.digest != candidate.digest;
+  if (data_digest_differs) {
+    if (result != CLEAN)
+      errorstream << ", ";
+    result = DEEP_ERROR;
+    errorstream << "digest " << candidate.digest
+                << " != known digest " << auth.digest;
+  }
+
+  const bool omap_digest_differs =
+    auth.omap_digest_present && candidate.omap_digest_present &&
+    auth.omap_digest != candidate.omap_digest;
+  if (omap_digest_differs) {
+    if (result != CLEAN)
+      errorstream << ", ";
+    result = DEEP_ERROR;
+    errorstream << "omap_digest " << candidate.omap_digest
+                << " != known omap_digest " << auth.omap_digest;
+  }
+
+  // Shallow errors take precedence because they are seen by both types
+  // of scrub.
+  if (auth.size != candidate.size) {
+    if (result != CLEAN)
+      errorstream << ", ";
+    result = SHALLOW_ERROR;
+    errorstream << "size " << candidate.size
+                << " != known size " << auth.size;
+  }
+
+  // every authoritative attr must exist on the candidate with equal value
+  for (attr_iter a = auth.attrs.begin(); a != auth.attrs.end(); ++a) {
+    const attr_iter c = candidate.attrs.find(a->first);
+    const char *problem = 0;
+    if (c == candidate.attrs.end())
+      problem = "missing attr ";
+    else if (c->second.cmp(a->second))
+      problem = "attr value mismatch ";
+    if (!problem)
+      continue;
+    if (result != CLEAN)
+      errorstream << ", ";
+    result = SHALLOW_ERROR;
+    errorstream << problem << a->first;
+  }
+
+  // ... and the candidate must not carry attrs the authority lacks
+  for (attr_iter c = candidate.attrs.begin(); c != candidate.attrs.end(); ++c) {
+    if (auth.attrs.find(c->first) != auth.attrs.end())
+      continue;
+    if (result != CLEAN)
+      errorstream << ", ";
+    result = SHALLOW_ERROR;
+    errorstream << "extra attr " << c->first;
+  }
+
+  return result;
+}
+
+/*
+ * Choose which shard's copy of obj is treated as authoritative.
+ *
+ * Shards are visited in map order.  The first shard that has the object
+ * at all is taken as a fallback without further checks.  Every later
+ * shard that has the object replaces the current choice if its entry has
+ * no read error, carries a decodable OI_ATTR, and the object_info size
+ * matches the scrubbed size -- so the last shard passing all checks wins.
+ *
+ * Returns maps.end() when no shard has the object.
+ */
+map<pg_shard_t, ScrubMap *>::const_iterator
+  PGBackend::be_select_auth_object(
+  const hobject_t &obj,
+  const map<pg_shard_t,ScrubMap*> &maps)
+{
+  map<pg_shard_t, ScrubMap *>::const_iterator auth = maps.end();
+  for (map<pg_shard_t, ScrubMap *>::const_iterator cur = maps.begin();
+       cur != maps.end();
+       ++cur) {
+    map<hobject_t, ScrubMap::object>::iterator found =
+      cur->second->objects.find(obj);
+    if (found == cur->second->objects.end())
+      continue;  // this shard does not have the object
+
+    if (auth == maps.end()) {
+      // Something is better than nothing
+      // TODO: something is NOT better than nothing, do something like
+      // unfound_lost if no valid copies can be found, or just mark unfound
+      auth = cur;
+      dout(10) << __func__ << ": selecting osd " << cur->first
+               << " for obj " << obj
+               << ", auth == maps.end()"
+               << dendl;
+      continue;
+    }
+
+    // Work out why (if at all) this copy must be rejected; the checks run
+    // in a fixed order and the first failing one supplies the log suffix.
+    const char *rejection = 0;
+    if (found->second.read_error) {
+      // scrub encountered read error, probably corrupt
+      rejection = ", read_error";
+    } else {
+      map<string, bufferptr>::iterator oi_attr =
+        found->second.attrs.find(OI_ATTR);
+      if (oi_attr == found->second.attrs.end()) {
+        // no object info on object, probably corrupt
+        rejection = ", no oi attr";
+      } else {
+        bufferlist bl;
+        bl.push_back(oi_attr->second);
+        object_info_t oi;
+        try {
+          bufferlist::iterator bliter = bl.begin();
+          ::decode(oi, bliter);
+        } catch (...) {
+          // invalid object info, probably corrupt
+          rejection = ", corrupt oi attr";
+        }
+        if (!rejection && oi.size != found->second.size) {
+          // invalid size, probably corrupt
+          rejection = ", size mismatch";
+        }
+      }
+    }
+
+    if (rejection) {
+      dout(10) << __func__ << ": rejecting osd " << cur->first
+               << " for obj " << obj
+               << rejection
+               << dendl;
+      continue;
+    }
+
+    dout(10) << __func__ << ": selecting osd " << cur->first
+             << " for obj " << obj
+             << dendl;
+    auth = cur;
+  }
+  return auth;
+}
+
+/*
+ * Cross-check the scrub maps gathered from every shard.
+ *
+ * For each object that appears in at least one map, an authoritative
+ * shard is picked with be_select_auth_object() and every other shard is
+ * compared against it:
+ *  - a shard lacking the object is added to missing[obj] and counted as a
+ *    shallow error;
+ *  - a shard whose entry differs is added to inconsistent[obj] and counted
+ *    as a shallow or deep error according to be_compare_scrub_objects().
+ * authoritative[obj] is set only for objects with at least one problem.
+ * One line per problem is appended to errorstream.
+ *
+ * invalid_snapcolls and acting are not used by this implementation.
+ */
+void PGBackend::be_compare_scrubmaps(
+  const map<pg_shard_t,ScrubMap*> &maps,
+  map<hobject_t, set<pg_shard_t> > &missing,
+  map<hobject_t, set<pg_shard_t> > &inconsistent,
+  map<hobject_t, pg_shard_t> &authoritative,
+  map<hobject_t, set<pg_shard_t> > &invalid_snapcolls,
+  int &shallow_errors, int &deep_errors,
+  const spg_t pgid,
+  const vector<int> &acting,
+  ostream &errorstream)
+{
+  typedef map<pg_shard_t, ScrubMap *>::const_iterator shard_iter;
+  typedef map<hobject_t, ScrubMap::object>::const_iterator object_iter;
+
+  // Construct master set
+  set<hobject_t> master_set;
+  for (shard_iter j = maps.begin(); j != maps.end(); ++j) {
+    for (object_iter i = j->second->objects.begin();
+         i != j->second->objects.end();
+         ++i) {
+      master_set.insert(i->first);
+    }
+  }
+
+  // Check maps against master set and each other
+  for (set<hobject_t>::const_iterator k = master_set.begin();
+       k != master_set.end();
+       ++k) {
+    const shard_iter auth = be_select_auth_object(*k, maps);
+    // every object in master_set came from some map, so a holder exists
+    assert(auth != maps.end());
+    // look the authoritative entry up once; find() cannot silently insert
+    // an empty entry the way operator[] would
+    const object_iter auth_obj = auth->second->objects.find(*k);
+    assert(auth_obj != auth->second->objects.end());
+
+    set<pg_shard_t> cur_missing;
+    set<pg_shard_t> cur_inconsistent;
+    for (shard_iter j = maps.begin(); j != maps.end(); ++j) {
+      if (j == auth)
+        continue;
+      const object_iter cand = j->second->objects.find(*k);
+      if (cand != j->second->objects.end()) {
+        // Compare
+        stringstream ss;
+        enum scrub_error_type error =
+          be_compare_scrub_objects(auth_obj->second, cand->second, ss);
+        if (error != CLEAN) {
+          cur_inconsistent.insert(j->first);
+          if (error == SHALLOW_ERROR)
+            ++shallow_errors;
+          else
+            ++deep_errors;
+          errorstream << pgid << " shard " << j->first
+                      << ": soid " << *k << " " << ss.str() << std::endl;
+        }
+      } else {
+        cur_missing.insert(j->first);
+        ++shallow_errors;
+        errorstream << pgid << " shard " << j->first
+                    << " missing " << *k << std::endl;
+      }
+    }
+
+    if (!cur_missing.empty()) {
+      missing[*k] = cur_missing;
+    }
+    if (!cur_inconsistent.empty()) {
+      inconsistent[*k] = cur_inconsistent;
+    }
+    if (!cur_inconsistent.empty() || !cur_missing.empty()) {
+      authoritative[*k] = auth->first;
+    }
+  }
+}
Context *on_complete) = 0;
virtual bool scrub_supported() { return false; }
- virtual void be_scan_list(
+ void be_scan_list(
ScrubMap &map, const vector<hobject_t> &ls, bool deep,
- ThreadPool::TPHandle &handle) { assert(0); }
- virtual enum scrub_error_type be_compare_scrub_objects(
+ ThreadPool::TPHandle &handle);
+ enum scrub_error_type be_compare_scrub_objects(
const ScrubMap::object &auth,
const ScrubMap::object &candidate,
- ostream &errorstream) { assert(0); }
- virtual map<pg_shard_t, ScrubMap *>::const_iterator be_select_auth_object(
+ ostream &errorstream);
+ map<pg_shard_t, ScrubMap *>::const_iterator be_select_auth_object(
const hobject_t &obj,
- const map<pg_shard_t,ScrubMap*> &maps) { assert(0); }
- virtual void be_compare_scrubmaps(
+ const map<pg_shard_t,ScrubMap*> &maps);
+ void be_compare_scrubmaps(
const map<pg_shard_t,ScrubMap*> &maps,
map<hobject_t, set<pg_shard_t> > &missing,
map<hobject_t, set<pg_shard_t> > &inconsistent,
int &shallow_errors, int &deep_errors,
const spg_t pgid,
const vector<int> &acting,
- ostream &errorstream) { assert(0); }
+ ostream &errorstream);
+ /**
+ * Per-object deep-scrub hook.
+ *
+ * PGBackend::be_scan_list() calls this, when its deep flag is set, for
+ * each object whose stat succeeded, passing the object's scrub map entry
+ * (already carrying size and attrs) as o.  The default body is assert(0),
+ * so a subclass that is deep-scrubbed is expected to override it.
+ */
+ virtual void be_deep_scrub(
+ const hobject_t &poid,
+ ScrubMap::object &o,
+ ThreadPool::TPHandle &handle) { assert(0); }
};
struct PG_SendMessageOnConn: public Context {
o.omap_digest = oh.digest();
o.omap_digest_present = true;
}
-
-/*
- * pg lock may or may not be held
- */
-void ReplicatedBackend::be_scan_list(
- ScrubMap &map, const vector<hobject_t> &ls, bool deep,
- ThreadPool::TPHandle &handle)
-{
- dout(10) << "_scan_list scanning " << ls.size() << " objects"
- << (deep ? " deeply" : "") << dendl;
- int i = 0;
- for (vector<hobject_t>::const_iterator p = ls.begin();
- p != ls.end();
- ++p, i++) {
- handle.reset_tp_timeout();
- hobject_t poid = *p;
-
- struct stat st;
- int r = store->stat(
- coll,
- ghobject_t(
- poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
- &st,
- true);
- if (r == 0) {
- ScrubMap::object &o = map.objects[poid];
- o.size = st.st_size;
- assert(!o.negative);
- store->getattrs(
- coll,
- ghobject_t(
- poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
- o.attrs);
-
- // calculate the CRC32 on deep scrubs
- if (deep) {
- be_deep_scrub(*p, o, handle);
- }
-
- dout(25) << "_scan_list " << poid << dendl;
- } else if (r == -ENOENT) {
- dout(25) << "_scan_list " << poid << " got " << r << ", skipping" << dendl;
- } else if (r == -EIO) {
- dout(25) << "_scan_list " << poid << " got " << r << ", read_error" << dendl;
- ScrubMap::object &o = map.objects[poid];
- o.read_error = true;
- } else {
- derr << "_scan_list got: " << cpp_strerror(r) << dendl;
- assert(0);
- }
- }
-}
-
-enum scrub_error_type ReplicatedBackend::be_compare_scrub_objects(
- const ScrubMap::object &auth,
- const ScrubMap::object &candidate,
- ostream &errorstream)
-{
- enum scrub_error_type error = CLEAN;
- if (candidate.read_error) {
- // This can occur on stat() of a shallow scrub, but in that case size will
- // be invalid, and this will be over-ridden below.
- error = DEEP_ERROR;
- errorstream << "candidate had a read error";
- }
- if (auth.digest_present && candidate.digest_present) {
- if (auth.digest != candidate.digest) {
- if (error != CLEAN)
- errorstream << ", ";
- error = DEEP_ERROR;
-
- errorstream << "digest " << candidate.digest
- << " != known digest " << auth.digest;
- }
- }
- if (auth.omap_digest_present && candidate.omap_digest_present) {
- if (auth.omap_digest != candidate.omap_digest) {
- if (error != CLEAN)
- errorstream << ", ";
- error = DEEP_ERROR;
-
- errorstream << "omap_digest " << candidate.omap_digest
- << " != known omap_digest " << auth.omap_digest;
- }
- }
- // Shallow error takes precendence because this will be seen by
- // both types of scrubs.
- if (auth.size != candidate.size) {
- if (error != CLEAN)
- errorstream << ", ";
- error = SHALLOW_ERROR;
- errorstream << "size " << candidate.size
- << " != known size " << auth.size;
- }
- for (map<string,bufferptr>::const_iterator i = auth.attrs.begin();
- i != auth.attrs.end();
- ++i) {
- if (!candidate.attrs.count(i->first)) {
- if (error != CLEAN)
- errorstream << ", ";
- error = SHALLOW_ERROR;
- errorstream << "missing attr " << i->first;
- } else if (candidate.attrs.find(i->first)->second.cmp(i->second)) {
- if (error != CLEAN)
- errorstream << ", ";
- error = SHALLOW_ERROR;
- errorstream << "attr value mismatch " << i->first;
- }
- }
- for (map<string,bufferptr>::const_iterator i = candidate.attrs.begin();
- i != candidate.attrs.end();
- ++i) {
- if (!auth.attrs.count(i->first)) {
- if (error != CLEAN)
- errorstream << ", ";
- error = SHALLOW_ERROR;
- errorstream << "extra attr " << i->first;
- }
- }
- return error;
-}
-
-map<pg_shard_t, ScrubMap *>::const_iterator
- ReplicatedBackend::be_select_auth_object(
- const hobject_t &obj,
- const map<pg_shard_t,ScrubMap*> &maps)
-{
- map<pg_shard_t, ScrubMap *>::const_iterator auth = maps.end();
- for (map<pg_shard_t, ScrubMap *>::const_iterator j = maps.begin();
- j != maps.end();
- ++j) {
- map<hobject_t, ScrubMap::object>::iterator i =
- j->second->objects.find(obj);
- if (i == j->second->objects.end()) {
- continue;
- }
- if (auth == maps.end()) {
- // Something is better than nothing
- // TODO: something is NOT better than nothing, do something like
- // unfound_lost if no valid copies can be found, or just mark unfound
- auth = j;
- dout(10) << __func__ << ": selecting osd " << j->first
- << " for obj " << obj
- << ", auth == maps.end()"
- << dendl;
- continue;
- }
- if (i->second.read_error) {
- // scrub encountered read error, probably corrupt
- dout(10) << __func__ << ": rejecting osd " << j->first
- << " for obj " << obj
- << ", read_error"
- << dendl;
- continue;
- }
- map<string, bufferptr>::iterator k = i->second.attrs.find(OI_ATTR);
- if (k == i->second.attrs.end()) {
- // no object info on object, probably corrupt
- dout(10) << __func__ << ": rejecting osd " << j->first
- << " for obj " << obj
- << ", no oi attr"
- << dendl;
- continue;
- }
- bufferlist bl;
- bl.push_back(k->second);
- object_info_t oi;
- try {
- bufferlist::iterator bliter = bl.begin();
- ::decode(oi, bliter);
- } catch (...) {
- dout(10) << __func__ << ": rejecting osd " << j->first
- << " for obj " << obj
- << ", corrupt oi attr"
- << dendl;
- // invalid object info, probably corrupt
- continue;
- }
- if (oi.size != i->second.size) {
- // invalid size, probably corrupt
- dout(10) << __func__ << ": rejecting osd " << j->first
- << " for obj " << obj
- << ", size mismatch"
- << dendl;
- // invalid object info, probably corrupt
- continue;
- }
- dout(10) << __func__ << ": selecting osd " << j->first
- << " for obj " << obj
- << dendl;
- auth = j;
- }
- return auth;
-}
-
-void ReplicatedBackend::be_compare_scrubmaps(
- const map<pg_shard_t,ScrubMap*> &maps,
- map<hobject_t, set<pg_shard_t> > &missing,
- map<hobject_t, set<pg_shard_t> > &inconsistent,
- map<hobject_t, pg_shard_t> &authoritative,
- map<hobject_t, set<pg_shard_t> > &invalid_snapcolls,
- int &shallow_errors, int &deep_errors,
- const spg_t pgid,
- const vector<int> &acting,
- ostream &errorstream)
-{
- map<hobject_t,ScrubMap::object>::const_iterator i;
- map<pg_shard_t, ScrubMap *>::const_iterator j;
- set<hobject_t> master_set;
-
- // Construct master set
- for (j = maps.begin(); j != maps.end(); ++j) {
- for (i = j->second->objects.begin(); i != j->second->objects.end(); ++i) {
- master_set.insert(i->first);
- }
- }
-
- // Check maps against master set and each other
- for (set<hobject_t>::const_iterator k = master_set.begin();
- k != master_set.end();
- ++k) {
- map<pg_shard_t, ScrubMap *>::const_iterator auth =
- be_select_auth_object(*k, maps);
- assert(auth != maps.end());
- set<pg_shard_t> cur_missing;
- set<pg_shard_t> cur_inconsistent;
- for (j = maps.begin(); j != maps.end(); ++j) {
- if (j == auth)
- continue;
- if (j->second->objects.count(*k)) {
- // Compare
- stringstream ss;
- enum scrub_error_type error = be_compare_scrub_objects(auth->second->objects[*k],
- j->second->objects[*k],
- ss);
- if (error != CLEAN) {
- cur_inconsistent.insert(j->first);
- if (error == SHALLOW_ERROR)
- ++shallow_errors;
- else
- ++deep_errors;
- errorstream << pgid << " shard " << j->first
- << ": soid " << *k << " " << ss.str() << std::endl;
- }
- } else {
- cur_missing.insert(j->first);
- ++shallow_errors;
- errorstream << pgid << " shard " << j->first
- << " missing " << *k << std::endl;
- }
- }
- assert(auth != maps.end());
- if (!cur_missing.empty()) {
- missing[*k] = cur_missing;
- }
- if (!cur_inconsistent.empty()) {
- inconsistent[*k] = cur_inconsistent;
- }
- if (!cur_inconsistent.empty() || !cur_missing.empty()) {
- authoritative[*k] = auth->first;
- }
- }
-}
void sub_op_modify_commit(RepModifyRef rm);
bool scrub_supported() { return true; }
- void be_scan_list(
- ScrubMap &map, const vector<hobject_t> &ls, bool deep,
- ThreadPool::TPHandle &handle);
- enum scrub_error_type be_compare_scrub_objects(
- const ScrubMap::object &auth,
- const ScrubMap::object &candidate,
- ostream &errorstream);
- map<pg_shard_t, ScrubMap *>::const_iterator be_select_auth_object(
- const hobject_t &obj,
- const map<pg_shard_t,ScrubMap*> &maps);
- void be_compare_scrubmaps(
- const map<pg_shard_t,ScrubMap*> &maps,
- map<hobject_t, set<pg_shard_t> > &missing,
- map<hobject_t, set<pg_shard_t> > &inconsistent,
- map<hobject_t, pg_shard_t> &authoritative,
- map<hobject_t, set<pg_shard_t> > &invalid_snapcolls,
- int &shallow_errors, int &deep_errors,
- const spg_t pgid,
- const vector<int> &acting,
- ostream &errorstream);
void be_deep_scrub(
const hobject_t &obj,
ScrubMap::object &o,