* dump-trace < --trace-file arg >
* replay-trace
* random-gen
* rewrite-crush
*
* wanted syntax:
*
<< " (replay-trace -- --help for more info)\n"
<< " random-gen [-- options] add randomly generated ops to the store\n"
<< " (random-gen -- --help for more info)\n"
+ << " rewrite-crush [-- options] add a rewrite commit to the store\n"
+ << " (rewrite-crush -- --help for more info)\n"
<< std::endl;
std::cerr << d << std::endl;
std::cerr
<< std::endl;
}
+int update_osdmap(MonitorDBStore& store, version_t ver, bool copy,
+ ceph::shared_ptr<CrushWrapper> crush,
+ MonitorDBStore::Transaction* t) {
+ const string prefix("osdmap");
+
+ // full
+ bufferlist bl;
+ int r = 0;
+ r = store.get(prefix, store.combine_strings("full", ver), bl);
+ if (r) {
+ std::cerr << "Error getting full map: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ OSDMap osdmap;
+ osdmap.decode(bl);
+ osdmap.crush = crush;
+ if (copy) {
+ osdmap.inc_epoch();
+ }
+ bl.clear();
+ // be consistent with OSDMonitor::update_from_paxos()
+ osdmap.encode(bl, CEPH_FEATURES_ALL|CEPH_FEATURE_RESERVED);
+ t->put(prefix, store.combine_strings("full", osdmap.get_epoch()), bl);
+
+ // incremental
+ OSDMap::Incremental inc;
+ if (copy) {
+ inc.epoch = osdmap.get_epoch();
+ inc.fsid = osdmap.get_fsid();
+ } else {
+ bl.clear();
+ r = store.get(prefix, ver, bl);
+ if (r) {
+ std::cerr << "Error getting inc map: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ OSDMap::Incremental inc(bl);
+ if (inc.crush.length()) {
+ inc.crush.clear();
+ crush->encode(inc.crush);
+ }
+ if (inc.fullmap.length()) {
+ OSDMap fullmap;
+ fullmap.decode(inc.fullmap);
+ fullmap.crush = crush;
+ inc.fullmap.clear();
+ fullmap.encode(inc.fullmap);
+ }
+ }
+ assert(osdmap.have_crc());
+ inc.full_crc = osdmap.get_crc();
+ bl.clear();
+ // be consistent with OSDMonitor::update_from_paxos()
+ inc.encode(bl, CEPH_FEATURES_ALL|CEPH_FEATURE_RESERVED);
+ t->put(prefix, inc.epoch, bl);
+ return 0;
+}
+
+int rewrite_transaction(MonitorDBStore& store, int version,
+ const string& crush_file,
+ MonitorDBStore::Transaction* t) {
+ const string prefix("osdmap");
+
+ // calc the known-good epoch
+ version_t last_committed = store.get(prefix, "last_committed");
+ version_t good_version = 0;
+ if (version <= 0) {
+ if (last_committed >= (unsigned)-version) {
+ good_version = last_committed + version;
+ } else {
+ std::cerr << "osdmap-version is less than: -" << last_committed << std::endl;
+ return EINVAL;
+ }
+ } else {
+ good_version = version;
+ }
+ if (good_version >= last_committed) {
+ std::cout << "good epoch is greater or equal to the last committed one: "
+ << good_version << " >= " << last_committed << std::endl;
+ return 0;
+ }
+
+ // load/extract the crush map
+ int r = 0;
+ ceph::shared_ptr<CrushWrapper> crush(new CrushWrapper);
+ if (crush_file.empty()) {
+ bufferlist bl;
+ r = store.get(prefix, store.combine_strings("full", good_version), bl);
+ if (r) {
+ std::cerr << "Error getting map: " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ OSDMap osdmap;
+ osdmap.decode(bl);
+ crush = osdmap.crush;
+ } else {
+ string err;
+ bufferlist bl;
+ r = bl.read_file(crush_file.c_str(), &err);
+ if (r) {
+ std::cerr << err << ": " << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ bufferlist::iterator p = bl.begin();
+ crush->decode(p);
+ }
+
+ // prepare a transaction to rewrite the epochs
+ // (good_version, last_committed]
+ // with the good crush map.
+ // XXX: may need to break this into several paxos versions?
+ assert(good_version < last_committed);
+ for (version_t v = good_version + 1; v <= last_committed; v++) {
+ cout << "rewriting epoch #" << v << "/" << last_committed << std::endl;
+ r = update_osdmap(store, v, false, crush, t);
+ if (r)
+ return r;
+ }
+
+ // add a new osdmap epoch to store, so monitors will update their current osdmap
+ // in addition to the ones stored in epochs.
+ cout << "adding a new epoch #" << last_committed+1 << std::endl;
+ r = update_osdmap(store, last_committed++, true, crush, t);
+ if (r)
+ return r;
+ t->put(prefix, store.combine_strings("full", "latest"), last_committed);
+ t->put(prefix, "last_committed", last_committed);
+ return 0;
+}
+
+/**
+ * create a new paxos version which carries a proposal to rewrite all epochs
+ * of incremental and full map of "osdmap" after a faulty crush map is injected.
+ * so the leader will trigger a recovery and propagate this fix to its peons,
+ * after the proposal is accepted, and the transaction in it is applied. all
+ * monitors will rewrite the bad crush map with the good one, and have a new
+ * osdmap epoch with the good crush map in it.
+ */
+int rewrite_crush(const char* progname,
+ vector<string>& subcmds,
+ MonitorDBStore& store) {
+ po::options_description op_desc("Allowed 'rewrite-crush' options");
+ int version = -1;
+ string crush_file;
+ op_desc.add_options()
+ ("help,h", "produce this help message")
+ ("crush", po::value<string>(&crush_file),
+ ("path to the crush map file "
+ "(default: will instead extract it from the known-good osdmap)"))
+ ("good-epoch", po::value<int>(&version),
+ "known-good epoch of osdmap, if a negative number '-N' is given, the "
+ "$last_committed-N is used instead (default: -1). "
+ "Please note, -1 is not necessarily a good epoch, because there are "
+ "good chance that we have more epochs slipped into the monstore after "
+ "the one where the crushmap is firstly injected.")
+ ;
+ po::variables_map op_vm;
+ int r = parse_cmd_args(&op_desc, NULL, NULL, subcmds, &op_vm);
+ if (r) {
+ return -r;
+ }
+ if (op_vm.count("help")) {
+ usage(progname, op_desc);
+ return 0;
+ }
+
+ MonitorDBStore::Transaction rewrite_txn;
+ r = rewrite_transaction(store, version, crush_file, &rewrite_txn);
+ if (r) {
+ return r;
+ }
+
+ // store the transaction into store as a proposal
+ const string prefix("paxos");
+ version_t pending_v = store.get(prefix, "last_committed") + 1;
+ MonitorDBStore::TransactionRef t(new MonitorDBStore::Transaction);
+ bufferlist bl;
+ rewrite_txn.encode(bl);
+ cout << "adding pending commit " << pending_v
+ << " " << bl.length() << " bytes" << std::endl;
+ t->put(prefix, pending_v, bl);
+ t->put(prefix, "pending_v", pending_v);
+ // a large enough yet unique proposal number will probably do the trick
+ version_t pending_pn = (store.get(prefix, "accepted_pn") / 100 + 4) * 100 + 1;
+ t->put(prefix, "pending_pn", pending_pn);
+ store.apply_transaction(t);
+ return 0;
+}
+
int main(int argc, char **argv) {
int err = 0;
po::options_description desc("Allowed options");
<< stringify(si_t(total_size)) << std::endl;
std::cout << "from '" << store_path << "' to '" << out_path << "'"
<< std::endl;
+ } else if (cmd == "rewrite-crush") {
+ err = rewrite_crush(argv[0], subcmds, st);
} else {
std::cerr << "Unrecognized command: " << cmd << std::endl;
usage(argv[0], desc);