}
return true;
}
-
-#undef dout_prefix
-#define dout_prefix *_dout
-
-void Monitor::StoreConverter::_convert_finish_features(
- MonitorDBStore::TransactionRef t)
-{
- dout(20) << __func__ << dendl;
-
- assert(db->exists(MONITOR_NAME, COMPAT_SET_LOC));
- bufferlist features_bl;
- db->get(MONITOR_NAME, COMPAT_SET_LOC, features_bl);
- assert(features_bl.length());
-
- CompatSet features;
- bufferlist::iterator p = features_bl.begin();
- features.decode(p);
-
- assert(features.incompat.contains(CEPH_MON_FEATURE_INCOMPAT_GV));
- features.incompat.remove(CEPH_MON_FEATURE_INCOMPAT_GV);
- assert(!features.incompat.contains(CEPH_MON_FEATURE_INCOMPAT_GV));
-
- features.incompat.insert(CEPH_MON_FEATURE_INCOMPAT_SINGLE_PAXOS);
- assert(features.incompat.contains(CEPH_MON_FEATURE_INCOMPAT_SINGLE_PAXOS));
-
- features_bl.clear();
- features.encode(features_bl);
-
- dout(20) << __func__ << " new features " << features << dendl;
- t->put(MONITOR_NAME, COMPAT_SET_LOC, features_bl);
-}
-
-
-bool Monitor::StoreConverter::_check_gv_store()
-{
- dout(20) << __func__ << dendl;
- if (!store->exists_bl_ss(COMPAT_SET_LOC, 0))
- return false;
-
- bufferlist features_bl;
- store->get_bl_ss_safe(features_bl, COMPAT_SET_LOC, 0);
- if (!features_bl.length()) {
- dout(20) << __func__ << " on-disk features length is zero" << dendl;
- return false;
- }
- CompatSet features;
- bufferlist::iterator p = features_bl.begin();
- features.decode(p);
- return (features.incompat.contains(CEPH_MON_FEATURE_INCOMPAT_GV));
-}
-
-int Monitor::StoreConverter::needs_conversion()
-{
- bufferlist magicbl;
- int ret = 0;
-
- dout(10) << "check if store needs conversion from legacy format" << dendl;
- _init();
-
- int err = store->mount();
- if (err < 0) {
- if (err == -ENOENT) {
- derr << "unable to mount monitor store: "
- << cpp_strerror(err) << dendl;
- } else {
- derr << "it appears that another monitor is running: "
- << cpp_strerror(err) << dendl;
- }
- ret = err;
- goto out;
- }
- assert(err == 0);
-
- if (store->exists_bl_ss("magic", 0)) {
- if (_check_gv_store()) {
- dout(1) << "found old GV monitor store format "
- << "-- should convert!" << dendl;
- ret = 1;
- } else {
- dout(0) << "Existing monitor store has not been converted "
- << "to 0.52 (bobtail) format" << dendl;
- assert(0 == "Existing store has not been converted to 0.52 format");
- }
- }
- assert(!store->umount());
-
-out:
- _deinit();
- return ret;
-}
-
-int Monitor::StoreConverter::convert()
-{
- _init();
- assert(!store->mount());
- if (db->exists("mon_convert", "on_going")) {
- dout(0) << __func__ << " found a mon store in mid-convertion; abort!"
- << dendl;
- return -EEXIST;
- }
-
- _mark_convert_start();
- _convert_monitor();
- _convert_machines();
- _convert_paxos();
- _mark_convert_finish();
-
- store->umount();
- _deinit();
-
- dout(0) << __func__ << " finished conversion" << dendl;
-
- return 0;
-}
-
-void Monitor::StoreConverter::_convert_monitor()
-{
- dout(10) << __func__ << dendl;
-
- assert(store->exists_bl_ss("magic"));
- assert(store->exists_bl_ss("keyring"));
- assert(store->exists_bl_ss("feature_set"));
- assert(store->exists_bl_ss("election_epoch"));
-
- MonitorDBStore::TransactionRef tx(new MonitorDBStore::Transaction);
-
- if (store->exists_bl_ss("joined")) {
- version_t joined = store->get_int("joined");
- tx->put(MONITOR_NAME, "joined", joined);
- }
-
- vector<string> keys;
- keys.push_back("magic");
- keys.push_back("feature_set");
- keys.push_back("cluster_uuid");
-
- vector<string>::iterator it;
- for (it = keys.begin(); it != keys.end(); ++it) {
- if (!store->exists_bl_ss((*it).c_str()))
- continue;
-
- bufferlist bl;
- int r = store->get_bl_ss(bl, (*it).c_str(), 0);
- assert(r > 0);
- tx->put(MONITOR_NAME, *it, bl);
- }
- version_t election_epoch = store->get_int("election_epoch");
- tx->put(MONITOR_NAME, "election_epoch", election_epoch);
-
- assert(!tx->empty());
- db->apply_transaction(tx);
- dout(10) << __func__ << " finished" << dendl;
-}
-
-void Monitor::StoreConverter::_convert_machines(string machine)
-{
- dout(10) << __func__ << " " << machine << dendl;
-
- version_t first_committed =
- store->get_int(machine.c_str(), "first_committed");
- version_t last_committed =
- store->get_int(machine.c_str(), "last_committed");
-
- version_t accepted_pn = store->get_int(machine.c_str(), "accepted_pn");
- version_t last_pn = store->get_int(machine.c_str(), "last_pn");
-
- if (accepted_pn > highest_accepted_pn)
- highest_accepted_pn = accepted_pn;
- if (last_pn > highest_last_pn)
- highest_last_pn = last_pn;
-
- string machine_gv(machine);
- machine_gv.append("_gv");
- bool has_gv = true;
-
- if (!store->exists_bl_ss(machine_gv.c_str())) {
- dout(1) << __func__ << " " << machine
- << " no gv dir '" << machine_gv << "'" << dendl;
- has_gv = false;
- }
-
- for (version_t ver = first_committed; ver <= last_committed; ver++) {
- if (!store->exists_bl_sn(machine.c_str(), ver)) {
- dout(20) << __func__ << " " << machine
- << " ver " << ver << " dne" << dendl;
- continue;
- }
-
- bufferlist bl;
- int r = store->get_bl_sn(bl, machine.c_str(), ver);
- assert(r >= 0);
- dout(20) << __func__ << " " << machine
- << " ver " << ver << " bl " << bl.length() << dendl;
-
- MonitorDBStore::TransactionRef tx(new MonitorDBStore::Transaction);
- tx->put(machine, ver, bl);
- tx->put(machine, "last_committed", ver);
-
- if (has_gv && store->exists_bl_sn(machine_gv.c_str(), ver)) {
- stringstream s;
- s << ver;
- string ver_str = s.str();
-
- version_t gv = store->get_int(machine_gv.c_str(), ver_str.c_str());
- dout(20) << __func__ << " " << machine
- << " ver " << ver << " -> " << gv << dendl;
-
- MonitorDBStore::TransactionRef paxos_tx(new MonitorDBStore::Transaction);
-
- if (gvs.count(gv) == 0) {
- gvs.insert(gv);
- } else {
- dout(0) << __func__ << " " << machine
- << " gv " << gv << " already exists"
- << dendl;
-
- // Duplicates aren't supposed to happen, but an old bug introduced
- // them and the mds state machine wasn't ever trimmed, so many users
- // will see them. So we'll just merge them all in one
- // single paxos version.
- // We know that they are either from another paxos machine or
- // they are from the same paxos machine but their version is
- // lower than ours -- given that we are iterating all versions
- // from the lowest to the highest, duh!
- // We'll just append our stuff to the existing paxos transaction
- // as if nothing had happened.
-
- // Just make sure we are correct. This shouldn't take long and
- // should never be triggered!
- set<pair<string,version_t> >& s = gv_map[gv];
- for (set<pair<string,version_t> >::iterator it = s.begin();
- it != s.end(); ++it) {
- if (it->first == machine)
- assert(it->second + 1 == ver);
- }
-
- bufferlist paxos_bl;
- int r = db->get("paxos", gv, paxos_bl);
- assert(r >= 0);
- paxos_tx->append_from_encoded(paxos_bl);
- }
- gv_map[gv].insert(make_pair(machine,ver));
-
- bufferlist tx_bl;
- tx->encode(tx_bl);
- paxos_tx->append_from_encoded(tx_bl);
- bufferlist paxos_bl;
- paxos_tx->encode(paxos_bl);
- tx->put("paxos", gv, paxos_bl);
- }
- db->apply_transaction(tx);
- }
-
- version_t lc = db->get(machine, "last_committed");
- dout(20) << __func__ << " lc " << lc << " last_committed " << last_committed << dendl;
- assert(lc == last_committed);
-
- MonitorDBStore::TransactionRef tx(new MonitorDBStore::Transaction);
- tx->put(machine, "first_committed", first_committed);
- tx->put(machine, "last_committed", last_committed);
- tx->put(machine, "conversion_first", first_committed);
-
- if (store->exists_bl_ss(machine.c_str(), "latest")) {
- bufferlist latest_bl_raw;
- int r = store->get_bl_ss(latest_bl_raw, machine.c_str(), "latest");
- assert(r >= 0);
- if (!latest_bl_raw.length()) {
- dout(20) << __func__ << " machine " << machine
- << " skip latest with size 0" << dendl;
- goto out;
- }
-
- tx->put(machine, "latest", latest_bl_raw);
-
- bufferlist::iterator lbl_it = latest_bl_raw.begin();
- bufferlist latest_bl;
- version_t latest_ver;
- ::decode(latest_ver, lbl_it);
- ::decode(latest_bl, lbl_it);
-
- dout(20) << __func__ << " machine " << machine
- << " latest ver " << latest_ver << dendl;
-
- tx->put(machine, "full_latest", latest_ver);
- stringstream os;
- os << "full_" << latest_ver;
- tx->put(machine, os.str(), latest_bl);
- }
-out:
- db->apply_transaction(tx);
- dout(10) << __func__ << " machine " << machine << " finished" << dendl;
-}
-
-void Monitor::StoreConverter::_convert_osdmap_full()
-{
- dout(10) << __func__ << dendl;
- version_t first_committed =
- store->get_int("osdmap", "first_committed");
- version_t last_committed =
- store->get_int("osdmap", "last_committed");
-
- int err = 0;
- for (version_t ver = first_committed; ver <= last_committed; ver++) {
- if (!store->exists_bl_sn("osdmap_full", ver)) {
- dout(20) << __func__ << " osdmap_full ver " << ver << " dne" << dendl;
- err++;
- continue;
- }
-
- bufferlist bl;
- int r = store->get_bl_sn(bl, "osdmap_full", ver);
- assert(r >= 0);
- dout(20) << __func__ << " osdmap_full ver " << ver
- << " bl " << bl.length() << " bytes" << dendl;
-
- string full_key = "full_" + stringify(ver);
- MonitorDBStore::TransactionRef tx(new MonitorDBStore::Transaction);
- tx->put("osdmap", full_key, bl);
- db->apply_transaction(tx);
- }
- dout(10) << __func__ << " found " << err << " conversion errors!" << dendl;
- assert(err == 0);
-}
-
-void Monitor::StoreConverter::_convert_paxos()
-{
- dout(10) << __func__ << dendl;
- assert(!gvs.empty());
-
- set<version_t>::reverse_iterator rit = gvs.rbegin();
- version_t highest_gv = *rit;
- version_t last_gv = highest_gv;
-
- int n = 0;
- int max_versions = (g_conf->paxos_max_join_drift*2);
- for (; (rit != gvs.rend()) && (n < max_versions); ++rit, ++n) {
- version_t gv = *rit;
-
- if (last_gv == gv)
- continue;
- if ((last_gv - gv) > 1) {
- // we are done; we found a gap and we are only interested in keeping
- // contiguous paxos versions.
- break;
- }
- last_gv = gv;
- }
-
- // erase all paxos versions between [first, last_gv[, with first being the
- // first gv in the map.
- MonitorDBStore::TransactionRef tx(new MonitorDBStore::Transaction);
- set<version_t>::iterator it = gvs.begin();
- dout(1) << __func__ << " first gv " << (*it)
- << " last gv " << last_gv << dendl;
- for (; it != gvs.end() && (*it < last_gv); ++it) {
- tx->erase("paxos", *it);
- }
- tx->put("paxos", "first_committed", last_gv);
- tx->put("paxos", "last_committed", highest_gv);
- tx->put("paxos", "accepted_pn", highest_accepted_pn);
- tx->put("paxos", "last_pn", highest_last_pn);
- tx->put("paxos", "conversion_first", last_gv);
- db->apply_transaction(tx);
-
- dout(10) << __func__ << " finished" << dendl;
-}
-
-void Monitor::StoreConverter::_convert_machines()
-{
- dout(10) << __func__ << dendl;
- set<string> machine_names = _get_machines_names();
- set<string>::iterator it = machine_names.begin();
-
- for (; it != machine_names.end(); ++it) {
- _convert_machines(*it);
- }
- // convert osdmap full versions
- // this stays here as these aren't really an independent paxos
- // machine, but rather machine-specific and don't fit on the
- // _convert_machines(string) function.
- _convert_osdmap_full();
-
- dout(10) << __func__ << " finished" << dendl;
-}