if ((lock->get_type() == CEPH_LOCK_ISNAP ||
lock->get_type() == CEPH_LOCK_IPOLICY) &&
mds->is_cluster_degraded() &&
- mdr->is_master() &&
+ mdr->is_leader() &&
!mdr->is_queued_for_replay()) {
// waiting for recovering mds to guarantee that replayed requests and mksnap/setlayout
// get processed in proper order.
CDentry *dn = static_cast<CDentry*>(object);
if (!dn->is_auth())
continue;
- if (mdr->is_master()) {
- // master. wrlock versionlock so we can pipeline dentry updates to journal.
+ if (mdr->is_leader()) {
+ // leader. wrlock versionlock so we can pipeline dentry updates to journal.
lov.add_wrlock(&dn->versionlock, i + 1);
} else {
// slave. exclusively lock the dentry version (i.e. block other journal updates).
CInode *in = static_cast<CInode*>(object);
if (!in->is_auth())
continue;
- if (mdr->is_master()) {
- // master. wrlock versionlock so we can pipeline inode updates to journal.
+ if (mdr->is_leader()) {
+ // leader. wrlock versionlock so we can pipeline inode updates to journal.
lov.add_wrlock(&in->versionlock, i + 1);
} else {
// slave. exclusively lock the inode version (i.e. block other journal updates).
mustpin.insert(object);
} else if (!object->is_auth() &&
!lock->can_wrlock(_client) && // we might have to request a scatter
- !mdr->is_slave()) { // if we are slave (remote_wrlock), the master already authpinned
+ !mdr->is_slave()) { // if we are slave (remote_wrlock), the leader already authpinned
dout(15) << " will also auth_pin " << *object
<< " in case we need to request a scatter" << dendl;
mustpin.insert(object);
continue;
}
- ceph_assert(mdr->is_master());
+ ceph_assert(mdr->is_leader());
if (lock->needs_recover()) {
if (mds->is_cluster_degraded()) {
if (!mdr->is_queued_for_replay()) {
*
* We can defer while freezing without causing a deadlock. Honor
* scatter_wanted flag here. This will never get deferred by the
- * checks above due to the auth_pin held by the master.
+ * checks above due to the auth_pin held by the leader.
*/
if (lock->is_scatterlock()) {
ScatterLock *slock = static_cast<ScatterLock *>(lock);
MDSContext* purged_cb = nullptr;
map<int, ceph::unordered_set<version_t> > pending_commit_tids; // mdstable
- set<metareqid_t> uncommitted_masters;
+ set<metareqid_t> uncommitted_leaders;
set<metareqid_t> uncommitted_slaves;
set<dirfrag_t> uncommitted_fragments;
/*
- * some handlers for master requests with slaves. we need to make
- * sure slaves journal commits before we forget we mastered them and
- * remove them from the uncommitted_masters map (used during recovery
+ * some handlers for leader requests with slaves. we need to make
+ * sure slaves journal commits before we forget we led them and
+ * remove them from the uncommitted_leaders map (used during recovery
* to commit|abort slaves).
*/
-struct C_MDC_CommittedMaster : public MDCacheLogContext {
+struct C_MDC_CommittedLeader : public MDCacheLogContext {
metareqid_t reqid;
- C_MDC_CommittedMaster(MDCache *s, metareqid_t r) : MDCacheLogContext(s), reqid(r) {}
+ C_MDC_CommittedLeader(MDCache *s, metareqid_t r) : MDCacheLogContext(s), reqid(r) {}
void finish(int r) override {
- mdcache->_logged_master_commit(reqid);
+ mdcache->_logged_leader_commit(reqid);
}
};
-void MDCache::log_master_commit(metareqid_t reqid)
+void MDCache::log_leader_commit(metareqid_t reqid)
{
- dout(10) << "log_master_commit " << reqid << dendl;
- uncommitted_masters[reqid].committing = true;
+ dout(10) << "log_leader_commit " << reqid << dendl;
+ uncommitted_leaders[reqid].committing = true;
mds->mdlog->start_submit_entry(new ECommitted(reqid),
- new C_MDC_CommittedMaster(this, reqid));
+ new C_MDC_CommittedLeader(this, reqid));
}
-void MDCache::_logged_master_commit(metareqid_t reqid)
+void MDCache::_logged_leader_commit(metareqid_t reqid)
{
- dout(10) << "_logged_master_commit " << reqid << dendl;
- ceph_assert(uncommitted_masters.count(reqid));
- uncommitted_masters[reqid].ls->uncommitted_masters.erase(reqid);
- mds->queue_waiters(uncommitted_masters[reqid].waiters);
- uncommitted_masters.erase(reqid);
+ dout(10) << "_logged_leader_commit " << reqid << dendl;
+ ceph_assert(uncommitted_leaders.count(reqid));
+ uncommitted_leaders[reqid].ls->uncommitted_leaders.erase(reqid);
+ mds->queue_waiters(uncommitted_leaders[reqid].waiters);
+ uncommitted_leaders.erase(reqid);
}
// while active...
-void MDCache::committed_master_slave(metareqid_t r, mds_rank_t from)
+void MDCache::committed_leader_slave(metareqid_t r, mds_rank_t from)
{
- dout(10) << "committed_master_slave mds." << from << " on " << r << dendl;
- ceph_assert(uncommitted_masters.count(r));
- uncommitted_masters[r].slaves.erase(from);
- if (!uncommitted_masters[r].recovering && uncommitted_masters[r].slaves.empty())
- log_master_commit(r);
+ dout(10) << "committed_leader_slave mds." << from << " on " << r << dendl;
+ ceph_assert(uncommitted_leaders.count(r));
+ uncommitted_leaders[r].slaves.erase(from);
+ if (!uncommitted_leaders[r].recovering && uncommitted_leaders[r].slaves.empty())
+ log_leader_commit(r);
}
-void MDCache::logged_master_update(metareqid_t reqid)
+void MDCache::logged_leader_update(metareqid_t reqid)
{
- dout(10) << "logged_master_update " << reqid << dendl;
- ceph_assert(uncommitted_masters.count(reqid));
- uncommitted_masters[reqid].safe = true;
- auto p = pending_masters.find(reqid);
- if (p != pending_masters.end()) {
- pending_masters.erase(p);
- if (pending_masters.empty())
+ dout(10) << "logged_leader_update " << reqid << dendl;
+ ceph_assert(uncommitted_leaders.count(reqid));
+ uncommitted_leaders[reqid].safe = true;
+ auto p = pending_leaders.find(reqid);
+ if (p != pending_leaders.end()) {
+ pending_leaders.erase(p);
+ if (pending_leaders.empty())
process_delayed_resolve();
}
}
/*
- * Master may crash after receiving all slaves' commit acks, but before journalling
+ * Leader may crash after receiving all slaves' commit acks, but before journalling
* the final commit. Slaves may crash after journalling the slave commit, but before
- * sending commit ack to the master. Commit masters with no uncommitted slave when
+ * sending commit ack to the leader. Commit leaders with no uncommitted slave when
* resolve finishes.
*/
-void MDCache::finish_committed_masters()
+void MDCache::finish_committed_leaders()
{
- for (map<metareqid_t, umaster>::iterator p = uncommitted_masters.begin();
- p != uncommitted_masters.end();
+ for (map<metareqid_t, uleader>::iterator p = uncommitted_leaders.begin();
+ p != uncommitted_leaders.end();
++p) {
p->second.recovering = false;
if (!p->second.committing && p->second.slaves.empty()) {
- dout(10) << "finish_committed_masters " << p->first << dendl;
- log_master_commit(p->first);
+ dout(10) << "finish_committed_leaders " << p->first << dendl;
+ log_leader_commit(p->first);
}
}
}
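/*
 * Illustration only (not part of this change): a standalone model of the
 * leader-commit bookkeeping above. Plain std:: containers stand in for
 * MDCache state, printf stands in for journaling ECommitted, and every
 * name here is hypothetical. It shows the rule committed_leader_slave()
 * implements: once the last slave acks and we are not recovering, commit.
 */
#include <cstdio>
#include <map>
#include <set>

struct UleaderModel {                 // cf. struct uleader
  std::set<int> slaves;               // ranks we still expect OP_COMMITTED from
  bool recovering = false;
  bool committing = false;
};

static std::map<int, UleaderModel> leaders_model;   // reqid -> state

static void model_log_leader_commit(int reqid) {
  leaders_model[reqid].committing = true;
  std::printf("journal ECommitted(%d)\n", reqid);   // the real erase happens in _logged_leader_commit
  leaders_model.erase(reqid);
}

static void model_committed_leader_slave(int reqid, int from) {
  auto &u = leaders_model.at(reqid);
  u.slaves.erase(from);
  if (!u.recovering && u.slaves.empty())
    model_log_leader_commit(reqid);
}

int main() {
  leaders_model[42].slaves = {1, 2};
  model_committed_leader_slave(42, 1);  // still waiting on mds.2
  model_committed_leader_slave(42, 2);  // last ack -> "journal ECommitted(42)"
}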
* at end of resolve... we must journal a commit|abort for all slave
* updates, before moving on.
*
- * this is so that the master can safely journal ECommitted on ops it
- * masters when it reaches up:active (all other recovering nodes must
+ * this is so that the leader can safely journal ECommitted on ops it
+ * leads when it reaches up:active (all other recovering nodes must
* complete resolve before that happens).
*/
struct C_MDC_SlaveCommit : public MDCacheLogContext {
for (map<metareqid_t, uslave>::iterator p = uncommitted_slaves.begin();
p != uncommitted_slaves.end();
++p) {
- mds_rank_t master = p->second.master;
- auto &m = resolves[master];
+ mds_rank_t leader = p->second.leader;
+ auto &m = resolves[leader];
if (!m) m = make_message<MMDSResolve>();
m->add_slave_request(p->first, false);
}
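/*
 * Illustration only (not part of this change): the batching in the loop
 * above, modeled standalone. Each uncommitted slave update remembers which
 * rank led it (uslave::leader), and at resolve time we build at most one
 * resolve message per leader listing our requests. Hypothetical names;
 * a vector of reqids stands in for MMDSResolve.
 */
#include <map>
#include <vector>

struct ResolveMsgModel { std::vector<int> slave_requests; };  // cf. MMDSResolve

// reqid -> leader rank in; leader rank -> one batched message out
static std::map<int, ResolveMsgModel>
model_build_resolves(const std::map<int, int> &uncommitted_slaves_model) {
  std::map<int, ResolveMsgModel> resolves;
  for (const auto &[reqid, leader] : uncommitted_slaves_model)
    resolves[leader].slave_requests.push_back(reqid);   // cf. add_slave_request
  return resolves;
}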
if (!mdr->slave_did_prepare() && !mdr->committing) {
continue;
}
- mds_rank_t master = mdr->slave_to_mds;
- if (resolve_set.count(master) || is_ambiguous_slave_update(p->first, master)) {
+ mds_rank_t leader = mdr->slave_to_mds;
+ if (resolve_set.count(leader) || is_ambiguous_slave_update(p->first, leader)) {
dout(10) << " including uncommitted " << *mdr << dendl;
- if (!resolves.count(master))
- resolves[master] = make_message<MMDSResolve>();
+ if (!resolves.count(leader))
+ resolves[leader] = make_message<MMDSResolve>();
if (!mdr->committing &&
mdr->has_more() && mdr->more()->is_inode_exporter) {
// re-send cap exports
bufferlist bl;
MMDSResolve::slave_inode_cap inode_caps(in->ino(), cap_map);
encode(inode_caps, bl);
- resolves[master]->add_slave_request(p->first, bl);
+ resolves[leader]->add_slave_request(p->first, bl);
} else {
- resolves[master]->add_slave_request(p->first, mdr->committing);
+ resolves[leader]->add_slave_request(p->first, mdr->committing);
}
}
}
}
// failed node is slave?
- if (mdr->is_master() && !mdr->committing) {
+ if (mdr->is_leader() && !mdr->committing) {
if (mdr->more()->srcdn_auth_mds == who) {
- dout(10) << " master request " << *mdr << " waiting for rename srcdn's auth mds."
+ dout(10) << " leader request " << *mdr << " waiting for rename srcdn's auth mds."
<< who << " to recover" << dendl;
ceph_assert(mdr->more()->witnessed.count(who) == 0);
if (mdr->more()->is_ambiguous_auth)
mdr->clear_ambiguous_auth();
// rename srcdn's auth mds failed, all witnesses will rollback
mdr->more()->witnessed.clear();
- pending_masters.erase(p->first);
+ pending_leaders.erase(p->first);
}
if (mdr->more()->witnessed.count(who)) {
mds_rank_t srcdn_auth = mdr->more()->srcdn_auth_mds;
if (srcdn_auth >= 0 && mdr->more()->waiting_on_slave.count(srcdn_auth)) {
- dout(10) << " master request " << *mdr << " waiting for rename srcdn's auth mds."
+ dout(10) << " leader request " << *mdr << " waiting for rename srcdn's auth mds."
<< mdr->more()->srcdn_auth_mds << " to reply" << dendl;
// waiting for the slave (rename srcdn's auth mds), delay sending resolve ack
// until either the request is committing or the slave also fails.
ceph_assert(mdr->more()->waiting_on_slave.size() == 1);
- pending_masters.insert(p->first);
+ pending_leaders.insert(p->first);
} else {
- dout(10) << " master request " << *mdr << " no longer witnessed by slave mds."
+ dout(10) << " leader request " << *mdr << " no longer witnessed by slave mds."
<< who << " to recover" << dendl;
if (srcdn_auth >= 0)
ceph_assert(mdr->more()->witnessed.count(srcdn_auth) == 0);
}
if (mdr->more()->waiting_on_slave.count(who)) {
- dout(10) << " master request " << *mdr << " waiting for slave mds." << who
+ dout(10) << " leader request " << *mdr << " waiting for slave mds." << who
<< " to recover" << dendl;
// retry request when peer recovers
mdr->more()->waiting_on_slave.erase(who);
}
}
- for (map<metareqid_t, umaster>::iterator p = uncommitted_masters.begin();
- p != uncommitted_masters.end();
+ for (map<metareqid_t, uleader>::iterator p = uncommitted_leaders.begin();
+ p != uncommitted_leaders.end();
++p) {
// The failed MDS may have already committed the slave update
if (p->second.slaves.count(who)) {
if (!m->slave_requests.empty()) {
if (mds->is_clientreplay() || mds->is_active() || mds->is_stopping()) {
for (auto p = m->slave_requests.begin(); p != m->slave_requests.end(); ++p) {
- if (uncommitted_masters.count(p->first) && !uncommitted_masters[p->first].safe) {
+ if (uncommitted_leaders.count(p->first) && !uncommitted_leaders[p->first].safe) {
ceph_assert(!p->second.committing);
- pending_masters.insert(p->first);
+ pending_leaders.insert(p->first);
}
}
- if (!pending_masters.empty()) {
+ if (!pending_leaders.empty()) {
dout(10) << " still have pending updates, delay processing slave resolve" << dendl;
delayed_resolve[from] = m;
return;
auto ack = make_message<MMDSResolveAck>();
for (const auto &p : m->slave_requests) {
- if (uncommitted_masters.count(p.first)) { //mds->sessionmap.have_completed_request(p.first)) {
+ if (uncommitted_leaders.count(p.first)) { //mds->sessionmap.have_completed_request(p.first)) {
// COMMIT
if (p.second.committing) {
// already committing, waiting for the OP_COMMITTED slave reply
dout(10) << " ambiguous slave request " << p << " will COMMIT" << dendl;
ack->add_commit(p.first);
}
- uncommitted_masters[p.first].slaves.insert(from); // wait for slave OP_COMMITTED before we log ECommitted
+ uncommitted_leaders[p.first].slaves.insert(from); // wait for slave OP_COMMITTED before we log ECommitted
if (p.second.inode_caps.length() > 0) {
// slave wants to export caps (rename)
dout(10) << "maybe_resolve_finish got all resolves+resolve_acks, done." << dendl;
disambiguate_my_imports();
- finish_committed_masters();
+ finish_committed_leaders();
if (resolve_done) {
ceph_assert(mds->is_resolve());
finish_uncommitted_slave(p.first);
} else {
MDRequestRef mdr = request_get(p.first);
- // information about master imported caps
+ // information about leader imported caps
if (p.second.length() > 0)
mdr->more()->inode_import.share(p.second);
}
}
-void MDCache::add_uncommitted_slave(metareqid_t reqid, LogSegment *ls, mds_rank_t master, MDSlaveUpdate *su)
+void MDCache::add_uncommitted_slave(metareqid_t reqid, LogSegment *ls, mds_rank_t leader, MDSlaveUpdate *su)
{
auto const &ret = uncommitted_slaves.emplace(std::piecewise_construct,
std::forward_as_tuple(reqid),
ceph_assert(ret.second);
ls->uncommitted_slaves.insert(reqid);
uslave &u = ret.first->second;
- u.master = master;
+ u.leader = leader;
u.ls = ls;
u.su = su;
if (su == nullptr) {
  return;
}
-MDSlaveUpdate* MDCache::get_uncommitted_slave(metareqid_t reqid, mds_rank_t master)
+MDSlaveUpdate* MDCache::get_uncommitted_slave(metareqid_t reqid, mds_rank_t leader)
{
MDSlaveUpdate* su = nullptr;
auto it = uncommitted_slaves.find(reqid);
if (it != uncommitted_slaves.end() &&
- it->second.master == master) {
+ it->second.leader == leader) {
su = it->second.su;
}
return su;
snapid_t follows=CEPH_NOSNAP);
// slaves
- void add_uncommitted_master(metareqid_t reqid, LogSegment *ls, set<mds_rank_t> &slaves, bool safe=false) {
- uncommitted_masters[reqid].ls = ls;
- uncommitted_masters[reqid].slaves = slaves;
- uncommitted_masters[reqid].safe = safe;
+ void add_uncommitted_leader(metareqid_t reqid, LogSegment *ls, set<mds_rank_t> &slaves, bool safe=false) {
+ uncommitted_leaders[reqid].ls = ls;
+ uncommitted_leaders[reqid].slaves = slaves;
+ uncommitted_leaders[reqid].safe = safe;
}
- void wait_for_uncommitted_master(metareqid_t reqid, MDSContext *c) {
- uncommitted_masters[reqid].waiters.push_back(c);
+ void wait_for_uncommitted_leader(metareqid_t reqid, MDSContext *c) {
+ uncommitted_leaders[reqid].waiters.push_back(c);
}
- bool have_uncommitted_master(metareqid_t reqid, mds_rank_t from) {
- auto p = uncommitted_masters.find(reqid);
- return p != uncommitted_masters.end() && p->second.slaves.count(from) > 0;
+ bool have_uncommitted_leader(metareqid_t reqid, mds_rank_t from) {
+ auto p = uncommitted_leaders.find(reqid);
+ return p != uncommitted_leaders.end() && p->second.slaves.count(from) > 0;
}
- void log_master_commit(metareqid_t reqid);
- void logged_master_update(metareqid_t reqid);
- void _logged_master_commit(metareqid_t reqid);
- void committed_master_slave(metareqid_t r, mds_rank_t from);
- void finish_committed_masters();
+ void log_leader_commit(metareqid_t reqid);
+ void logged_leader_update(metareqid_t reqid);
+ void _logged_leader_commit(metareqid_t reqid);
+ void committed_leader_slave(metareqid_t r, mds_rank_t from);
+ void finish_committed_leaders();
void add_uncommitted_slave(metareqid_t reqid, LogSegment*, mds_rank_t, MDSlaveUpdate *su=nullptr);
void wait_for_uncommitted_slave(metareqid_t reqid, MDSContext *c) {
uncommitted_slaves.at(reqid).waiters.push_back(c);
}
void finish_uncommitted_slave(metareqid_t reqid, bool assert_exist=true);
- MDSlaveUpdate* get_uncommitted_slave(metareqid_t reqid, mds_rank_t master);
+ MDSlaveUpdate* get_uncommitted_slave(metareqid_t reqid, mds_rank_t leader);
void _logged_slave_commit(mds_rank_t from, metareqid_t reqid);
void set_recovery_set(set<mds_rank_t>& s);
void recalc_auth_bits(bool replay);
void remove_inode_recursive(CInode *in);
- bool is_ambiguous_slave_update(metareqid_t reqid, mds_rank_t master) {
- auto p = ambiguous_slave_updates.find(master);
+ bool is_ambiguous_slave_update(metareqid_t reqid, mds_rank_t leader) {
+ auto p = ambiguous_slave_updates.find(leader);
return p != ambiguous_slave_updates.end() && p->second.count(reqid);
}
- void add_ambiguous_slave_update(metareqid_t reqid, mds_rank_t master) {
- ambiguous_slave_updates[master].insert(reqid);
+ void add_ambiguous_slave_update(metareqid_t reqid, mds_rank_t leader) {
+ ambiguous_slave_updates[leader].insert(reqid);
}
- void remove_ambiguous_slave_update(metareqid_t reqid, mds_rank_t master) {
- auto p = ambiguous_slave_updates.find(master);
+ void remove_ambiguous_slave_update(metareqid_t reqid, mds_rank_t leader) {
+ auto p = ambiguous_slave_updates.find(leader);
auto q = p->second.find(reqid);
ceph_assert(q != p->second.end());
p->second.erase(q);
ambiguous_slave_updates.erase(p);
}
- void add_rollback(metareqid_t reqid, mds_rank_t master) {
- resolve_need_rollback[reqid] = master;
+ void add_rollback(metareqid_t reqid, mds_rank_t leader) {
+ resolve_need_rollback[reqid] = leader;
}
void finish_rollback(metareqid_t reqid, MDRequestRef& mdr);
void repair_dirfrag_stats(CDir *dir);
void upgrade_inode_snaprealm(CInode *in);
- // my master
+ // my leader
MDSRank *mds;
// -- my cache --
double export_ephemeral_random_max = 0.0;
protected:
- // track master requests whose slaves haven't acknowledged commit
- struct umaster {
- umaster() {}
+ // track leader requests whose slaves haven't acknowledged commit
+ struct uleader {
+ uleader() {}
set<mds_rank_t> slaves;
LogSegment *ls = nullptr;
MDSContext::vec waiters;
struct uslave {
uslave() {}
- mds_rank_t master;
+ mds_rank_t leader;
LogSegment *ls = nullptr;
MDSlaveUpdate *su = nullptr;
MDSContext::vec waiters;
map<CInode*, int> uncommitted_slave_rename_olddir; // slave: preserve the non-auth dir until seeing commit.
map<CInode*, int> uncommitted_slave_unlink; // slave: preserve the unlinked inode until seeing commit.
- map<metareqid_t, umaster> uncommitted_masters; // master: req -> slave set
+ map<metareqid_t, uleader> uncommitted_leaders; // leader: req -> slave set
map<metareqid_t, uslave> uncommitted_slaves; // slave: preserve the slave req until seeing commit.
- set<metareqid_t> pending_masters;
+ set<metareqid_t> pending_leaders;
map<int, set<metareqid_t> > ambiguous_slave_updates;
bool resolves_pending = false;
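/*
 * Illustration only (not part of this change): the resolve-time verdict the
 * leader gives for each ambiguous slave request (cf. handle_resolve above).
 * COMMIT iff the reqid is still tracked in uncommitted_leaders, recording
 * the sender so ECommitted waits for its OP_COMMITTED; anything the leader
 * does not remember must ABORT. Standalone sketch with hypothetical names.
 */
#include <map>
#include <set>

enum class AckModel { COMMIT, ABORT };

static AckModel model_resolve_ack(std::map<int, std::set<int>> &uncommitted_leaders_model,
                                  int reqid, int from) {
  auto it = uncommitted_leaders_model.find(reqid);
  if (it == uncommitted_leaders_model.end())
    return AckModel::ABORT;     // never journaled here; the slave rolls back
  it->second.insert(from);      // cf. uncommitted_leaders[p.first].slaves.insert(from)
  return AckModel::COMMIT;
}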
f->close_section(); // client_info
} else if (is_slave() && _slave_request) { // replies go to an existing mdr
f->dump_string("op_type", "slave_request");
- f->open_object_section("master_info");
- f->dump_stream("master") << _slave_request->get_orig_source();
- f->close_section(); // master_info
+ f->open_object_section("leader_info");
+ f->dump_stream("leader") << _slave_request->get_orig_source();
+ f->close_section(); // leader_info
f->open_object_section("request_info");
f->dump_int("attempt", _slave_request->get_attempt());
return lock == last_locked;
}
- bool is_master() const { return slave_to_mds == MDS_RANK_NONE; }
+ bool is_leader() const { return slave_to_mds == MDS_RANK_NONE; }
bool is_slave() const { return slave_to_mds != MDS_RANK_NONE; }
client_t get_client() const {
Session *session = nullptr;
elist<MDRequestImpl*>::item item_session_request; // if not on list, op is aborted.
- // -- i am a client (master) request
+ // -- i am a client (leader) request
ceph::cref_t<MClientRequest> client_request; // client request (if any)
// tree and depth info of path1 and path2
if (!mds->is_clientreplay() && !mds->is_active() && !mds->is_stopping()) {
metareqid_t r = m->get_reqid();
- if (!mdcache->have_uncommitted_master(r, from)) {
+ if (!mdcache->have_uncommitted_leader(r, from)) {
dout(10) << "handle_slave_request_reply ignoring slave reply from mds."
<< from << " reqid " << r << dendl;
return;
if (m->get_op() == MMDSSlaveRequest::OP_COMMITTED) {
metareqid_t r = m->get_reqid();
- mdcache->committed_master_slave(r, from);
+ mdcache->committed_leader_slave(r, from);
return;
}
switch (m->get_op()) {
case MMDSSlaveRequest::OP_XLOCKACK:
{
- // identify lock, master request
+ // identify lock, leader request
SimpleLock *lock = mds->locker->get_lock(m->get_lock_type(),
m->get_object_info());
mdr->more()->slaves.insert(from);
case MMDSSlaveRequest::OP_WRLOCKACK:
{
- // identify lock, master request
+ // identify lock, leader request
SimpleLock *lock = mds->locker->get_lock(m->get_lock_type(),
m->get_object_info());
mdr->more()->slaves.insert(from);
dout(20) << " noting uncommitted_slaves " << mdr->more()->witnessed << dendl;
le->reqid = mdr->reqid;
le->had_slaves = true;
- mdcache->add_uncommitted_master(mdr->reqid, mdr->ls, mdr->more()->witnessed);
+ mdcache->add_uncommitted_leader(mdr->reqid, mdr->ls, mdr->more()->witnessed);
}
if (inc) {
ceph_assert(g_conf()->mds_kill_link_at != 3);
if (!mdr->more()->witnessed.empty())
- mdcache->logged_master_update(mdr->reqid);
+ mdcache->logged_leader_update(mdr->reqid);
if (inc) {
// link the new dentry
mdr->auth_pin(targeti);
- //ceph_abort(); // test hack: make sure master can handle a slave that fails to prepare...
+ //ceph_abort(); // test hack: make sure leader can handle a slave that fails to prepare...
ceph_assert(g_conf()->mds_kill_link_at != 5);
// journal it
}
};
-void Server::do_link_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr)
+void Server::do_link_rollback(bufferlist &rbl, mds_rank_t leader, MDRequestRef& mdr)
{
link_rollback rollback;
auto p = rbl.cbegin();
ceph_assert(g_conf()->mds_kill_link_at != 9);
- mdcache->add_rollback(rollback.reqid, master); // need to finish this update before resolve finishes
+ mdcache->add_rollback(rollback.reqid, leader); // need to finish this update before resolve finishes
ceph_assert(mdr || mds->is_resolve());
MutationRef mut(new MutationImpl(nullptr, utime_t(), rollback.reqid));
}
// journal it
- ESlaveUpdate *le = new ESlaveUpdate(mdlog, "slave_link_rollback", rollback.reqid, master,
+ ESlaveUpdate *le = new ESlaveUpdate(mdlog, "slave_link_rollback", rollback.reqid, leader,
ESlaveUpdate::OP_ROLLBACK, ESlaveUpdate::LINK);
mdlog->start_entry(le);
le->commit.add_dir_context(parent);
dout(20) << " noting uncommitted_slaves " << mdr->more()->witnessed << dendl;
le->reqid = mdr->reqid;
le->had_slaves = true;
- mdcache->add_uncommitted_master(mdr->reqid, mdr->ls, mdr->more()->witnessed);
+ mdcache->add_uncommitted_leader(mdr->reqid, mdr->ls, mdr->more()->witnessed);
}
if (straydn) {
dout(10) << "_unlink_local_finish " << *dn << dendl;
if (!mdr->more()->witnessed.empty())
- mdcache->logged_master_update(mdr->reqid);
+ mdcache->logged_leader_update(mdr->reqid);
CInode *strayin = NULL;
bool hadrealm = false;
}
};
-void Server::do_rmdir_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr)
+void Server::do_rmdir_rollback(bufferlist &rbl, mds_rank_t leader, MDRequestRef& mdr)
{
// unlike the other rollback methods, the rmdir rollback is only
// needed to record the subtree changes in the journal for inode
decode(rollback, p);
dout(10) << "do_rmdir_rollback on " << rollback.reqid << dendl;
- mdcache->add_rollback(rollback.reqid, master); // need to finish this update before resolve finishes
+ mdcache->add_rollback(rollback.reqid, leader); // need to finish this update before resolve finishes
ceph_assert(mdr || mds->is_resolve());
CDir *dir = mdcache->get_dirfrag(rollback.src_dir);
}
- ESlaveUpdate *le = new ESlaveUpdate(mdlog, "slave_rmdir_rollback", rollback.reqid, master,
+ ESlaveUpdate *le = new ESlaveUpdate(mdlog, "slave_rmdir_rollback", rollback.reqid, leader,
ESlaveUpdate::OP_ROLLBACK, ESlaveUpdate::RMDIR);
mdlog->start_entry(le);
/** handle_client_rename
*
- * rename master is the destdn auth. this is because cached inodes
+ * rename leader is the destdn auth. this is because cached inodes
* must remain connected. thus, any replica of srci must also
* replicate destdn, and possibly straydn, so that srci (and
* destdn->inode) remain connected during the rename.
*
- * to do this, we freeze srci, then master (destdn auth) verifies that
+ * to do this, we freeze srci, then leader (destdn auth) verifies that
* all other nodes have also replicated destdn and straydn. note that
* destdn replicas need not also replicate srci. this only works when
- * destdn is master.
+ * destdn is leader.
*
* This function takes responsibility for the passed mdr.
*/
le->reqid = mdr->reqid;
le->had_slaves = true;
- mdcache->add_uncommitted_master(mdr->reqid, mdr->ls, mdr->more()->witnessed);
+ mdcache->add_uncommitted_leader(mdr->reqid, mdr->ls, mdr->more()->witnessed);
// no need to send frozen auth pin to recovering auth MDS of srci
mdr->more()->is_remote_frozen_authpin = false;
}
dout(10) << "_rename_finish " << *mdr << dendl;
if (!mdr->more()->witnessed.empty())
- mdcache->logged_master_update(mdr->reqid);
+ mdcache->logged_leader_update(mdr->reqid);
// apply
_rename_apply(mdr, srcdn, destdn, straydn);
mdr->set_ambiguous_auth(srcdnl->get_inode());
// just mark the source inode as ambiguous auth if more than two MDS are involved.
- // the master will send another OP_RENAMEPREP slave request later.
+ // the leader will send another OP_RENAMEPREP slave request later.
if (mdr->slave_request->witnesses.size() > 1) {
dout(10) << " set srci ambiguous auth; providing srcdn replica list" << dendl;
reply_witness = true;
// abort
// rollback_bl may be empty if we froze the inode but had to provide an expanded
- // witness list from the master, and they failed before we tried prep again.
+ // witness list from the leader, and they failed before we tried prep again.
if (mdr->more()->rollback_bl.length()) {
if (mdr->more()->is_inode_exporter) {
dout(10) << " reversing inode export of " << *in << dendl;
} else
do_rename_rollback(mdr->more()->rollback_bl, mdr->slave_to_mds, mdr, true);
} else {
- dout(10) << " rollback_bl empty, not rollback back rename (master failed after getting extra witnesses?)" << dendl;
+ dout(10) << " rollback_bl empty, not rollback back rename (leader failed after getting extra witnesses?)" << dendl;
// singleauth
if (mdr->more()->is_ambiguous_auth) {
if (srcdn->is_auth())
}
};
-void Server::do_rename_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr,
+void Server::do_rename_rollback(bufferlist &rbl, mds_rank_t leader, MDRequestRef& mdr,
bool finish_mdr)
{
rename_rollback rollback;
dout(10) << "do_rename_rollback on " << rollback.reqid << dendl;
// need to finish this update before sending resolve to claim the subtree
- mdcache->add_rollback(rollback.reqid, master);
+ mdcache->add_rollback(rollback.reqid, leader);
MutationRef mut(new MutationImpl(nullptr, utime_t(), rollback.reqid));
mut->ls = mds->mdlog->get_current_segment();
dout(0) << " desti back to " << *target << dendl;
// journal it
- ESlaveUpdate *le = new ESlaveUpdate(mdlog, "slave_rename_rollback", rollback.reqid, master,
+ ESlaveUpdate *le = new ESlaveUpdate(mdlog, "slave_rename_rollback", rollback.reqid, leader,
ESlaveUpdate::OP_ROLLBACK, ESlaveUpdate::RENAME);
mdlog->start_entry(le);
void _commit_slave_link(MDRequestRef& mdr, int r, CInode *targeti);
void _committed_slave(MDRequestRef& mdr); // use for rename, too
void handle_slave_link_prep_ack(MDRequestRef& mdr, const cref_t<MMDSSlaveRequest> &m);
- void do_link_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr);
+ void do_link_rollback(bufferlist &rbl, mds_rank_t leader, MDRequestRef& mdr);
void _link_rollback_finish(MutationRef& mut, MDRequestRef& mdr,
map<client_t,ref_t<MClientSnap>>& split);
void _logged_slave_rmdir(MDRequestRef& mdr, CDentry *srcdn, CDentry *straydn);
void _commit_slave_rmdir(MDRequestRef& mdr, int r, CDentry *straydn);
void handle_slave_rmdir_prep_ack(MDRequestRef& mdr, const cref_t<MMDSSlaveRequest> &ack);
- void do_rmdir_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr);
+ void do_rmdir_rollback(bufferlist &rbl, mds_rank_t leader, MDRequestRef& mdr);
void _rmdir_rollback_finish(MDRequestRef& mdr, metareqid_t reqid, CDentry *dn, CDentry *straydn);
// rename
void _slave_rename_sessions_flushed(MDRequestRef& mdr);
void _logged_slave_rename(MDRequestRef& mdr, CDentry *srcdn, CDentry *destdn, CDentry *straydn);
void _commit_slave_rename(MDRequestRef& mdr, int r, CDentry *srcdn, CDentry *destdn, CDentry *straydn);
- void do_rename_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& mdr, bool finish_mdr=false);
+ void do_rename_rollback(bufferlist &rbl, mds_rank_t leader, MDRequestRef& mdr, bool finish_mdr=false);
void _rename_rollback_finish(MutationRef& mut, MDRequestRef& mdr, CDentry *srcdn, version_t srcdnpv,
CDentry *destdn, CDentry *staydn, map<client_t,ref_t<MClientSnap>> splits[2],
bool finish_mdr);
void put_xlock() {
ceph_assert(state == LOCK_XLOCK || state == LOCK_XLOCKDONE ||
state == LOCK_XLOCKSNAP || state == LOCK_LOCK_XLOCK ||
- state == LOCK_LOCK || /* if we are a master of a slave */
+ state == LOCK_LOCK || /* if we are a leader of a slave */
is_locallock());
--more()->num_xlock;
parent->put(MDSCacheObject::PIN_LOCK);
/*
* rollback records, for remote/slave updates, which may need to be manually
- * rolled back during journal replay. (or while active if master fails, but in
+ * rolled back during journal replay. (or while active if leader fails, but in
* that case these records aren't needed.)
*/
struct link_rollback {
bufferlist rollback;
string type;
metareqid_t reqid;
- mds_rank_t master;
+ mds_rank_t leader;
__u8 op; // prepare, commit, abort
__u8 origop; // link | rename
- ESlaveUpdate() : LogEvent(EVENT_SLAVEUPDATE), master(0), op(0), origop(0) { }
- ESlaveUpdate(MDLog *mdlog, std::string_view s, metareqid_t ri, int mastermds, int o, int oo) :
+ ESlaveUpdate() : LogEvent(EVENT_SLAVEUPDATE), leader(0), op(0), origop(0) { }
+ ESlaveUpdate(MDLog *mdlog, std::string_view s, metareqid_t ri, int leadermds, int o, int oo) :
LogEvent(EVENT_SLAVEUPDATE),
type(s),
reqid(ri),
- master(mastermds),
+ leader(leadermds),
op(o), origop(oo) { }
void print(ostream& out) const override {
if (origop == LINK) out << " link";
if (origop == RENAME) out << " rename";
out << " " << reqid;
- out << " for mds." << master;
+ out << " for mds." << leader;
out << commit;
}
}
}
- // master ops with possibly uncommitted slaves
- for (set<metareqid_t>::iterator p = uncommitted_masters.begin();
- p != uncommitted_masters.end();
+ // leader ops with possibly uncommitted slaves
+ for (set<metareqid_t>::iterator p = uncommitted_leaders.begin();
+ p != uncommitted_leaders.end();
++p) {
dout(10) << "try_to_expire waiting for slaves to ack commit on " << *p << dendl;
- mds->mdcache->wait_for_uncommitted_master(*p, gather_bld.new_sub());
+ mds->mdcache->wait_for_uncommitted_leader(*p, gather_bld.new_sub());
}
// slave ops that haven't been committed
for (set<metareqid_t>::iterator p = uncommitted_slaves.begin();
p != uncommitted_slaves.end();
++p) {
- dout(10) << "try_to_expire waiting for master to ack OP_FINISH on " << *p << dendl;
+ dout(10) << "try_to_expire waiting for leader to ack OP_FINISH on " << *p << dendl;
mds->mdcache->wait_for_uncommitted_slave(*p, gather_bld.new_sub());
}
segment->sessionmapv = cmapv;
if (had_slaves)
- segment->uncommitted_masters.insert(reqid);
+ segment->uncommitted_leaders.insert(reqid);
}
void EUpdate::replay(MDSRank *mds)
if (had_slaves) {
dout(10) << "EUpdate.replay " << reqid << " had slaves, expecting a matching ECommitted" << dendl;
- segment->uncommitted_masters.insert(reqid);
+ segment->uncommitted_leaders.insert(reqid);
set<mds_rank_t> slaves;
- mds->mdcache->add_uncommitted_master(reqid, segment, slaves, true);
+ mds->mdcache->add_uncommitted_leader(reqid, segment, slaves, true);
}
if (client_map.length()) {
void ECommitted::replay(MDSRank *mds)
{
- if (mds->mdcache->uncommitted_masters.count(reqid)) {
+ if (mds->mdcache->uncommitted_leaders.count(reqid)) {
dout(10) << "ECommitted.replay " << reqid << dendl;
- mds->mdcache->uncommitted_masters[reqid].ls->uncommitted_masters.erase(reqid);
- mds->mdcache->uncommitted_masters.erase(reqid);
+ mds->mdcache->uncommitted_leaders[reqid].ls->uncommitted_leaders.erase(reqid);
+ mds->mdcache->uncommitted_leaders.erase(reqid);
} else {
dout(10) << "ECommitted.replay " << reqid << " -- didn't see original op" << dendl;
}
encode(stamp, bl);
encode(type, bl);
encode(reqid, bl);
- encode(master, bl);
+ encode(leader, bl);
encode(op, bl);
encode(origop, bl);
encode(commit, bl, features);
decode(stamp, bl);
decode(type, bl);
decode(reqid, bl);
- decode(master, bl);
+ decode(leader, bl);
decode(op, bl);
decode(origop, bl);
decode(commit, bl);
f->dump_int("rollback length", rollback.length());
f->dump_string("type", type);
f->dump_stream("metareqid") << reqid;
- f->dump_int("master", master);
+ f->dump_int("leader", leader);
f->dump_int("op", op);
f->dump_int("original op", origop);
}
auto&& segment = get_segment();
switch (op) {
case ESlaveUpdate::OP_PREPARE:
- dout(10) << "ESlaveUpdate.replay prepare " << reqid << " for mds." << master
+ dout(10) << "ESlaveUpdate.replay prepare " << reqid << " for mds." << leader
<< ": applying commit, saving rollback info" << dendl;
su = new MDSlaveUpdate(origop, rollback);
commit.replay(mds, segment, su);
- mds->mdcache->add_uncommitted_slave(reqid, segment, master, su);
+ mds->mdcache->add_uncommitted_slave(reqid, segment, leader, su);
break;
case ESlaveUpdate::OP_COMMIT:
- dout(10) << "ESlaveUpdate.replay commit " << reqid << " for mds." << master << dendl;
+ dout(10) << "ESlaveUpdate.replay commit " << reqid << " for mds." << leader << dendl;
mds->mdcache->finish_uncommitted_slave(reqid, false);
break;
case ESlaveUpdate::OP_ROLLBACK:
- dout(10) << "ESlaveUpdate.replay abort " << reqid << " for mds." << master
+ dout(10) << "ESlaveUpdate.replay abort " << reqid << " for mds." << leader
<< ": applying rollback commit blob" << dendl;
commit.replay(mds, segment);
mds->mdcache->finish_uncommitted_slave(reqid, false);
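/*
 * Illustration only (not part of this change): the three ESlaveUpdate
 * replay transitions above, modeled standalone. PREPARE keeps rollback
 * state until the leader's verdict, COMMIT discards it, ROLLBACK applies
 * it and then discards it. Hypothetical names; a string stands in for the
 * rollback bufferlist.
 */
#include <map>
#include <string>

enum class SlaveOpModel { PREPARE, COMMIT, ROLLBACK };

static std::map<int, std::string> pending_rollback_model;   // reqid -> rollback blob

static void model_replay_slave_update(SlaveOpModel op, int reqid,
                                      const std::string &rollback = "") {
  switch (op) {
  case SlaveOpModel::PREPARE:
    pending_rollback_model[reqid] = rollback;  // cf. add_uncommitted_slave(reqid, segment, leader, su)
    break;
  case SlaveOpModel::COMMIT:
    pending_rollback_model.erase(reqid);       // cf. finish_uncommitted_slave(reqid, false)
    break;
  case SlaveOpModel::ROLLBACK:
    // a full replay would decode and apply pending_rollback_model[reqid] first
    pending_rollback_model.erase(reqid);
    break;
  }
}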