From: Xiubo Li Date: Fri, 27 May 2022 05:11:44 +0000 (+0800) Subject: client: choose auth MDS for getxattr with the Xs caps X-Git-Tag: v17.2.6~130^2~31^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=d3333009a1fe91ccd7b092a3cbe676eafe0b5106;p=ceph.git client: choose auth MDS for getxattr with the Xs caps If any 'x' caps are issued, we can just choose the auth MDS instead of a random replica MDS, because the loner client can get the 'x' caps only when the Locker is in the LOCK_EXEC state. If we send the getattr request to a replica MDS, it must auth pin and try to rdlock from the auth MDS, and the auth MDS then needs to do the Locker state transition to LOCK_SYNC. After that, the lock state will change back. The Locker state transition is costly and usually requires revoking caps from clients. For getxattr with the 'Xs' caps we also choose the auth MDS, because the MDS-side code is buggy: setxattr does not propagate the increased xattr_version to the replica MDSes when the values change, so a replica MDS returns the old xattr_version value. The client then just drops the xattr values because it sees that the xattr_version has not changed. 
Fixes: https://tracker.ceph.com/issues/55778 Signed-off-by: Xiubo Li (cherry picked from commit 241608df24bd1d53f7cafada53f14ccf74c3e946) --- diff --git a/src/client/Client.cc b/src/client/Client.cc index 33253ed10c0d1..e5f8801fca2f2 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -1523,6 +1523,7 @@ mds_rank_t Client::choose_target_mds(MetaRequest *req, Inode** phash_diri) mds_rank_t mds = MDS_RANK_NONE; __u32 hash = 0; bool is_hash = false; + int issued = 0; Inode *in = NULL; Dentry *de = NULL; @@ -1583,9 +1584,12 @@ mds_rank_t Client::choose_target_mds(MetaRequest *req, Inode** phash_diri) ldout(cct, 20) << __func__ << " " << *in << " is_hash=" << is_hash << " hash=" << hash << dendl; + if (req->get_op() == CEPH_MDS_OP_GETATTR) + issued = req->inode()->caps_issued(); + if (is_hash && S_ISDIR(in->mode) && (!in->fragmap.empty() || !in->frag_repmap.empty())) { frag_t fg = in->dirfragtree[hash]; - if (!req->auth_is_best()) { + if (!req->auth_is_best(issued)) { auto repmapit = in->frag_repmap.find(fg); if (repmapit != in->frag_repmap.end()) { auto& repmap = repmapit->second; @@ -1606,7 +1610,7 @@ mds_rank_t Client::choose_target_mds(MetaRequest *req, Inode** phash_diri) } } - if (in->auth_cap && req->auth_is_best()) { + if (in->auth_cap && req->auth_is_best(issued)) { mds = in->auth_cap->session->mds_num; } else if (!in->caps.empty()) { mds = in->caps.begin()->second.session->mds_num; diff --git a/src/client/MetaRequest.h b/src/client/MetaRequest.h index 630f5ee15bc87..56ba32253d9fe 100644 --- a/src/client/MetaRequest.h +++ b/src/client/MetaRequest.h @@ -186,12 +186,45 @@ public: return false; return true; } - bool auth_is_best() { - if ((head.op & CEPH_MDS_OP_WRITE) || head.op == CEPH_MDS_OP_OPEN || - (head.op == CEPH_MDS_OP_GETATTR && (head.args.getattr.mask & CEPH_STAT_RSTAT)) || - head.op == CEPH_MDS_OP_READDIR || send_to_auth) + bool auth_is_best(int issued) { + if (send_to_auth) return true; - return false; + + /* Any write op ? 
*/ + if (head.op & CEPH_MDS_OP_WRITE) + return true; + + switch (head.op) { + case CEPH_MDS_OP_OPEN: + case CEPH_MDS_OP_READDIR: + return true; + case CEPH_MDS_OP_GETATTR: + /* + * If any 'x' caps is issued we can just choose the auth MDS + * instead of the random replica MDSes. Because only when the + * Locker is in LOCK_EXEC state will the loner client could + * get the 'x' caps. And if we send the getattr requests to + * any replica MDS it must auth pin and tries to rdlock from + * the auth MDS, and then the auth MDS need to do the Locker + * state transition to LOCK_SYNC. And after that the lock state + * will change back. + * + * This cost much when doing the Locker state transition and + * usually will need to revoke caps from clients. + * + * And for the 'Xs' caps for getxattr we will also choose the + * auth MDS, because the MDS side code is buggy due to setxattr + * won't notify the replica MDSes when the values changed and + * the replica MDS will return the old values. Though we will + * fix it in MDS code, but this still makes sense for old ceph. + */ + if (((head.args.getattr.mask & CEPH_CAP_ANY_SHARED) && + (issued & CEPH_CAP_ANY_EXCL)) || + (head.args.getattr.mask & (CEPH_STAT_RSTAT | CEPH_STAT_CAP_XATTR))) + return true; + default: + return false; + } } void dump(Formatter *f) const;