mds_rank_t mds = MDS_RANK_NONE;
__u32 hash = 0;
bool is_hash = false;
+ int issued = 0;
Inode *in = NULL;
Dentry *de = NULL;
ldout(cct, 20) << __func__ << " " << *in << " is_hash=" << is_hash
<< " hash=" << hash << dendl;
+ if (req->get_op() == CEPH_MDS_OP_GETATTR)
+ issued = req->inode()->caps_issued();
+
if (is_hash && S_ISDIR(in->mode) && (!in->fragmap.empty() || !in->frag_repmap.empty())) {
frag_t fg = in->dirfragtree[hash];
- if (!req->auth_is_best()) {
+ if (!req->auth_is_best(issued)) {
auto repmapit = in->frag_repmap.find(fg);
if (repmapit != in->frag_repmap.end()) {
auto& repmap = repmapit->second;
}
}
- if (in->auth_cap && req->auth_is_best()) {
+ if (in->auth_cap && req->auth_is_best(issued)) {
mds = in->auth_cap->session->mds_num;
} else if (!in->caps.empty()) {
mds = in->caps.begin()->second.session->mds_num;
return false;
return true;
}
-  bool auth_is_best() {
-    if ((head.op & CEPH_MDS_OP_WRITE) || head.op == CEPH_MDS_OP_OPEN ||
-        (head.op == CEPH_MDS_OP_GETATTR && (head.args.getattr.mask & CEPH_STAT_RSTAT)) ||
-        head.op == CEPH_MDS_OP_READDIR || send_to_auth)
+  /**
+   * Tell whether this request should be sent to the auth MDS rather than
+   * to some other MDS.
+   *
+   * @param issued  bitmask of caps currently issued on the request's inode.
+   *                It is only consulted for CEPH_MDS_OP_GETATTR; callers
+   *                may pass 0 for every other op.
+   * @return true when send_to_auth is set, when the op has the
+   *         CEPH_MDS_OP_WRITE bit, when the op is OPEN or READDIR, or when
+   *         it is a GETATTR matching the conditions described below;
+   *         false otherwise.
+   */
+  bool auth_is_best(int issued) {
+    if (send_to_auth)
       return true;
-    return false;
+
+    /* Any write op? */
+    if (head.op & CEPH_MDS_OP_WRITE)
+      return true;
+
+    switch (head.op) {
+    case CEPH_MDS_OP_OPEN:
+    case CEPH_MDS_OP_READDIR:
+      return true;
+    case CEPH_MDS_OP_GETATTR:
+      /*
+       * If any 'x' cap is issued we choose the auth MDS instead of a
+       * random replica MDS: a loner client can only get 'x' caps while
+       * the Locker is in the LOCK_EXEC state. If the getattr went to a
+       * replica MDS, that replica would have to auth pin and try to
+       * rdlock from the auth MDS, forcing the auth MDS to transition
+       * the lock to LOCK_SYNC, after which the lock state changes back.
+       *
+       * That state transition is expensive and usually requires
+       * revoking caps from clients.
+       *
+       * A getattr asking for the 'Xs' caps (getxattr) also goes to the
+       * auth MDS: the MDS side is buggy in that setxattr does not
+       * notify the replica MDSes when values change, so a replica may
+       * return stale values. That will be fixed in the MDS code, but
+       * this still makes sense when talking to older ceph releases.
+       *
+       * The condition is returned directly so that this case never
+       * falls through implicitly into the default label.
+       */
+      return ((head.args.getattr.mask & CEPH_CAP_ANY_SHARED) &&
+              (issued & CEPH_CAP_ANY_EXCL)) ||
+             (head.args.getattr.mask & (CEPH_STAT_RSTAT | CEPH_STAT_CAP_XATTR));
+    default:
+      return false;
+    }
   }
void dump(Formatter *f) const;