ceph_fs.h: add separate owner_{u,g}id fields

author Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>

Thu, 3 Aug 2023 12:15:28 +0000 (14:15 +0200)

committer Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>

Mon, 7 Aug 2023 11:36:25 +0000 (13:36 +0200)
author Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Thu, 3 Aug 2023 12:15:28 +0000 (14:15 +0200)
committer Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Mon, 7 Aug 2023 11:36:25 +0000 (13:36 +0200)
diff --git a/src/client/Client.cc b/src/client/Client.cc

index 66e964658ef01eea1741aa06e4e87d419ac65e73..017c1d2e4bde0d519e338df651cbb90584997453 100644 (file)
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -2592,7 +2592,7 @@ ref_t<MClientRequest> Client::build_client_request(MetaRequest *request, mds_ran
      }
    }
  
-  auto req = make_message<MClientRequest>(request->get_op(), old_version);
+  auto req = make_message<MClientRequest>(request->get_op(), session->mds_features);
    req->set_tid(request->tid);
    req->set_stamp(request->op_stamp);
    memcpy(&req->head, &request->head, sizeof(ceph_mds_request_head));
@@ -14443,6 +14443,8 @@ int Client::_mknod(Inode *dir, const char *name, mode_t mode, dev_t rdev,
  
    MetaRequest *req = new MetaRequest(CEPH_MDS_OP_MKNOD);
  
+  req->set_inode_owner_uid_gid(perms.uid(), perms.gid());
+
    filepath path;
    dir->make_nosnap_relative_path(path);
    path.push_dentry(name);
@@ -14587,6 +14589,8 @@ int Client::_create(Inode *dir, const char *name, int flags, mode_t mode,
  
    MetaRequest *req = new MetaRequest(CEPH_MDS_OP_CREATE);
  
+  req->set_inode_owner_uid_gid(perms.uid(), perms.gid());
+
    filepath path;
    dir->make_nosnap_relative_path(path);
    path.push_dentry(name);
@@ -14664,6 +14668,9 @@ int Client::_mkdir(Inode *dir, const char *name, mode_t mode, const UserPerm& pe
    MetaRequest *req = new MetaRequest(is_snap_op ?
                                      CEPH_MDS_OP_MKSNAP : CEPH_MDS_OP_MKDIR);
  
+  if (!is_snap_op)
+    req->set_inode_owner_uid_gid(perm.uid(), perm.gid());
+
    filepath path;
    dir->make_nosnap_relative_path(path);
    path.push_dentry(name);
@@ -14802,6 +14809,8 @@ int Client::_symlink(Inode *dir, const char *name, const char *target,
  
    MetaRequest *req = new MetaRequest(CEPH_MDS_OP_SYMLINK);
  
+  req->set_inode_owner_uid_gid(perms.uid(), perms.gid());
+
    filepath path;
    dir->make_nosnap_relative_path(path);
    path.push_dentry(name);
diff --git a/src/client/MetaRequest.cc b/src/client/MetaRequest.cc

index 3994424e79360800a24badf79f322cb88d691d22..6d709db5831d20319dc03dfd79937f6f166ed320 100644 (file)
--- a/src/client/MetaRequest.cc
+++ b/src/client/MetaRequest.cc
@@ -51,6 +51,9 @@ void MetaRequest::dump(Formatter *f) const
    f->dump_unsigned("num_releases", head.num_releases);
  
    f->dump_int("abort_rc", abort_rc);
+
+  f->dump_unsigned("owner_uid", head.owner_uid);
+  f->dump_unsigned("owner_gid", head.owner_gid);
  }
  
  MetaRequest::~MetaRequest()
diff --git a/src/client/MetaRequest.h b/src/client/MetaRequest.h

index e8ff0aa4df2eb5fe6e993fe1e1876082dadfd2d5..240c0cd02a39578abdcee6dfc8c6619041d9550f 100644 (file)
--- a/src/client/MetaRequest.h
+++ b/src/client/MetaRequest.h
@@ -80,6 +80,8 @@ public:
      unsafe_target_item(this) {
      memset(&head, 0, sizeof(head));
      head.op = op;
+    head.owner_uid = -1;
+    head.owner_gid = -1;
    }
    ~MetaRequest();
  
@@ -153,6 +155,13 @@ public:
      return v == 0;
    }
  
+  void set_inode_owner_uid_gid(unsigned u, unsigned g) {
+    /* owner_{u,g}id is only meaningful for ops that create inodes, so assert on the op */
+    ceph_assert(IS_CEPH_MDS_OP_NEWINODE(head.op));
+    head.owner_uid = u;
+    head.owner_gid = g;
+  }
+
    // normal fields
    void set_tid(ceph_tid_t t) { tid = t; }
    void set_oldest_client_tid(ceph_tid_t t) { head.oldest_client_tid = t; }
diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h

index 42e5e53b438a39c62e2581d0b8387adbe8bd2569..28440c820dcfb8f17d0a04fcc301401025ec80fd 100644 (file)
--- a/src/include/ceph_fs.h
+++ b/src/include/ceph_fs.h
@@ -430,6 +430,11 @@ enum {
         CEPH_MDS_OP_RDLOCK_FRAGSSTATS = 0x01507
  };
  
+#define IS_CEPH_MDS_OP_NEWINODE(op) ((op) == CEPH_MDS_OP_CREATE     || \
+                                    (op) == CEPH_MDS_OP_MKNOD      || \
+                                    (op) == CEPH_MDS_OP_MKDIR      || \
+                                    (op) == CEPH_MDS_OP_SYMLINK) /* true for ops that create an inode */
+
  extern const char *ceph_mds_op_name(int op);
  
  // setattr mask is an int
@@ -633,7 +638,7 @@ union ceph_mds_request_args {
         } __attribute__ ((packed)) snapdiff;
  } __attribute__ ((packed));
  
-#define CEPH_MDS_REQUEST_HEAD_VERSION  2
+#define CEPH_MDS_REQUEST_HEAD_VERSION  3
  
  /*
   * Note that any change to this structure must ensure that it is compatible
@@ -654,9 +659,12 @@ struct ceph_mds_request_head {
  
         __le32 ext_num_retry;          /* new count retry attempts */
         __le32 ext_num_fwd;            /* new count fwd attempts */
+
+       __le32 struct_len;             /* to store size of struct ceph_mds_request_head */
+       __le32 owner_uid, owner_gid;   /* used for OPs which create inodes */
  } __attribute__ ((packed));
  
-void inline encode(const struct ceph_mds_request_head& h, ceph::buffer::list& bl, bool old_version) {
+void inline encode(const struct ceph_mds_request_head& h, ceph::buffer::list& bl) {
    using ceph::encode;
    encode(h.version, bl);
    encode(h.oldest_client_tid, bl);
@@ -676,14 +684,30 @@ void inline encode(const struct ceph_mds_request_head& h, ceph::buffer::list& bl
    encode(h.ino, bl);
    bl.append((char*)&h.args, sizeof(h.args));
  
-  if (!old_version) {
+  if (h.version >= 2) {
      encode(h.ext_num_retry, bl);
      encode(h.ext_num_fwd, bl);
    }
+
+  if (h.version >= 3) {
+    __u32 struct_len = sizeof(struct ceph_mds_request_head);
+    encode(struct_len, bl);
+    encode(h.owner_uid, bl);
+    encode(h.owner_gid, bl);
+
+    /*
+     * Add handling for new fields here.
+     * There is no need to check h.version for them the
+     * way decode() does, because decode() can properly
+     * skip all unsupported fields if h.version >= 3.
+     */
+  }
  }
  
  void inline decode(struct ceph_mds_request_head& h, ceph::buffer::list::const_iterator& bl) {
    using ceph::decode;
+  unsigned struct_end = bl.get_off();
+
    decode(h.version, bl);
    decode(h.oldest_client_tid, bl);
    decode(h.mdsmap_epoch, bl);
@@ -704,6 +728,42 @@ void inline decode(struct ceph_mds_request_head& h, ceph::buffer::list::const_it
      h.ext_num_retry = h.num_retry;
      h.ext_num_fwd = h.num_fwd;
    }
+
+  if (h.version >= 3) {
+    decode(h.struct_len, bl);
+    struct_end += h.struct_len;
+
+    decode(h.owner_uid, bl);
+    decode(h.owner_gid, bl);
+  } else {
+    /*
+     * The client is old: take caller_{u,g}id as owner_{u,g}id,
+     * which is how it worked before owner_{u,g}id fields were added.
+     */
+    h.owner_uid = h.caller_uid;
+    h.owner_gid = h.caller_gid;
+  }
+
+  /* add new fields handling here */
+
+  /*
+   * From version 3 on we have the struct_len field.
+   * It allows us to properly handle the case
+   * when a client sends a struct ceph_mds_request_head
+   * that is bigger than the MDS supports. In this
+   * case we just want to skip all remaining bytes
+   * at the end.
+   *
+   * See also the DECODE_FINISH macro. Unfortunately,
+   * we can't start using it right now as it would be
+   * an incompatible protocol change.
+   */
+  if (h.version >= 3) {
+    if (bl.get_off() > struct_end)
+      throw ::ceph::buffer::malformed_input(DECODE_ERR_PAST(__PRETTY_FUNCTION__));
+    if (bl.get_off() < struct_end)
+      bl += struct_end - bl.get_off();
+  }
  }
  
  /* cap/lease release record */
diff --git a/src/mds/Server.cc b/src/mds/Server.cc

index 91dd0a3bd7d623755b1f479a4ed1d1009b3fc4df..b7be85571f73af09d5051066e931a2ff1315f3d4 100644 (file)
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -3506,10 +3506,12 @@ CInode* Server::prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino
        _inode->mode |= S_ISGID;
      }
    } else {
-    _inode->gid = mdr->client_request->get_caller_gid();
+    _inode->gid = mdr->client_request->get_owner_gid();
+    ceph_assert(_inode->gid != (unsigned)-1);
    }
  
-  _inode->uid = mdr->client_request->get_caller_uid();
+  _inode->uid = mdr->client_request->get_owner_uid();
+  ceph_assert(_inode->uid != (unsigned)-1);
  
    _inode->btime = _inode->ctime = _inode->mtime = _inode->atime =
      mdr->get_op_stamp();
diff --git a/src/mds/cephfs_features.cc b/src/mds/cephfs_features.cc

index a19ff80ac727e61598f983d6cf74f97f23bb127c..4a864076b9de3e132d0e8a49730efa1ef3f31843 100644 (file)
--- a/src/mds/cephfs_features.cc
+++ b/src/mds/cephfs_features.cc
@@ -29,6 +29,7 @@ static const std::array feature_names
    "op_getvxattr",
    "32bits_retry_fwd",
    "new_snaprealm_info",
+  "has_owner_uidgid",
  };
  static_assert(feature_names.size() == CEPHFS_FEATURE_MAX + 1);
  
diff --git a/src/mds/cephfs_features.h b/src/mds/cephfs_features.h

index 9c16388ecd2880b2e97eae0440fdb3bbc8f680e7..7d215e2a3e5aeaaf48c24f265e4c77de9aa3b474 100644 (file)
--- a/src/mds/cephfs_features.h
+++ b/src/mds/cephfs_features.h
@@ -47,7 +47,8 @@ namespace ceph {
  #define CEPHFS_FEATURE_OP_GETVXATTR         17
  #define CEPHFS_FEATURE_32BITS_RETRY_FWD     18
  #define CEPHFS_FEATURE_NEW_SNAPREALM_INFO   19
-#define CEPHFS_FEATURE_MAX                  19
+#define CEPHFS_FEATURE_HAS_OWNER_UIDGID     20
+#define CEPHFS_FEATURE_MAX                  20
  
  #define CEPHFS_FEATURES_ALL {          \
    0, 1, 2, 3, 4,                       \
@@ -67,7 +68,8 @@ namespace ceph {
    CEPHFS_FEATURE_NOTIFY_SESSION_STATE,  \
    CEPHFS_FEATURE_OP_GETVXATTR,          \
    CEPHFS_FEATURE_32BITS_RETRY_FWD,      \
-  CEPHFS_FEATURE_NEW_SNAPREALM_INFO     \
+  CEPHFS_FEATURE_NEW_SNAPREALM_INFO,    \
+  CEPHFS_FEATURE_HAS_OWNER_UIDGID,      \
  }
  
  #define CEPHFS_METRIC_FEATURES_ALL {           \
diff --git a/src/messages/MClientRequest.h b/src/messages/MClientRequest.h

index d8cec31531a9640e567361eac571307da9f62fa8..c62e183a756336bfdd82961fb0be04d30b978475 100644 (file)
--- a/src/messages/MClientRequest.h
+++ b/src/messages/MClientRequest.h
@@ -38,6 +38,7 @@
  #include "include/filepath.h"
  #include "mds/mdstypes.h"
  #include "include/ceph_features.h"
+#include "mds/cephfs_features.h"
  #include "messages/MMDSOp.h"
  
  #include <sys/types.h>
@@ -73,7 +74,7 @@ private:
  public:
    mutable struct ceph_mds_request_head head; /* XXX HACK! */
    utime_t stamp;
-  bool peer_old_version = false;
+  feature_bitset_t mds_features;
  
    struct Release {
      mutable ceph_mds_request_release item;
@@ -113,12 +114,16 @@ protected:
    MClientRequest()
      : MMDSOp(CEPH_MSG_CLIENT_REQUEST, HEAD_VERSION, COMPAT_VERSION) {
      memset(&head, 0, sizeof(head));
+    head.owner_uid = -1;
+    head.owner_gid = -1;
    }
-  MClientRequest(int op, bool over=true)
+  MClientRequest(int op, feature_bitset_t features = 0)
      : MMDSOp(CEPH_MSG_CLIENT_REQUEST, HEAD_VERSION, COMPAT_VERSION) {
      memset(&head, 0, sizeof(head));
      head.op = op;
-    peer_old_version = over;
+    mds_features = features;
+    head.owner_uid = -1;
+    head.owner_gid = -1;
    }
    ~MClientRequest() final {}
  
@@ -201,6 +206,8 @@ public:
    int get_op() const { return head.op; }
    unsigned get_caller_uid() const { return head.caller_uid; }
    unsigned get_caller_gid() const { return head.caller_gid; }
+  unsigned get_owner_uid() const { return head.owner_uid; }
+  unsigned get_owner_gid() const { return head.owner_gid; }
    const std::vector<uint64_t>& get_caller_gid_list() const { return gid_list; }
  
    const std::string& get_path() const { return path.get_path(); }
@@ -262,14 +269,16 @@ public:
       * client will just copy the 'head' memory and isn't
       * that smart to skip them.
       */
-    if (peer_old_version) {
+    if (!mds_features.test(CEPHFS_FEATURE_32BITS_RETRY_FWD)) {
        head.version = 1;
+    } else if (!mds_features.test(CEPHFS_FEATURE_HAS_OWNER_UIDGID)) {
+      head.version = 2;
      } else {
        head.version = CEPH_MDS_REQUEST_HEAD_VERSION;
      }
  
      if (features & CEPH_FEATURE_FS_BTIME) {
-      encode(head, payload, peer_old_version);
+      encode(head, payload);
      } else {
        struct ceph_mds_request_head_legacy old_mds_head;
  
@@ -292,6 +301,10 @@ public:
      out << "client_request(" << get_orig_source()
         << ":" << get_tid()
         << " " << ceph_mds_op_name(get_op());
+    if (IS_CEPH_MDS_OP_NEWINODE(head.op)) {
+      out << " owner_uid=" << head.owner_uid
+         << ", owner_gid=" << head.owner_gid;
+    }
      if (head.op == CEPH_MDS_OP_GETATTR)
        out << " " << ccap_string(head.args.getattr.mask);
      if (head.op == CEPH_MDS_OP_SETATTR) {
author	Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
	Thu, 3 Aug 2023 12:15:28 +0000 (14:15 +0200)
committer	Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
	Mon, 7 Aug 2023 11:36:25 +0000 (13:36 +0200)
src/client/Client.cc		patch \| blob \| history
src/client/MetaRequest.cc		patch \| blob \| history
src/client/MetaRequest.h		patch \| blob \| history
src/include/ceph_fs.h		patch \| blob \| history
src/mds/Server.cc		patch \| blob \| history
src/mds/cephfs_features.cc		patch \| blob \| history
src/mds/cephfs_features.h		patch \| blob \| history
src/messages/MClientRequest.h		patch \| blob \| history