ceph_fs.h: add separate owner_{u,g}id fields

author Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>

Thu, 3 Aug 2023 12:15:28 +0000 (14:15 +0200)

committer Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>

Thu, 24 Aug 2023 12:54:11 +0000 (14:54 +0200)
author Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Thu, 3 Aug 2023 12:15:28 +0000 (14:15 +0200)
committer Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Thu, 24 Aug 2023 12:54:11 +0000 (14:54 +0200)
diff --git a/src/client/Client.cc b/src/client/Client.cc

index 6d5958a40a0b9c4e0b735c02d42a491f57bfcd41..4459c5cab71eb1c89b9754d90221b50aa551da2b 100644 (file)
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -2553,7 +2553,7 @@ ref_t<MClientRequest> Client::build_client_request(MetaRequest *request, mds_ran
      }
    }
  
-  auto req = make_message<MClientRequest>(request->get_op(), old_version);
+  auto req = make_message<MClientRequest>(request->get_op(), session->mds_features);
    req->set_tid(request->tid);
    req->set_stamp(request->op_stamp);
    memcpy(&req->head, &request->head, sizeof(ceph_mds_request_head));
@@ -13439,6 +13439,8 @@ int Client::_mknod(Inode *dir, const char *name, mode_t mode, dev_t rdev,
  
    MetaRequest *req = new MetaRequest(CEPH_MDS_OP_MKNOD);
  
+  req->set_inode_owner_uid_gid(perms.uid(), perms.gid());
+
    filepath path;
    dir->make_nosnap_relative_path(path);
    path.push_dentry(name);
@@ -13588,6 +13590,8 @@ int Client::_create(Inode *dir, const char *name, int flags, mode_t mode,
  
    MetaRequest *req = new MetaRequest(CEPH_MDS_OP_CREATE);
  
+  req->set_inode_owner_uid_gid(perms.uid(), perms.gid());
+
    filepath path;
    dir->make_nosnap_relative_path(path);
    path.push_dentry(name);
@@ -13670,6 +13674,9 @@ int Client::_mkdir(Inode *dir, const char *name, mode_t mode, const UserPerm& pe
    MetaRequest *req = new MetaRequest(is_snap_op ?
                                      CEPH_MDS_OP_MKSNAP : CEPH_MDS_OP_MKDIR);
  
+  if (!is_snap_op)
+    req->set_inode_owner_uid_gid(perm.uid(), perm.gid());
+
    filepath path;
    dir->make_nosnap_relative_path(path);
    path.push_dentry(name);
@@ -13813,6 +13820,8 @@ int Client::_symlink(Inode *dir, const char *name, const char *target,
  
    MetaRequest *req = new MetaRequest(CEPH_MDS_OP_SYMLINK);
  
+  req->set_inode_owner_uid_gid(perms.uid(), perms.gid());
+
    filepath path;
    dir->make_nosnap_relative_path(path);
    path.push_dentry(name);
diff --git a/src/client/MetaRequest.cc b/src/client/MetaRequest.cc

index 3994424e79360800a24badf79f322cb88d691d22..6d709db5831d20319dc03dfd79937f6f166ed320 100644 (file)
--- a/src/client/MetaRequest.cc
+++ b/src/client/MetaRequest.cc
@@ -51,6 +51,9 @@ void MetaRequest::dump(Formatter *f) const
    f->dump_unsigned("num_releases", head.num_releases);
  
    f->dump_int("abort_rc", abort_rc);
+
+  f->dump_unsigned("owner_uid", head.owner_uid);
+  f->dump_unsigned("owner_gid", head.owner_gid);
  }
  
  MetaRequest::~MetaRequest()
diff --git a/src/client/MetaRequest.h b/src/client/MetaRequest.h

index dd3c47481426f68e4180ad20644e10cfe7153bf1..d6d27ee009fe6bdc0ce964602bcce9111d4bd72b 100644 (file)
--- a/src/client/MetaRequest.h
+++ b/src/client/MetaRequest.h
@@ -78,6 +78,8 @@ public:
      unsafe_target_item(this) {
      memset(&head, 0, sizeof(head));
      head.op = op;
+    head.owner_uid = -1;
+    head.owner_gid = -1;
    }
    ~MetaRequest();
  
@@ -151,6 +153,13 @@ public:
      return v == 0;
    }
  
+  void set_inode_owner_uid_gid(unsigned u, unsigned g) {
+    /* it makes sense to set owner_{u,g}id only for OPs which create inodes */
+    ceph_assert(IS_CEPH_MDS_OP_NEWINODE(head.op));
+    head.owner_uid = u;
+    head.owner_gid = g;
+  }
+
    // normal fields
    void set_tid(ceph_tid_t t) { tid = t; }
    void set_oldest_client_tid(ceph_tid_t t) { head.oldest_client_tid = t; }
diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h

index 07ba579978eab475427aae5c6f66e6f164083342..542c1d155ad94c4cbfbb0d136793dd8a8ea85a8e 100644 (file)
--- a/src/include/ceph_fs.h
+++ b/src/include/ceph_fs.h
@@ -428,6 +428,11 @@ enum {
         CEPH_MDS_OP_RDLOCK_FRAGSSTATS = 0x01507
  };
  
+#define IS_CEPH_MDS_OP_NEWINODE(op) (op == CEPH_MDS_OP_CREATE     || \
+                                    op == CEPH_MDS_OP_MKNOD      || \
+                                    op == CEPH_MDS_OP_MKDIR      || \
+                                    op == CEPH_MDS_OP_SYMLINK)
+
  extern const char *ceph_mds_op_name(int op);
  
  #ifndef CEPH_SETATTR_MODE
@@ -622,7 +627,7 @@ union ceph_mds_request_args {
         } __attribute__ ((packed)) lookupino;
  } __attribute__ ((packed));
  
-#define CEPH_MDS_REQUEST_HEAD_VERSION  2
+#define CEPH_MDS_REQUEST_HEAD_VERSION  3
  
  /*
   * Note that any change to this structure must ensure that it is compatible
@@ -643,9 +648,12 @@ struct ceph_mds_request_head {
  
         __le32 ext_num_retry;          /* new count retry attempts */
         __le32 ext_num_fwd;            /* new count fwd attempts */
+
+       __le32 struct_len;             /* to store size of struct ceph_mds_request_head */
+       __le32 owner_uid, owner_gid;   /* used for OPs which create inodes */
  } __attribute__ ((packed));
  
-void inline encode(const struct ceph_mds_request_head& h, ceph::buffer::list& bl, bool old_version) {
+void inline encode(const struct ceph_mds_request_head& h, ceph::buffer::list& bl) {
    using ceph::encode;
    encode(h.version, bl);
    encode(h.oldest_client_tid, bl);
@@ -665,14 +673,30 @@ void inline encode(const struct ceph_mds_request_head& h, ceph::buffer::list& bl
    encode(h.ino, bl);
    bl.append((char*)&h.args, sizeof(h.args));
  
-  if (!old_version) {
+  if (h.version >= 2) {
      encode(h.ext_num_retry, bl);
      encode(h.ext_num_fwd, bl);
    }
+
+  if (h.version >= 3) {
+    __u32 struct_len = sizeof(struct ceph_mds_request_head);
+    encode(struct_len, bl);
+    encode(h.owner_uid, bl);
+    encode(h.owner_gid, bl);
+
+    /*
+     * Please, add new fields handling here.
+     * You don't need to check h.version as we do it
+     * in decode(), because decode can properly skip
+     * all unsupported fields if h.version >= 3.
+     */
+  }
  }
  
  void inline decode(struct ceph_mds_request_head& h, ceph::buffer::list::const_iterator& bl) {
    using ceph::decode;
+  unsigned struct_end = bl.get_off();
+
    decode(h.version, bl);
    decode(h.oldest_client_tid, bl);
    decode(h.mdsmap_epoch, bl);
@@ -693,6 +717,42 @@ void inline decode(struct ceph_mds_request_head& h, ceph::buffer::list::const_it
      h.ext_num_retry = h.num_retry;
      h.ext_num_fwd = h.num_fwd;
    }
+
+  if (h.version >= 3) {
+    decode(h.struct_len, bl);
+    struct_end += h.struct_len;
+
+    decode(h.owner_uid, bl);
+    decode(h.owner_gid, bl);
+  } else {
+    /*
+     * client is old: let's take caller_{u,g}id as owner_{u,g}id
+     * this is how it worked before adding of owner_{u,g}id fields.
+     */
+    h.owner_uid = h.caller_uid;
+    h.owner_gid = h.caller_gid;
+  }
+
+  /* add new fields handling here */
+
+  /*
+   * From version 3 we have struct_len field.
+   * It allows us to properly handle a case
+   * when client send struct ceph_mds_request_head
+   * bigger in size than MDS supports. In this
+   * case we just want to skip all remaining bytes
+   * at the end.
+   *
+   * See also DECODE_FINISH macro. Unfortunately,
+   * we can't start using it right now as it will be
+   * an incompatible protocol change.
+   */
+  if (h.version >= 3) {
+    if (bl.get_off() > struct_end)
+      throw ::ceph::buffer::malformed_input(DECODE_ERR_PAST(__PRETTY_FUNCTION__));
+    if (bl.get_off() < struct_end)
+      bl += struct_end - bl.get_off();
+  }
  }
  
  /* cap/lease release record */
diff --git a/src/mds/Server.cc b/src/mds/Server.cc

index 2236129c564186b8df2104bade46bff9b8b5a778..404646df28ac146e9df81af66e6d63c35896a03e 100644 (file)
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -3445,10 +3445,12 @@ CInode* Server::prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino
        _inode->mode |= S_ISGID;
      }
    } else {
-    _inode->gid = mdr->client_request->get_caller_gid();
+    _inode->gid = mdr->client_request->get_owner_gid();
+    ceph_assert(_inode->gid != (unsigned)-1);
    }
  
-  _inode->uid = mdr->client_request->get_caller_uid();
+  _inode->uid = mdr->client_request->get_owner_uid();
+  ceph_assert(_inode->uid != (unsigned)-1);
  
    _inode->btime = _inode->ctime = _inode->mtime = _inode->atime =
      mdr->get_op_stamp();
diff --git a/src/mds/cephfs_features.cc b/src/mds/cephfs_features.cc

index 3ea19e93fab492c412768f8c2611f4221039f443..bf7b1f3cc72e28d4962f1d3f3d511a95c66c5c03 100644 (file)
--- a/src/mds/cephfs_features.cc
+++ b/src/mds/cephfs_features.cc
@@ -26,6 +26,8 @@ static const std::array feature_names
    "notify_session_state",
    "op_getvxattr",
    "32bits_retry_fwd",
+  "new_snaprealm_info",
+  "has_owner_uidgid",
  };
  static_assert(feature_names.size() == CEPHFS_FEATURE_MAX + 1);
  
diff --git a/src/mds/cephfs_features.h b/src/mds/cephfs_features.h

index 44c7b223ab30f087efb2d9ae08f03fd39a0359ee..6fe114274bb61e4da321e909e8843c3509ed0e4b 100644 (file)
--- a/src/mds/cephfs_features.h
+++ b/src/mds/cephfs_features.h
@@ -46,7 +46,9 @@ namespace ceph {
  #define CEPHFS_FEATURE_NOTIFY_SESSION_STATE 16
  #define CEPHFS_FEATURE_OP_GETVXATTR         17
  #define CEPHFS_FEATURE_32BITS_RETRY_FWD     18
-#define CEPHFS_FEATURE_MAX                  18
+#define CEPHFS_FEATURE_NEW_SNAPREALM_INFO   19
+#define CEPHFS_FEATURE_HAS_OWNER_UIDGID     20
+#define CEPHFS_FEATURE_MAX                  20
  
  #define CEPHFS_FEATURES_ALL {          \
    0, 1, 2, 3, 4,                       \
@@ -66,6 +68,7 @@ namespace ceph {
    CEPHFS_FEATURE_NOTIFY_SESSION_STATE,  \
    CEPHFS_FEATURE_OP_GETVXATTR,          \
    CEPHFS_FEATURE_32BITS_RETRY_FWD,      \
+  CEPHFS_FEATURE_HAS_OWNER_UIDGID,      \
  }
  
  #define CEPHFS_METRIC_FEATURES_ALL {           \
diff --git a/src/messages/MClientRequest.h b/src/messages/MClientRequest.h

index 4a6a21d3c5503195fba93997888ded3863d8c74d..3a192efd8f8adcadbb841344997a33384a4862f2 100644 (file)
--- a/src/messages/MClientRequest.h
+++ b/src/messages/MClientRequest.h
@@ -38,6 +38,7 @@
  #include "include/filepath.h"
  #include "mds/mdstypes.h"
  #include "include/ceph_features.h"
+#include "mds/cephfs_features.h"
  #include "messages/MMDSOp.h"
  
  #include <sys/types.h>
@@ -73,7 +74,7 @@ private:
  public:
    mutable struct ceph_mds_request_head head; /* XXX HACK! */
    utime_t stamp;
-  bool peer_old_version = false;
+  feature_bitset_t mds_features;
  
    struct Release {
      mutable ceph_mds_request_release item;
@@ -110,12 +111,16 @@ protected:
    MClientRequest()
      : MMDSOp(CEPH_MSG_CLIENT_REQUEST, HEAD_VERSION, COMPAT_VERSION) {
      memset(&head, 0, sizeof(head));
+    head.owner_uid = -1;
+    head.owner_gid = -1;
    }
-  MClientRequest(int op, bool over=true)
+  MClientRequest(int op, feature_bitset_t features = 0)
      : MMDSOp(CEPH_MSG_CLIENT_REQUEST, HEAD_VERSION, COMPAT_VERSION) {
      memset(&head, 0, sizeof(head));
      head.op = op;
-    peer_old_version = over;
+    mds_features = features;
+    head.owner_uid = -1;
+    head.owner_gid = -1;
    }
    ~MClientRequest() final {}
  
@@ -198,6 +203,8 @@ public:
    int get_op() const { return head.op; }
    unsigned get_caller_uid() const { return head.caller_uid; }
    unsigned get_caller_gid() const { return head.caller_gid; }
+  unsigned get_owner_uid() const { return head.owner_uid; }
+  unsigned get_owner_gid() const { return head.owner_gid; }
    const std::vector<uint64_t>& get_caller_gid_list() const { return gid_list; }
  
    const std::string& get_path() const { return path.get_path(); }
@@ -255,14 +262,16 @@ public:
       * client will just copy the 'head' memory and isn't
       * that smart to skip them.
       */
-    if (peer_old_version) {
+    if (!mds_features.test(CEPHFS_FEATURE_32BITS_RETRY_FWD)) {
        head.version = 1;
+    } else if (!mds_features.test(CEPHFS_FEATURE_HAS_OWNER_UIDGID)) {
+      head.version = 2;
      } else {
        head.version = CEPH_MDS_REQUEST_HEAD_VERSION;
      }
  
      if (features & CEPH_FEATURE_FS_BTIME) {
-      encode(head, payload, peer_old_version);
+      encode(head, payload);
      } else {
        struct ceph_mds_request_head_legacy old_mds_head;
  
@@ -283,6 +292,10 @@ public:
      out << "client_request(" << get_orig_source()
         << ":" << get_tid()
         << " " << ceph_mds_op_name(get_op());
+    if (IS_CEPH_MDS_OP_NEWINODE(head.op)) {
+      out << " owner_uid=" << head.owner_uid
+         << ", owner_gid=" << head.owner_gid;
+    }
      if (head.op == CEPH_MDS_OP_GETATTR)
        out << " " << ccap_string(head.args.getattr.mask);
      if (head.op == CEPH_MDS_OP_SETATTR) {
author	Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
	Thu, 3 Aug 2023 12:15:28 +0000 (14:15 +0200)
committer	Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
	Thu, 24 Aug 2023 12:54:11 +0000 (14:54 +0200)
src/client/Client.cc		patch \| blob \| history
src/client/MetaRequest.cc		patch \| blob \| history
src/client/MetaRequest.h		patch \| blob \| history
src/include/ceph_fs.h		patch \| blob \| history
src/mds/Server.cc		patch \| blob \| history
src/mds/cephfs_features.cc		patch \| blob \| history
src/mds/cephfs_features.h		patch \| blob \| history
src/messages/MClientRequest.h		patch \| blob \| history