git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: send delegated_inos in openc response 31817/head
author: Jeff Layton <jlayton@redhat.com>
Mon, 14 Oct 2019 16:13:56 +0000 (12:13 -0400)
committer: Jeff Layton <jlayton@redhat.com>
Mon, 13 Jan 2020 17:37:19 +0000 (12:37 -0500)
If the client supports octopus cephfs features, then hand it a set of
inode numbers in the first create reply. The client code is similarly
changed to expect a trailing set of inode numbers in that case, though
the userland client doesn't do anything with them yet.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
src/client/Client.cc
src/client/Client.h
src/common/options.cc
src/mds/Server.cc
src/mds/cephfs_features.h
src/messages/MClientReply.h

index 06d71d3537a6f4d4bfae22f6662622e85aa791df..3ee6dc0c4428e00dc4191b33eba23525b1688e5e 100644 (file)
@@ -1544,7 +1544,7 @@ void Client::dump_mds_requests(Formatter *f)
   }
 }
 
-int Client::verify_reply_trace(int r,
+int Client::verify_reply_trace(int r, MetaSession *session,
                               MetaRequest *request, const MConstRef<MClientReply>& reply,
                               InodeRef *ptarget, bool *pcreated,
                               const UserPerm& perms)
@@ -1557,12 +1557,24 @@ int Client::verify_reply_trace(int r,
 
   extra_bl = reply->get_extra_bl();
   if (extra_bl.length() >= 8) {
-    // if the extra bufferlist has a buffer, we assume its the created inode
-    // and that this request to create succeeded in actually creating
-    // the inode (won the race with other create requests)
-    decode(created_ino, extra_bl);
-    got_created_ino = true;
+    if (session->mds_features.test(CEPHFS_FEATURE_DELEG_INO)) {
+     struct openc_response_t   ocres;
+
+     decode(ocres, extra_bl);
+     created_ino = ocres.created_ino;
+     /*
+      * The userland cephfs client doesn't have a way to do an async create
+      * (yet), so just discard delegated_inos for now. Eventually we should
+      * store them and use them in create calls, even if they are synchronous,
+      * if only for testing purposes.
+      */
+     ldout(cct, 10) << "delegated_inos: " << ocres.delegated_inos << dendl;
+    } else {
+     // u64 containing number of created ino
+     decode(created_ino, extra_bl);
+    }
     ldout(cct, 10) << "make_request created ino " << created_ino << dendl;
+    got_created_ino = true;
   }
 
   if (pcreated)
@@ -1670,6 +1682,7 @@ int Client::make_request(MetaRequest *request,
   if (use_mds >= 0)
     request->resend_mds = use_mds;
 
+  MetaSession *session = NULL;
   while (1) {
     if (request->aborted())
       break;
@@ -1704,7 +1717,6 @@ int Client::make_request(MetaRequest *request,
     }
 
     // open a session?
-    MetaSession *session = NULL;
     if (!have_open_session(mds)) {
       session = _get_or_open_mds_session(mds);
 
@@ -1769,7 +1781,7 @@ int Client::make_request(MetaRequest *request,
   request->dispatch_cond = 0;
   
   if (r >= 0 && ptarget)
-    r = verify_reply_trace(r, request, reply, ptarget, pcreated, perms);
+    r = verify_reply_trace(r, session, request, reply, ptarget, pcreated, perms);
 
   if (pdirbl)
     *pdirbl = reply->get_extra_bl();
index a88abb5d86e8489e47ee0b48ca76bed72d4d19d4..50e91940a5da712ddec7a5306e7e5f034cce1a6a 100644 (file)
@@ -802,7 +802,8 @@ protected:
   void put_request(MetaRequest *request);
   void unregister_request(MetaRequest *request);
 
-  int verify_reply_trace(int r, MetaRequest *request, const MConstRef<MClientReply>& reply,
+  int verify_reply_trace(int r, MetaSession *session, MetaRequest *request,
+                        const MConstRef<MClientReply>& reply,
                         InodeRef *ptarget, bool *pcreated,
                         const UserPerm& perms);
   void encode_cap_releases(MetaRequest *request, mds_rank_t mds);
index fbd171c743b70326bb80215bb86fff55378b58ff..6d948b7d1d23edb2807edba03f18eef5c09f5c6d 100644 (file)
@@ -7719,6 +7719,13 @@ std::vector<Option> get_mds_options() {
     .set_default(1000)
     .set_description("number of unused inodes to pre-allocate to clients for file creation"),
 
+    // The MDS openc path computes "100 / pct" and then divides by the result, so a
+    // value above 100 would produce a zero divisor; bound the option to a real percentage.
+    Option("mds_client_delegate_inos_pct", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(50)
+    .set_min_max(0, 100)
+    .set_flag(Option::FLAG_RUNTIME)
+    .set_description("percentage of preallocated inos to delegate to client"),
+
     Option("mds_early_reply", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
     .set_default(true)
     .set_description("additional reply to clients that metadata requests are complete but not yet durable"),
index bd4b409ae693581c3e9ac375462a9642fff97886..0ad4a10e3ccd06ec0dd0fc64da43f06a91a95f12 100644 (file)
@@ -4370,7 +4370,22 @@ void Server::handle_client_openc(MDRequestRef& mdr)
 
   C_MDS_openc_finish *fin = new C_MDS_openc_finish(this, mdr, dn, in);
 
-  if (mdr->client_request->get_connection()->has_feature(CEPH_FEATURE_REPLY_CREATE_INODE)) {
+  if (mdr->session->info.has_feature(CEPHFS_FEATURE_DELEG_INO)) {
+    openc_response_t   ocresp;
+
+    dout(10) << "adding created_ino and delegated_inos" << dendl;
+    ocresp.created_ino = in->inode.ino;
+
+    // Top up the client's delegated inos once it holds fewer than half of its share of the prealloc pool (pct == 0 disables delegation).
+    auto pct = g_conf().get_val<uint64_t>("mds_client_delegate_inos_pct");
+    if (pct) {
+      unsigned frac = 100 / (pct > 100 ? 100 : pct);  // clamp: pct > 100 would make frac 0 and divide by zero below
+      if (mdr->session->delegated_inos.size() < (unsigned)g_conf()->mds_client_prealloc_inos / frac / 2)
+       mdr->session->delegate_inos(g_conf()->mds_client_prealloc_inos / frac, ocresp.delegated_inos);
+    }
+
+    encode(ocresp, mdr->reply_extra_bl);
+  } else if (mdr->client_request->get_connection()->has_feature(CEPH_FEATURE_REPLY_CREATE_INODE)) {
     dout(10) << "adding ino to reply to indicate inode was created" << dendl;
     // add the file created flag onto the reply if create_flags features is supported
     encode(in->inode.ino, mdr->reply_extra_bl);
index 1e09cca71d64c026a567fbca2496315775b9d8ee..87648915cb5cb554cd42be7b1c1e87f072af902c 100644 (file)
@@ -32,6 +32,7 @@
 #define CEPHFS_FEATURE_LAZY_CAP_WANTED  11
 #define CEPHFS_FEATURE_MULTI_RECONNECT  12
 #define CEPHFS_FEATURE_NAUTILUS         12
+#define CEPHFS_FEATURE_DELEG_INO        13
 #define CEPHFS_FEATURE_OCTOPUS          13
 
 #define CEPHFS_FEATURES_ALL {          \
@@ -45,6 +46,7 @@
   CEPHFS_FEATURE_LAZY_CAP_WANTED,      \
   CEPHFS_FEATURE_MULTI_RECONNECT,      \
   CEPHFS_FEATURE_NAUTILUS,              \
+  CEPHFS_FEATURE_DELEG_INO,             \
   CEPHFS_FEATURE_OCTOPUS,               \
 }
 
index fb08b24c88a1ce37d6d585ed55a2d75d53e0a646..ed45185a9dd17fdb8c8c22173410fad11ab9a201 100644 (file)
@@ -262,6 +262,27 @@ struct InodeStat {
   // see CInode::encode_inodestat for encoder.
 };
 
+struct openc_response_t {  // extra payload of an openc reply, used when the peer has CEPHFS_FEATURE_DELEG_INO
+  _inodeno_t                   created_ino = 0;  // ino of the inode created by this request; zeroed so a default-constructed value is well-defined
+  interval_set<inodeno_t>      delegated_inos;   // inode numbers handed over to the client; may be empty
+
+public:
+  void encode(ceph::buffer::list& bl) const {  // serialize as a versioned block: created_ino first, then delegated_inos
+    using ceph::encode;
+    ENCODE_START(1, 1, bl);
+    encode(created_ino, bl);
+    encode(delegated_inos, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::const_iterator &p) {  // inverse of encode(); fields are read in the order they were written
+    using ceph::decode;
+    DECODE_START(1, p);
+    decode(created_ino, p);
+    decode(delegated_inos, p);
+    DECODE_FINISH(p);
+  }
+} __attribute__ ((__may_alias__));
+WRITE_CLASS_ENCODER(openc_response_t)
 
 class MClientReply : public SafeMessage {
 public: