}
}
-int Client::verify_reply_trace(int r,
+int Client::verify_reply_trace(int r, MetaSession *session,
MetaRequest *request, const MConstRef<MClientReply>& reply,
InodeRef *ptarget, bool *pcreated,
const UserPerm& perms)
extra_bl = reply->get_extra_bl();
if (extra_bl.length() >= 8) {
- // if the extra bufferlist has a buffer, we assume its the created inode
- // and that this request to create succeeded in actually creating
- // the inode (won the race with other create requests)
- decode(created_ino, extra_bl);
- got_created_ino = true;
+ if (session->mds_features.test(CEPHFS_FEATURE_DELEG_INO)) {
+ struct openc_response_t ocres;
+
+ decode(ocres, extra_bl);
+ created_ino = ocres.created_ino;
+ /*
+ * The userland cephfs client doesn't have a way to do an async create
+ * (yet), so just discard delegated_inos for now. Eventually we should
+ * store them and use them in create calls, even if they are synchronous,
+ * if only for testing purposes.
+ */
+ ldout(cct, 10) << "delegated_inos: " << ocres.delegated_inos << dendl;
+ } else {
+ // u64 containing number of created ino
+ decode(created_ino, extra_bl);
+ }
ldout(cct, 10) << "make_request created ino " << created_ino << dendl;
+ got_created_ino = true;
}
if (pcreated)
if (use_mds >= 0)
request->resend_mds = use_mds;
+ MetaSession *session = NULL;
while (1) {
if (request->aborted())
break;
}
// open a session?
- MetaSession *session = NULL;
if (!have_open_session(mds)) {
session = _get_or_open_mds_session(mds);
request->dispatch_cond = 0;
if (r >= 0 && ptarget)
- r = verify_reply_trace(r, request, reply, ptarget, pcreated, perms);
+ r = verify_reply_trace(r, session, request, reply, ptarget, pcreated, perms);
if (pdirbl)
*pdirbl = reply->get_extra_bl();
void put_request(MetaRequest *request);
void unregister_request(MetaRequest *request);
- int verify_reply_trace(int r, MetaRequest *request, const MConstRef<MClientReply>& reply,
+ int verify_reply_trace(int r, MetaSession *session, MetaRequest *request,
+ const MConstRef<MClientReply>& reply,
InodeRef *ptarget, bool *pcreated,
const UserPerm& perms);
void encode_cap_releases(MetaRequest *request, mds_rank_t mds);
.set_default(1000)
.set_description("number of unused inodes to pre-allocate to clients for file creation"),
+ Option("mds_client_delegate_inos_pct", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+ .set_default(50)
+ .set_flag(Option::FLAG_RUNTIME)
+ .set_description("percentage of preallocated inos to delegate to client"),
+
Option("mds_early_reply", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(true)
.set_description("additional reply to clients that metadata requests are complete but not yet durable"),
C_MDS_openc_finish *fin = new C_MDS_openc_finish(this, mdr, dn, in);
- if (mdr->client_request->get_connection()->has_feature(CEPH_FEATURE_REPLY_CREATE_INODE)) {
+ if (mdr->session->info.has_feature(CEPHFS_FEATURE_DELEG_INO)) {
+ openc_response_t ocresp;
+
+ dout(10) << "adding created_ino and delegated_inos" << dendl;
+ ocresp.created_ino = in->inode.ino;
+
+    // Top up the client's stock of delegated inos once it has fallen below
+    // half of the target. The target is mds_client_delegate_inos_pct percent
+    // of mds_client_prealloc_inos; a value of 0 disables delegation.
+    auto pct = g_conf().get_val<uint64_t>("mds_client_delegate_inos_pct");
+    if (pct > 100) {
+      // A percentage above 100 is meaningless. The earlier "100 / pct" form
+      // truncated to 0 for such values and then divided by that 0.
+      pct = 100;
+    }
+    if (pct) {
+      // Multiply before dividing so the configured percentage is honoured
+      // exactly instead of being rounded through an integer "100 / pct"
+      // fraction (which treated e.g. 60 as 100 and 40 as 50). The product is
+      // held in 64 bits so it cannot overflow before the division.
+      const uint64_t prealloc = (unsigned)g_conf()->mds_client_prealloc_inos;
+      const unsigned want = static_cast<unsigned>(prealloc * pct / 100);
+      if (mdr->session->delegated_inos.size() < want / 2)
+        mdr->session->delegate_inos(want, ocresp.delegated_inos);
+    }
+
+ encode(ocresp, mdr->reply_extra_bl);
+ } else if (mdr->client_request->get_connection()->has_feature(CEPH_FEATURE_REPLY_CREATE_INODE)) {
dout(10) << "adding ino to reply to indicate inode was created" << dendl;
// add the file created flag onto the reply if create_flags features is supported
encode(in->inode.ino, mdr->reply_extra_bl);
#define CEPHFS_FEATURE_LAZY_CAP_WANTED 11
#define CEPHFS_FEATURE_MULTI_RECONNECT 12
#define CEPHFS_FEATURE_NAUTILUS 12
+#define CEPHFS_FEATURE_DELEG_INO 13
#define CEPHFS_FEATURE_OCTOPUS 13
#define CEPHFS_FEATURES_ALL { \
CEPHFS_FEATURE_LAZY_CAP_WANTED, \
CEPHFS_FEATURE_MULTI_RECONNECT, \
CEPHFS_FEATURE_NAUTILUS, \
+ CEPHFS_FEATURE_DELEG_INO, \
CEPHFS_FEATURE_OCTOPUS, \
}
// see CInode::encode_inodestat for encoder.
};
+struct openc_response_t { // Payload placed in the extra bufferlist of a create reply when the session has CEPHFS_FEATURE_DELEG_INO.
+ _inodeno_t created_ino; // Inode number of the newly created inode (the MDS sets it from in->inode.ino).
+ interval_set<inodeno_t> delegated_inos; // Inode numbers handed to the client by Session::delegate_inos(); may be empty.
+ // Encoding is versioned: created_ino first, then delegated_inos; decode() reads the fields in the same order.
+public:
+ void encode(ceph::buffer::list& bl) const { // Appends the versioned encoding of both fields to bl.
+ using ceph::encode;
+ ENCODE_START(1, 1, bl); // Arguments are (1, 1, bl); presumably struct version 1 / compat version 1 -- confirm against the encoding macros.
+ encode(created_ino, bl);
+ encode(delegated_inos, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(bufferlist::const_iterator &p) { // Reads both fields from p, advancing the iterator.
+ using ceph::decode;
+ DECODE_START(1, p); // First argument is 1, matching the first argument of ENCODE_START above.
+ decode(created_ino, p);
+ decode(delegated_inos, p);
+ DECODE_FINISH(p);
+ }
+} __attribute__ ((__may_alias__)); // NOTE(review): may_alias is unusual on a struct with a non-trivial member (interval_set); confirm it is intended.
+WRITE_CLASS_ENCODER(openc_response_t) // Presumably generates the free encode()/decode() overloads used by callers (e.g. decode(ocres, extra_bl)) -- confirm.
class MClientReply : public SafeMessage {
public: