From 67bc849c68478262003f9dc4b97d0ea1ed9f7c9d Mon Sep 17 00:00:00 2001 From: Sam Lang Date: Wed, 19 Dec 2012 10:17:29 -1000 Subject: [PATCH] mds: Return created inode in mds reply to create If multiple clients race to create a file, multiple clients will send a create request and get back a valid dentry+inode, but only one client will actually win the race to create the file. All other clients should treat the reply as an open of an existing file and check permissions. This patch adds the created inode number to the mds create reply if that request actually created the inode/file (and the feature is supported), so the client can properly check permissions if the inode number isn't returned. Fixes #3625. Signed-off-by: Sam Lang --- src/client/Client.cc | 59 ++++++++++++++++++++++++++----------- src/client/Client.h | 4 +-- src/include/ceph_features.h | 6 ++-- src/mds/Mutation.h | 2 ++ src/mds/Server.cc | 8 +++++ 5 files changed, 58 insertions(+), 21 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index 060b20eeae18..c066a2bd90c5 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -5124,8 +5124,7 @@ int Client::open(const char *relpath, int flags, mode_t mode, int stripe_unit, if (r < 0) return r; r = _create(dir, dname.c_str(), flags, mode, &in, &fh, stripe_unit, - stripe_count, object_size, data_pool); - created = true; + stripe_count, object_size, data_pool, &created); } if (r < 0) goto out; @@ -6630,10 +6629,10 @@ int Client::ll_mknod(vinodeno_t parent, const char *name, mode_t mode, dev_t rde } int Client::_create(Inode *dir, const char *name, int flags, mode_t mode, Inode **inp, Fh **fhp, - int stripe_unit, int stripe_count, int object_size, const char *data_pool, int uid, int gid) -{ + int stripe_unit, int stripe_count, int object_size, const char *data_pool, bool *created, int uid, int gid) +{ ldout(cct, 3) << "_create(" << dir->ino << " " << name << ", 0" << oct << mode << dec << ")" << dendl; - + if (strlen(name) > NAME_MAX) 
return -ENAMETOOLONG; if (dir->snapid != CEPH_NOSNAP) { @@ -6662,7 +6661,7 @@ int Client::_create(Inode *dir, const char *name, int flags, mode_t mode, Inode req->inode = dir; req->head.args.open.flags = flags | O_CREAT; req->head.args.open.mode = mode; - + req->head.args.open.stripe_unit = stripe_unit; req->head.args.open.stripe_count = stripe_count; req->head.args.open.object_size = object_size; @@ -6670,20 +6669,47 @@ int Client::_create(Inode *dir, const char *name, int flags, mode_t mode, Inode req->dentry_drop = CEPH_CAP_FILE_SHARED; req->dentry_unless = CEPH_CAP_FILE_EXCL; + bufferlist extra_bl; + inodeno_t created_ino; + bool got_created_ino = false; + int res = get_or_create(dir, name, &req->dentry); if (res < 0) goto fail; - res = make_request(req, uid, gid); - - if (res >= 0) { - res = _lookup(dir, name, inp); - if (res >= 0) { - (*inp)->get_open_ref(cmode); - *fhp = _create_fh(*inp, flags, cmode); - } + res = make_request(req, uid, gid, 0, -1, &extra_bl); + if (res < 0) { + goto reply_error; } - + + // make sure we have a reply to inspect + assert(req->reply); + + // check whether this request actually did the create, and set created flag + if (req->reply->get_connection()->has_feature(CEPH_FEATURE_REPLY_CREATE_INODE) && extra_bl.length() == 1) { + // if the extra bufferlist has a buffer, we assume it's the created inode + // and that this request to create succeeded in actually creating + // the inode (won the race with other create requests) + ::decode(created_ino, extra_bl); + got_created_ino = true; + } + + if (created) + *created = got_created_ino; + + res = _lookup(dir, name, inp); + if (res < 0) { + goto reply_error; + } + + // verify ino returned in reply and trace_dist are the same + if (got_created_ino) + assert(created_ino.val == (*inp)->ino.val); + + (*inp)->get_open_ref(cmode); + *fhp = _create_fh(*inp, flags, cmode); + + reply_error: trim_cache(); ldout(cct, 3) << "create(" << path << ", 0" << oct << mode << dec @@ -7150,10 +7176,9 @@ int 
Client::ll_create(vinodeno_t parent, const char *name, mode_t mode, int flag if (r == 0 && (flags & O_CREAT) && (flags & O_EXCL)) return -EEXIST; if (r == -ENOENT && (flags & O_CREAT)) { - created = true; r = _create(dir, name, flags, mode, &in, fhp, 0, 0, 0, - NULL, uid, gid); + NULL, &created, uid, gid); if (r < 0) goto out; diff --git a/src/client/Client.h b/src/client/Client.h index b1d943f42e53..4f33d3c342ac 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -526,8 +526,8 @@ private: int _removexattr(Inode *in, const char *nm, int uid=-1, int gid=-1); int _open(Inode *in, int flags, mode_t mode, Fh **fhp, int uid=-1, int gid=-1); int _create(Inode *in, const char *name, int flags, mode_t mode, Inode **inp, Fh **fhp, - int stripe_unit, int stripe_count, int object_size, const char *data_pool, - int uid=-1, int gid=-1); + int stripe_unit, int stripe_count, int object_size, const char *data_pool, + bool *created = NULL, int uid=-1, int gid=-1); loff_t _lseek(Fh *fh, loff_t offset, int whence); int _read(Fh *fh, int64_t offset, uint64_t size, bufferlist *bl); int _write(Fh *fh, int64_t offset, uint64_t size, const char *buf); diff --git a/src/include/ceph_features.h b/src/include/ceph_features.h index 24c082d67a1f..0561bc4afae8 100644 --- a/src/include/ceph_features.h +++ b/src/include/ceph_features.h @@ -31,6 +31,7 @@ #define CEPH_FEATURE_RECOVERY_RESERVATION (1<<24) #define CEPH_FEATURE_CRUSH_TUNABLES2 (1<<25) #define CEPH_FEATURE_CREATEPOOLID (1<<26) +#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27) /* * Features supported. Should be everything above. 
@@ -59,10 +60,11 @@ CEPH_FEATURE_MON_NULLROUTE | \ CEPH_FEATURE_MON_GV | \ CEPH_FEATURE_BACKFILL_RESERVATION | \ - CEPH_FEATURE_MSG_AUTH | \ + CEPH_FEATURE_MSG_AUTH | \ CEPH_FEATURE_RECOVERY_RESERVATION | \ CEPH_FEATURE_CRUSH_TUNABLES2 | \ - CEPH_FEATURE_CREATEPOOLID) + CEPH_FEATURE_CREATEPOOLID | \ + CEPH_FEATURE_REPLY_CREATE_INODE) #define CEPH_FEATURES_SUPPORTED_DEFAULT CEPH_FEATURES_ALL diff --git a/src/mds/Mutation.h b/src/mds/Mutation.h index 37cc764254dc..83a1196b9809 100644 --- a/src/mds/Mutation.h +++ b/src/mds/Mutation.h @@ -180,6 +180,8 @@ struct MDRequest : public Mutation { int snap_caps; bool did_early_reply; + bufferlist reply_extra_bl; + // inos we did a embedded cap release on, and may need to eval if we haven't since reissued map cap_releases; diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 717c98a80cf8..dc14bbe2b8d7 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -829,6 +829,7 @@ void Server::early_reply(MDRequest *mdr, CInode *tracei, CDentry *tracedn) mdr->client_request->get_dentry_wanted()); } + reply->set_extra_bl(mdr->reply_extra_bl); messenger->send_message(reply, req->get_connection()); mdr->did_early_reply = true; @@ -2541,6 +2542,7 @@ void Server::handle_client_open(MDRequest *mdr) assert(mdr->dn[0].size()); dn = mdr->dn[0].back(); } + reply_request(mdr, 0, cur, dn); } @@ -2721,6 +2723,12 @@ void Server::handle_client_openc(MDRequest *mdr) ls->open_files.push_back(&in->item_open_file); C_MDS_openc_finish *fin = new C_MDS_openc_finish(mds, mdr, dn, in, follows); + + if (mdr->client_request->get_connection()->has_feature(CEPH_FEATURE_REPLY_CREATE_INODE)) { + // add the created inode number onto the reply if the REPLY_CREATE_INODE feature is supported + ::encode(in->inode.ino, mdr->reply_extra_bl); + } + journal_and_reply(mdr, in, dn, le, fin); } -- 2.47.3