From a1b2c8ff955b30807ac53ce6bdc97cf61a7262ca Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 2 Oct 2014 19:07:41 +0800 Subject: [PATCH] client: posix file lock support Signed-off-by: Yan, Zheng --- src/client/Client.cc | 299 +++++++++++++++++++++++++++++++- src/client/Client.h | 12 ++ src/client/Fh.h | 8 +- src/client/Inode.h | 9 +- src/client/fuse_ll.cc | 61 ++++++- src/mds/mdstypes.h | 4 +- src/messages/MClientReconnect.h | 6 +- 7 files changed, 386 insertions(+), 13 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index cd2f91a61dfb8..95c86107f77d8 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -55,6 +55,7 @@ using namespace std; #include "mon/MonClient.h" +#include "mds/flock.h" #include "osd/OSDMap.h" #include "mon/MonMap.h" @@ -2155,6 +2156,9 @@ void Client::send_reconnect(MetaSession *session) in->make_long_path(path); ldout(cct, 10) << " path " << path << dendl; + bufferlist flockbl; + _encode_filelocks(in, flockbl); + in->caps[mds]->seq = 0; // reset seq. in->caps[mds]->issue_seq = 0; // reset seq. in->caps[mds]->mseq = 0; // reset seq. @@ -2163,7 +2167,8 @@ void Client::send_reconnect(MetaSession *session) path.get_ino(), path.get_path(), // ino in->caps_wanted(), // wanted in->caps[mds]->issued, // issued - in->snaprealm->ino); + in->snaprealm->ino, + flockbl); if (did_snaprealm.count(in->snaprealm->ino) == 0) { ldout(cct, 10) << " snaprealm " << *in->snaprealm << dendl; @@ -2309,6 +2314,8 @@ void Client::put_inode(Inode *in, int n) in->snaprealm_item.remove_myself(); if (in == root) root = 0; + delete in->fcntl_locks; + delete in->flock_locks; delete in; } } @@ -6284,6 +6291,8 @@ int Client::_release_fh(Fh *f) in->snap_cap_refs--; } + _release_filelocks(f); + put_inode(in); delete f; @@ -7281,6 +7290,266 @@ int Client::statfs(const char *path, struct statvfs *stbuf) return rval; } +int Client::_do_filelock(Inode *in, int lock_type, int op, int sleep, + struct flock *fl, uint64_t owner) +{ + ldout(cct, 10) << "_do_filelock ino " << in->ino + << (lock_type == CEPH_LOCK_FCNTL ? " fcntl" : " flock") + << " type " << fl->l_type << " owner " << owner + << " " << fl->l_start << "~" << fl->l_len << dendl; + + int lock_cmd; + if (F_RDLCK == fl->l_type) + lock_cmd = CEPH_LOCK_SHARED; + else if (F_WRLCK == fl->l_type) + lock_cmd = CEPH_LOCK_EXCL; + else if (F_UNLCK == fl->l_type) + lock_cmd = CEPH_LOCK_UNLOCK; + else + return -EIO; + + /* + * Set the most significant bit, so that MDS knows the 'owner' + * is sufficient to identify the owner of lock. (old code uses + * both 'owner' and 'pid') + */ + owner |= (1ULL << 63); + + MetaRequest *req = new MetaRequest(op); + filepath path; + in->make_nosnap_relative_path(path); + req->set_filepath(path); + req->set_inode(in); + + req->head.args.filelock_change.rule = lock_type; + req->head.args.filelock_change.type = lock_cmd; + req->head.args.filelock_change.owner = owner; + req->head.args.filelock_change.pid = fl->l_pid; + req->head.args.filelock_change.start = fl->l_start; + req->head.args.filelock_change.length = fl->l_len; + req->head.args.filelock_change.wait = sleep; + + bufferlist bl; + int ret = make_request(req, -1, -1, NULL, NULL, -1, &bl); + + if (ret == 0) { + if (op == CEPH_MDS_OP_GETFILELOCK) { + ceph_filelock filelock; + bufferlist::iterator p = bl.begin(); + ::decode(filelock, p); + + if (CEPH_LOCK_SHARED == filelock.type) + fl->l_type = F_RDLCK; + else if (CEPH_LOCK_EXCL == filelock.type) + fl->l_type = F_WRLCK; + else + fl->l_type = F_UNLCK; + + fl->l_whence = SEEK_SET; + fl->l_start = filelock.start; + fl->l_len = filelock.length; + fl->l_pid = filelock.pid; + } else if (op == CEPH_MDS_OP_SETFILELOCK) { + ceph_lock_state_t *lock_state; + if (lock_type == CEPH_LOCK_FCNTL) { + if (!in->fcntl_locks) + in->fcntl_locks = new ceph_lock_state_t(cct); + lock_state = in->fcntl_locks; + } else if (lock_type == CEPH_LOCK_FLOCK) { + if (!in->flock_locks) + in->flock_locks = new ceph_lock_state_t(cct); + lock_state = in->flock_locks; + } else + assert(0); + + ceph_filelock filelock; + _convert_flock(fl, owner, &filelock); + if (filelock.type == CEPH_LOCK_UNLOCK) { + list activated_locks; + lock_state->remove_lock(filelock, activated_locks); + } else { + bool r = lock_state->add_lock(filelock, false, false); + assert(r); + } + } else + assert(0); + } + return ret; +} + +void Client::_encode_filelocks(Inode *in, bufferlist& bl) +{ + if (!in->fcntl_locks && !in->flock_locks) + return; + + unsigned nr_fcntl_locks = in->fcntl_locks ? in->fcntl_locks->held_locks.size() : 0; + ::encode(nr_fcntl_locks, bl); + if (nr_fcntl_locks) { + ceph_lock_state_t* lock_state = in->fcntl_locks; + for(multimap::iterator p = lock_state->held_locks.begin(); + p != lock_state->held_locks.end(); + ++p) + ::encode(p->second, bl); + } + + unsigned nr_flock_locks = in->flock_locks ? in->flock_locks->held_locks.size() : 0; + ::encode(nr_flock_locks, bl); + if (nr_flock_locks) { + ceph_lock_state_t* lock_state = in->flock_locks; + for(multimap::iterator p = lock_state->held_locks.begin(); + p != lock_state->held_locks.end(); + ++p) + ::encode(p->second, bl); + } + + ldout(cct, 10) << "_encode_filelocks ino " << in->ino << ", " << nr_fcntl_locks + << " fcntl locks, " << nr_flock_locks << " flock locks" << dendl; +} + +void Client::_release_filelocks(Fh *fh) +{ + if (!fh->fcntl_locks && !fh->flock_locks) + return; + + Inode *in = fh->inode; + ldout(cct, 10) << "_release_filelocks " << fh << " ino " << in->ino << dendl; + + list > to_release; + + if (fh->fcntl_locks) { + ceph_lock_state_t* lock_state = fh->fcntl_locks; + for(multimap::iterator p = lock_state->held_locks.begin(); + p != lock_state->held_locks.end(); + ++p) + to_release.push_back(pair(CEPH_LOCK_FCNTL, p->second)); + delete fh->fcntl_locks; + } + if (fh->flock_locks) { + ceph_lock_state_t* lock_state = fh->flock_locks; + for(multimap::iterator p = lock_state->held_locks.begin(); + p != lock_state->held_locks.end(); + ++p) + to_release.push_back(pair(CEPH_LOCK_FLOCK, p->second)); + delete fh->flock_locks; + } + + if (to_release.empty()) + return; + + struct flock fl; + memset(&fl, 0, sizeof(fl)); + fl.l_whence = SEEK_SET; + fl.l_type = F_UNLCK; + + for (list >::iterator p = to_release.begin(); + p != to_release.end(); + ++p) { + fl.l_start = p->second.start; + fl.l_len = p->second.length; + fl.l_pid = p->second.pid; + _do_filelock(in, p->first, CEPH_MDS_OP_SETFILELOCK, 0, &fl, p->second.owner); + } +} + +void Client::_convert_flock(struct flock *fl, uint64_t owner, struct ceph_filelock *filelock) +{ + int lock_cmd; + if (F_RDLCK == fl->l_type) + lock_cmd = CEPH_LOCK_SHARED; + else if (F_WRLCK == fl->l_type) + lock_cmd = CEPH_LOCK_EXCL; + else + lock_cmd = CEPH_LOCK_UNLOCK;; + + filelock->start = fl->l_start; + filelock->length = fl->l_len; + filelock->client = 0; + // see comment in _do_filelock() + filelock->owner = owner | (1ULL << 63); + filelock->pid = fl->l_pid; + filelock->type = lock_cmd; +} + +int Client::_getlk(Fh *fh, struct flock *fl, uint64_t owner) +{ + Inode *in = fh->inode; + ldout(cct, 10) << "_getlk " << fh << " ino " << in->ino << dendl; + int ret = _do_filelock(in, CEPH_LOCK_FCNTL, CEPH_MDS_OP_GETFILELOCK, 0, fl, owner); + return ret; +} + +int Client::_setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep) +{ + Inode *in = fh->inode; + ldout(cct, 10) << "_setlk " << fh << " ino " << in->ino << dendl; + int ret = _do_filelock(in, CEPH_LOCK_FCNTL, CEPH_MDS_OP_SETFILELOCK, sleep, fl, owner); + if (ret == 0) { + if (!fh->fcntl_locks) + fh->fcntl_locks = new ceph_lock_state_t(cct); + + ceph_filelock filelock; + _convert_flock(fl, owner, &filelock); + + if (filelock.type == CEPH_LOCK_UNLOCK) { + list activated_locks; + fh->fcntl_locks->remove_lock(filelock, activated_locks); + } else { + bool r = fh->fcntl_locks->add_lock(filelock, false, false); + assert(r); + } + } + ldout(cct, 10) << "_setlk " << fh << " ino " << in->ino << " result=" << ret << dendl; + return ret; +} + +int Client::_flock(Fh *fh, int cmd, uint64_t owner) +{ + Inode *in = fh->inode; + ldout(cct, 10) << "_flock " << fh << " ino " << in->ino << dendl; + + int sleep = !(cmd & LOCK_NB); + cmd &= ~LOCK_NB; + + int type; + switch (cmd) { + case LOCK_SH: + type = F_RDLCK; + break; + case LOCK_EX: + type = F_WRLCK; + break; + case LOCK_UN: + type = F_UNLCK; + break; + default: + return -EINVAL; + } + + struct flock fl; + memset(&fl, 0, sizeof(fl)); + fl.l_type = type; + fl.l_whence = SEEK_SET; + + int ret = _do_filelock(in, CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, sleep, &fl, owner); + if (ret == 0) { + if (!fh->flock_locks) + fh->flock_locks = new ceph_lock_state_t(cct); + + ceph_filelock filelock; + _convert_flock(&fl, owner, &filelock); + + if (filelock.type == CEPH_LOCK_UNLOCK) { + list activated_locks; + fh->flock_locks->remove_lock(filelock, activated_locks); + } else { + bool r = fh->flock_locks->add_lock(filelock, false, false); + assert(r); + } + } + ldout(cct, 10) << "_flock " << fh << " ino " << in->ino << " result=" << ret << dendl; + return ret; +} + int Client::ll_statfs(Inode *in, struct statvfs *stbuf) { /* Since the only thing this does is wrap a call to statfs, and @@ -9333,7 +9602,35 @@ int Client::ll_release(Fh *fh) return 0; } +int Client::ll_getlk(Fh *fh, struct flock *fl, uint64_t owner) +{ + Mutex::Locker lock(client_lock); + + ldout(cct, 3) << "ll_getlk (fh)" << fh << " " << fh->inode->ino << dendl; + tout(cct) << "ll_getk (fh)" << (unsigned long)fh << std::endl; + + return _getlk(fh, fl, owner); +} + +int Client::ll_setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep) +{ + Mutex::Locker lock(client_lock); + + ldout(cct, 3) << "ll_setlk (fh) " << fh << " " << fh->inode->ino << dendl; + tout(cct) << "ll_setk (fh)" << (unsigned long)fh << std::endl; + return _setlk(fh, fl, owner, sleep); +} + +int Client::ll_flock(Fh *fh, int cmd, uint64_t owner) +{ + Mutex::Locker lock(client_lock); + + ldout(cct, 3) << "ll_flock (fh) " << fh << " " << fh->inode->ino << dendl; + tout(cct) << "ll_flock (fh)" << (unsigned long)fh << std::endl; + + return _flock(fh, cmd, owner); +} // ========================================= // layout diff --git a/src/client/Client.h b/src/client/Client.h index eddf4c00a20f8..eda0fef7a7fb1 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -129,6 +129,7 @@ struct CapSnap; struct MetaSession; struct MetaRequest; +class ceph_lock_state_t; typedef void (*client_ino_callback_t)(void *handle, vinodeno_t ino, int64_t off, int64_t len); @@ -634,6 +635,9 @@ private: int _fsync(Fh *fh, bool syncdataonly); int _sync_fs(); int _fallocate(Fh *fh, int mode, int64_t offset, int64_t length); + int _getlk(Fh *fh, struct flock *fl, uint64_t owner); + int _setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep); + int _flock(Fh *fh, int cmd, uint64_t owner); int get_or_create(Inode *dir, const char* name, Dentry **pdn, bool expect_null=false); @@ -686,6 +690,11 @@ private: return 0; } + int _do_filelock(Inode *in, int lock_type, int op, int sleep, + struct flock *fl, uint64_t owner); + void _encode_filelocks(Inode *in, bufferlist& bl); + void _release_filelocks(Fh *fh); + void _convert_flock(struct flock *fl, uint64_t owner, ceph_filelock *filelock); public: int mount(const std::string &mount_root); void unmount(); @@ -897,6 +906,9 @@ public: int ll_fsync(Fh *fh, bool syncdataonly); int ll_fallocate(Fh *fh, int mode, loff_t offset, loff_t length); int ll_release(Fh *fh); + int ll_getlk(Fh *fh, struct flock *fl, uint64_t owner); + int ll_setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep); + int ll_flock(Fh *fh, int cmd, uint64_t owner); int ll_get_stripe_osd(struct Inode *in, uint64_t blockno, ceph_file_layout* layout); uint64_t ll_get_internal_offset(struct Inode *in, uint64_t blockno); diff --git a/src/client/Fh.h b/src/client/Fh.h index 235b244b4010c..61bf2584de8b1 100644 --- a/src/client/Fh.h +++ b/src/client/Fh.h @@ -5,6 +5,7 @@ struct Inode; class Cond; +class ceph_lock_state_t; // file handle for any open file state @@ -23,8 +24,13 @@ struct Fh { loff_t consec_read_bytes; int nr_consec_read; + // file lock + ceph_lock_state_t *fcntl_locks; + ceph_lock_state_t *flock_locks; + Fh() : inode(0), pos(0), mds(0), mode(0), flags(0), pos_locked(false), - last_pos(0), consec_read_bytes(0), nr_consec_read(0) {} + last_pos(0), consec_read_bytes(0), nr_consec_read(0), + fcntl_locks(NULL), flock_locks(NULL) {} }; diff --git a/src/client/Inode.h b/src/client/Inode.h index 2edeba8c4e6ac..daddfedd31450 100644 --- a/src/client/Inode.h +++ b/src/client/Inode.h @@ -18,6 +18,7 @@ class Dentry; class Dir; struct SnapRealm; struct Inode; +class ceph_lock_state_t; struct Cap { MetaSession *session; @@ -210,6 +211,10 @@ struct Inode { ll_ref -= n; } + // file locks + ceph_lock_state_t *fcntl_locks; + ceph_lock_state_t *flock_locks; + Inode(CephContext *cct_, vinodeno_t vino, ceph_file_layout *newlayout) : cct(cct_), ino(vino.ino), snapid(vino.snapid), rdev(0), mode(0), uid(0), gid(0), nlink(0), @@ -224,8 +229,8 @@ struct Inode { snaprealm(0), snaprealm_item(this), snapdir_parent(0), oset((void *)this, newlayout->fl_pg_pool, ino), reported_size(0), wanted_max_size(0), requested_max_size(0), - _ref(0), ll_ref(0), - dir(0), dn_set() + _ref(0), ll_ref(0), dir(0), dn_set(), + fcntl_locks(NULL), flock_locks(NULL) { memset(&dir_layout, 0, sizeof(dir_layout)); memset(&layout, 0, sizeof(layout)); diff --git a/src/client/fuse_ll.cc b/src/client/fuse_ll.cc index c97c6df5d4fb8..48dcb6302ddfc 100644 --- a/src/client/fuse_ll.cc +++ b/src/client/fuse_ll.cc @@ -639,6 +639,55 @@ static void fuse_ll_statfs(fuse_req_t req, fuse_ino_t ino) cfuse->iput(in); // iput required } +static void fuse_ll_getlk(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi, struct flock *lock) +{ + CephFuse::Handle *cfuse = (CephFuse::Handle *)fuse_req_userdata(req); + Fh *fh = (Fh*)fi->fh; + + int r = cfuse->client->ll_getlk(fh, lock, fi->lock_owner); + if (r == 0) + fuse_reply_lock(req, lock); + else + fuse_reply_err(req, -r); +} + +static void fuse_ll_setlk(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi, struct flock *lock, int sleep) +{ + CephFuse::Handle *cfuse = (CephFuse::Handle *)fuse_req_userdata(req); + Fh *fh = (Fh*)fi->fh; + + // must use multithread if operation may block + if (!cfuse->client->cct->_conf->fuse_multithreaded && + sleep && lock->l_type != F_UNLCK) { + fuse_reply_err(req, EDEADLK); + return; + } + + int r = cfuse->client->ll_setlk(fh, lock, fi->lock_owner, sleep); + fuse_reply_err(req, -r); +} + +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 9) +static void fuse_ll_flock(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi, int cmd) +{ + CephFuse::Handle *cfuse = (CephFuse::Handle *)fuse_req_userdata(req); + Fh *fh = (Fh*)fi->fh; + + // must use multithread if operation may block + if (!cfuse->client->cct->_conf->fuse_multithreaded && + !(cmd & (LOCK_NB | LOCK_UN))) { + fuse_reply_err(req, EDEADLK); + return; + } + + int r = cfuse->client->ll_flock(fh, cmd, fi->lock_owner); + fuse_reply_err(req, -r); +} +#endif + #if 0 static int getgroups_cb(void *handle, uid_t uid, gid_t **sgids) { @@ -745,8 +794,8 @@ const static struct fuse_lowlevel_ops fuse_ll_oper = { removexattr: fuse_ll_removexattr, access: fuse_ll_access, create: fuse_ll_create, - getlk: 0, - setlk: 0, + getlk: fuse_ll_getlk, + setlk: fuse_ll_setlk, bmap: 0, #if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8) #ifdef FUSE_IOCTL_COMPAT @@ -755,13 +804,15 @@ const static struct fuse_lowlevel_ops fuse_ll_oper = { ioctl: 0, #endif poll: 0, -#if FUSE_VERSION > FUSE_MAKE_VERSION(2, 9) +#endif +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 9) write_buf: 0, retrieve_reply: 0, forget_multi: 0, - flock: 0, - fallocate: fuse_ll_fallocate + flock: fuse_ll_flock, #endif +#if FUSE_VERSION > FUSE_MAKE_VERSION(2, 9) + fallocate: fuse_ll_fallocate #endif }; diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index 56ce23399e55f..59fe09136e5d0 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -705,7 +705,8 @@ struct cap_reconnect_t { cap_reconnect_t() { memset(&capinfo, 0, sizeof(capinfo)); } - cap_reconnect_t(uint64_t cap_id, inodeno_t pino, const string& p, int w, int i, inodeno_t sr) : + cap_reconnect_t(uint64_t cap_id, inodeno_t pino, const string& p, int w, int i, + inodeno_t sr, bufferlist& lb) : path(p) { capinfo.cap_id = cap_id; capinfo.wanted = w; @@ -713,6 +714,7 @@ struct cap_reconnect_t { capinfo.snaprealm = sr; capinfo.pathbase = pino; capinfo.flock_len = 0; + flockbl.claim(lb); } void encode(bufferlist& bl) const; void decode(bufferlist::iterator& bl); diff --git a/src/messages/MClientReconnect.h b/src/messages/MClientReconnect.h index 4e2839cab695a..1b072a35d5533 100644 --- a/src/messages/MClientReconnect.h +++ b/src/messages/MClientReconnect.h @@ -40,9 +40,9 @@ public: } void add_cap(inodeno_t ino, uint64_t cap_id, inodeno_t pathbase, const string& path, - int wanted, int issued, - inodeno_t sr) { - caps[ino] = cap_reconnect_t(cap_id, pathbase, path, wanted, issued, sr); + int wanted, int issued, inodeno_t sr, bufferlist& lb) + { + caps[ino] = cap_reconnect_t(cap_id, pathbase, path, wanted, issued, sr, lb); } void add_snaprealm(inodeno_t ino, snapid_t seq, inodeno_t parent) { ceph_mds_snaprealm_reconnect r; -- 2.39.5