]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
client: posix file lock support
authorYan, Zheng <zyan@redhat.com>
Thu, 2 Oct 2014 11:07:41 +0000 (19:07 +0800)
committerGreg Farnum <gfarnum@redhat.com>
Fri, 13 Feb 2015 22:41:09 +0000 (14:41 -0800)
Signed-off-by: Yan, Zheng <zyan@redhat.com>
(cherry picked from commit a1b2c8ff955b30807ac53ce6bdc97cf61a7262ca)

Conflicts:
src/client/Client.cc
src/client/Client.h
src/client/Inode.h

Signed-off-by: Greg Farnum <gfarnum@redhat.com>
src/client/Client.cc
src/client/Client.h
src/client/Fh.h
src/client/Inode.h
src/client/fuse_ll.cc
src/mds/mdstypes.h
src/messages/MClientReconnect.h

index 6b19a7a35584fccce8f12a6dafd343b2f5a0f94d..7770791bd3493098ee10841c2cd9fc1867286e6f 100644 (file)
@@ -53,7 +53,7 @@ using namespace std;
 
 #include "mon/MonClient.h"
 
-#include "mds/MDSMap.h"
+#include "mds/flock.h"
 #include "osd/OSDMap.h"
 #include "mon/MonMap.h"
 
@@ -2034,6 +2034,9 @@ void Client::send_reconnect(MetaSession *session)
       in->make_long_path(path);
       ldout(cct, 10) << "    path " << path << dendl;
 
+      bufferlist flockbl;
+      _encode_filelocks(in, flockbl);
+
       in->caps[mds]->seq = 0;  // reset seq.
       in->caps[mds]->issue_seq = 0;  // reset seq.
       in->caps[mds]->mseq = 0;  // reset seq.
@@ -2042,7 +2045,8 @@ void Client::send_reconnect(MetaSession *session)
                 path.get_ino(), path.get_path(),   // ino
                 in->caps_wanted(), // wanted
                 in->caps[mds]->issued,     // issued
-                in->snaprealm->ino);
+                in->snaprealm->ino,
+                flockbl);
 
       if (did_snaprealm.count(in->snaprealm->ino) == 0) {
        ldout(cct, 10) << " snaprealm " << *in->snaprealm << dendl;
@@ -2182,6 +2186,8 @@ void Client::put_inode(Inode *in, int n)
     in->snaprealm_item.remove_myself();
     if (in == root)
       root = 0;
+    delete in->fcntl_locks;
+    delete in->flock_locks;
     delete in;
   }
 }
@@ -5870,6 +5876,8 @@ int Client::_release_fh(Fh *f)
     in->snap_cap_refs--;
   }
 
+  _release_filelocks(f);
+
   put_inode(in);
   delete f;
 
@@ -6858,6 +6866,266 @@ int Client::statfs(const char *path, struct statvfs *stbuf)
   return rval;
 }
 
+int Client::_do_filelock(Inode *in, int lock_type, int op, int sleep,
+                        struct flock *fl, uint64_t owner)
+{
+  ldout(cct, 10) << "_do_filelock ino " << in->ino
+                << (lock_type == CEPH_LOCK_FCNTL ? " fcntl" : " flock")
+                << " type " << fl->l_type << " owner " << owner
+                << " " << fl->l_start << "~" << fl->l_len << dendl;
+
+  int lock_cmd;
+  if (F_RDLCK == fl->l_type)
+    lock_cmd = CEPH_LOCK_SHARED;
+  else if (F_WRLCK == fl->l_type)
+    lock_cmd = CEPH_LOCK_EXCL;
+  else if (F_UNLCK == fl->l_type)
+    lock_cmd = CEPH_LOCK_UNLOCK;
+  else
+    return -EIO;
+
+  /*
+   * Set the most significant bit, so that MDS knows the 'owner'
+   * is sufficient to identify the owner of lock. (old code uses
+   * both 'owner' and 'pid')
+   */
+  owner |= (1ULL << 63);
+
+  MetaRequest *req = new MetaRequest(op);
+  filepath path;
+  in->make_nosnap_relative_path(path);
+  req->set_filepath(path);
+  req->set_inode(in);
+
+  req->head.args.filelock_change.rule = lock_type;
+  req->head.args.filelock_change.type = lock_cmd;
+  req->head.args.filelock_change.owner = owner;
+  req->head.args.filelock_change.pid = fl->l_pid;
+  req->head.args.filelock_change.start = fl->l_start;
+  req->head.args.filelock_change.length = fl->l_len;
+  req->head.args.filelock_change.wait = sleep;
+
+  bufferlist bl;
+  int ret = make_request(req, -1, -1, NULL, NULL, -1, &bl);
+
+  if (ret == 0) {
+    if (op == CEPH_MDS_OP_GETFILELOCK) {
+      ceph_filelock filelock;
+      bufferlist::iterator p = bl.begin();
+      ::decode(filelock, p);
+
+      if (CEPH_LOCK_SHARED == filelock.type)
+       fl->l_type = F_RDLCK;
+      else if (CEPH_LOCK_EXCL == filelock.type)
+       fl->l_type = F_WRLCK;
+      else
+       fl->l_type = F_UNLCK;
+
+      fl->l_whence = SEEK_SET;
+      fl->l_start = filelock.start;
+      fl->l_len = filelock.length;
+      fl->l_pid = filelock.pid;
+    } else if (op == CEPH_MDS_OP_SETFILELOCK) {
+      ceph_lock_state_t *lock_state;
+      if (lock_type == CEPH_LOCK_FCNTL) {
+       if (!in->fcntl_locks)
+         in->fcntl_locks = new ceph_lock_state_t(cct);
+       lock_state = in->fcntl_locks;
+      } else if (lock_type == CEPH_LOCK_FLOCK) {
+       if (!in->flock_locks)
+         in->flock_locks = new ceph_lock_state_t(cct);
+       lock_state = in->flock_locks;
+      } else
+       assert(0);
+
+      ceph_filelock filelock;
+      _convert_flock(fl, owner, &filelock);
+      if (filelock.type == CEPH_LOCK_UNLOCK) {
+       list<ceph_filelock> activated_locks;
+       lock_state->remove_lock(filelock, activated_locks);
+      } else {
+       bool r = lock_state->add_lock(filelock, false, false);
+       assert(r);
+      }
+    } else
+      assert(0);
+  }
+  return ret;
+}
+
+void Client::_encode_filelocks(Inode *in, bufferlist& bl)
+{
+  if (!in->fcntl_locks && !in->flock_locks)
+    return;
+
+  unsigned nr_fcntl_locks = in->fcntl_locks ? in->fcntl_locks->held_locks.size() : 0;
+  ::encode(nr_fcntl_locks, bl);
+  if (nr_fcntl_locks) {
+    ceph_lock_state_t* lock_state = in->fcntl_locks;
+    for(multimap<uint64_t, ceph_filelock>::iterator p = lock_state->held_locks.begin();
+       p != lock_state->held_locks.end();
+       ++p)
+      ::encode(p->second, bl);
+  }
+
+  unsigned nr_flock_locks = in->flock_locks ? in->flock_locks->held_locks.size() : 0;
+  ::encode(nr_flock_locks, bl);
+  if (nr_flock_locks) {
+    ceph_lock_state_t* lock_state = in->flock_locks;
+    for(multimap<uint64_t, ceph_filelock>::iterator p = lock_state->held_locks.begin();
+       p != lock_state->held_locks.end();
+       ++p)
+      ::encode(p->second, bl);
+  }
+
+  ldout(cct, 10) << "_encode_filelocks ino " << in->ino << ", " << nr_fcntl_locks
+                << " fcntl locks, " << nr_flock_locks << " flock locks" <<  dendl;
+}
+
+void Client::_release_filelocks(Fh *fh)
+{
+  if (!fh->fcntl_locks && !fh->flock_locks)
+    return;
+
+  Inode *in = fh->inode;
+  ldout(cct, 10) << "_release_filelocks " << fh << " ino " << in->ino << dendl;
+
+  list<pair<int, ceph_filelock> > to_release;
+
+  if (fh->fcntl_locks) {
+    ceph_lock_state_t* lock_state = fh->fcntl_locks;
+    for(multimap<uint64_t, ceph_filelock>::iterator p = lock_state->held_locks.begin();
+       p != lock_state->held_locks.end();
+       ++p)
+      to_release.push_back(pair<int, ceph_filelock>(CEPH_LOCK_FCNTL, p->second));
+    delete fh->fcntl_locks;
+  }
+  if (fh->flock_locks) {
+    ceph_lock_state_t* lock_state = fh->flock_locks;
+    for(multimap<uint64_t, ceph_filelock>::iterator p = lock_state->held_locks.begin();
+       p != lock_state->held_locks.end();
+       ++p)
+      to_release.push_back(pair<int, ceph_filelock>(CEPH_LOCK_FLOCK, p->second));
+    delete fh->flock_locks;
+  }
+
+  if (to_release.empty())
+    return;
+
+  struct flock fl;
+  memset(&fl, 0, sizeof(fl));
+  fl.l_whence = SEEK_SET;
+  fl.l_type = F_UNLCK;
+
+  for (list<pair<int, ceph_filelock> >::iterator p = to_release.begin();
+       p != to_release.end();
+       ++p) {
+    fl.l_start = p->second.start;
+    fl.l_len = p->second.length;
+    fl.l_pid = p->second.pid;
+    _do_filelock(in, p->first, CEPH_MDS_OP_SETFILELOCK, 0, &fl, p->second.owner);
+  }
+}
+
+void Client::_convert_flock(struct flock *fl, uint64_t owner, struct ceph_filelock *filelock)
+{
+  int lock_cmd;
+  if (F_RDLCK == fl->l_type)
+    lock_cmd = CEPH_LOCK_SHARED;
+  else if (F_WRLCK == fl->l_type)
+    lock_cmd = CEPH_LOCK_EXCL;
+  else
+    lock_cmd = CEPH_LOCK_UNLOCK;;
+
+  filelock->start = fl->l_start;
+  filelock->length = fl->l_len;
+  filelock->client = 0;
+  // see comment in _do_filelock()
+  filelock->owner = owner | (1ULL << 63);
+  filelock->pid = fl->l_pid;
+  filelock->type = lock_cmd;
+}
+
+int Client::_getlk(Fh *fh, struct flock *fl, uint64_t owner)
+{
+  Inode *in = fh->inode;
+  ldout(cct, 10) << "_getlk " << fh << " ino " << in->ino << dendl;
+  int ret = _do_filelock(in, CEPH_LOCK_FCNTL, CEPH_MDS_OP_GETFILELOCK, 0, fl, owner);
+  return ret;
+}
+
+int Client::_setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep)
+{
+  Inode *in = fh->inode;
+  ldout(cct, 10) << "_setlk " << fh << " ino " << in->ino << dendl;
+  int ret =  _do_filelock(in, CEPH_LOCK_FCNTL, CEPH_MDS_OP_SETFILELOCK, sleep, fl, owner);
+  if (ret == 0) {
+    if (!fh->fcntl_locks)
+      fh->fcntl_locks = new ceph_lock_state_t(cct);
+
+    ceph_filelock filelock;
+    _convert_flock(fl, owner, &filelock);
+
+    if (filelock.type == CEPH_LOCK_UNLOCK) {
+      list<ceph_filelock> activated_locks;
+      fh->fcntl_locks->remove_lock(filelock, activated_locks);
+    } else {
+      bool r = fh->fcntl_locks->add_lock(filelock, false, false);
+      assert(r);
+    }
+  }
+  ldout(cct, 10) << "_setlk " << fh << " ino " << in->ino << " result=" << ret << dendl;
+  return ret;
+}
+
+int Client::_flock(Fh *fh, int cmd, uint64_t owner)
+{
+  Inode *in = fh->inode;
+  ldout(cct, 10) << "_flock " << fh << " ino " << in->ino << dendl;
+
+  int sleep = !(cmd & LOCK_NB);
+  cmd &= ~LOCK_NB;
+
+  int type;
+  switch (cmd) {
+    case LOCK_SH:
+      type = F_RDLCK;
+      break;
+    case LOCK_EX:
+      type = F_WRLCK;
+      break;
+    case LOCK_UN:
+      type = F_UNLCK;
+      break;
+    default:
+      return -EINVAL;
+  }
+
+  struct flock fl;
+  memset(&fl, 0, sizeof(fl));
+  fl.l_type = type;
+  fl.l_whence = SEEK_SET;
+
+  int ret =  _do_filelock(in, CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, sleep, &fl, owner);
+  if (ret == 0) {
+    if (!fh->flock_locks)
+      fh->flock_locks = new ceph_lock_state_t(cct);
+
+    ceph_filelock filelock;
+    _convert_flock(&fl, owner, &filelock);
+
+    if (filelock.type == CEPH_LOCK_UNLOCK) {
+      list<ceph_filelock> activated_locks;
+      fh->flock_locks->remove_lock(filelock, activated_locks);
+    } else {
+      bool r = fh->flock_locks->add_lock(filelock, false, false);
+      assert(r);
+    }
+  }
+  ldout(cct, 10) << "_flock " << fh << " ino " << in->ino << " result=" << ret << dendl;
+  return ret;
+}
+
 int Client::ll_statfs(Inode *in, struct statvfs *stbuf)
 {
   /* Since the only thing this does is wrap a call to statfs, and
@@ -8740,7 +9008,35 @@ int Client::ll_release(Fh *fh)
   return 0;
 }
 
+int Client::ll_getlk(Fh *fh, struct flock *fl, uint64_t owner)
+{
+  Mutex::Locker lock(client_lock);
+
+  ldout(cct, 3) << "ll_getlk (fh)" << fh << " " << fh->inode->ino << dendl;
+  tout(cct) << "ll_getk (fh)" << (unsigned long)fh << std::endl;
 
+  return _getlk(fh, fl, owner);
+}
+
+int Client::ll_setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep)
+{
+  Mutex::Locker lock(client_lock);
+
+  ldout(cct, 3) << "ll_setlk  (fh) " << fh << " " << fh->inode->ino << dendl;
+  tout(cct) << "ll_setk (fh)" << (unsigned long)fh << std::endl;
+
+  return _setlk(fh, fl, owner, sleep);
+}
+
+int Client::ll_flock(Fh *fh, int cmd, uint64_t owner)
+{
+  Mutex::Locker lock(client_lock);
+
+  ldout(cct, 3) << "ll_flock  (fh) " << fh << " " << fh->inode->ino << dendl;
+  tout(cct) << "ll_flock (fh)" << (unsigned long)fh << std::endl;
+
+  return _flock(fh, cmd, owner);
+}
 // =========================================
 // layout
 
index e31e90af98ccf2db73b8961d587a9b5be60f696f..924e0cfe6730911897bba76d64b65b1248d50095 100644 (file)
@@ -119,6 +119,7 @@ struct CapSnap;
 
 struct MetaSession;
 struct MetaRequest;
+class ceph_lock_state_t;
 
 
 typedef void (*client_ino_callback_t)(void *handle, vinodeno_t ino, int64_t off, int64_t len);
@@ -604,6 +605,9 @@ private:
   int _fsync(Fh *fh, bool syncdataonly);
   int _sync_fs();
   int _fallocate(Fh *fh, int mode, int64_t offset, int64_t length);
+  int _getlk(Fh *fh, struct flock *fl, uint64_t owner);
+  int _setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep);
+  int _flock(Fh *fh, int cmd, uint64_t owner);
 
   int get_or_create(Inode *dir, const char* name,
                    Dentry **pdn, bool expect_null=false);
@@ -613,6 +617,12 @@ private:
   vinodeno_t _get_vino(Inode *in);
   inodeno_t _get_inodeno(Inode *in);
 
+  int _do_filelock(Inode *in, int lock_type, int op, int sleep,
+                  struct flock *fl, uint64_t owner);
+  void _encode_filelocks(Inode *in, bufferlist& bl);
+  void _release_filelocks(Fh *fh);
+  void _convert_flock(struct flock *fl, uint64_t owner, ceph_filelock *filelock);
+
 public:
   int mount(const std::string &mount_root);
   void unmount();
@@ -818,6 +828,9 @@ public:
   int ll_fsync(Fh *fh, bool syncdataonly);
   int ll_fallocate(Fh *fh, int mode, loff_t offset, loff_t length);
   int ll_release(Fh *fh);
+  int ll_getlk(Fh *fh, struct flock *fl, uint64_t owner);
+  int ll_setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep);
+  int ll_flock(Fh *fh, int cmd, uint64_t owner);
   int ll_get_stripe_osd(struct Inode *in, uint64_t blockno,
                        ceph_file_layout* layout);
   uint64_t ll_get_internal_offset(struct Inode *in, uint64_t blockno);
index 083ccd1ad59beacef8c84529cd5adbc3eff22196..237a6d8a24e0473e75b8182bde7b3b18729d1446 100644 (file)
@@ -5,6 +5,7 @@
 
 class Inode;
 class Cond;
+class ceph_lock_state_t;
 
 // file handle for any open file state
 
@@ -23,8 +24,13 @@ struct Fh {
   loff_t consec_read_bytes;
   int nr_consec_read;
 
+  // file lock
+  ceph_lock_state_t *fcntl_locks;
+  ceph_lock_state_t *flock_locks;
+
   Fh() : inode(0), pos(0), mds(0), mode(0), flags(0), pos_locked(false),
-        last_pos(0), consec_read_bytes(0), nr_consec_read(0) {}
+        last_pos(0), consec_read_bytes(0), nr_consec_read(0),
+        fcntl_locks(NULL), flock_locks(NULL)  {}
 };
 
 
index 221a91a523dadb26ec3fe8bc578df4fda31490bb..91ba2fcc80b9d05fda6fb05d3bacf37030559526 100644 (file)
@@ -17,7 +17,8 @@ struct MetaSession;
 class Dentry;
 class Dir;
 struct SnapRealm;
-class Inode;
+struct Inode;
+class ceph_lock_state_t;
 
 struct Cap {
   MetaSession *session;
@@ -210,6 +211,10 @@ class Inode {
     ll_ref -= n;
   }
 
+  // file locks
+  ceph_lock_state_t *fcntl_locks;
+  ceph_lock_state_t *flock_locks;
+
   Inode(CephContext *cct_, vinodeno_t vino, ceph_file_layout *newlayout)
     : cct(cct_), ino(vino.ino), snapid(vino.snapid),
       rdev(0), mode(0), uid(0), gid(0), nlink(0),
@@ -224,8 +229,8 @@ class Inode {
       snaprealm(0), snaprealm_item(this), snapdir_parent(0),
       oset((void *)this, newlayout->fl_pg_pool, ino),
       reported_size(0), wanted_max_size(0), requested_max_size(0),
-      _ref(0), ll_ref(0), 
-      dir(0), dn_set()
+      _ref(0), ll_ref(0), dir(0), dn_set(),
+      fcntl_locks(NULL), flock_locks(NULL)
   {
     memset(&dir_layout, 0, sizeof(dir_layout));
     memset(&layout, 0, sizeof(layout));
index 7f419c3232c4a4ecb6fcb54128c03efdaa2e8da6..ef6629232f10bf8052e7d6d2deab6b8d37af300d 100644 (file)
@@ -636,6 +636,55 @@ static void fuse_ll_statfs(fuse_req_t req, fuse_ino_t ino)
   cfuse->iput(in); // iput required
 }
 
+static void fuse_ll_getlk(fuse_req_t req, fuse_ino_t ino,
+                         struct fuse_file_info *fi, struct flock *lock)
+{
+  CephFuse::Handle *cfuse = (CephFuse::Handle *)fuse_req_userdata(req);
+  Fh *fh = (Fh*)fi->fh;
+
+  int r = cfuse->client->ll_getlk(fh, lock, fi->lock_owner);
+  if (r == 0)
+    fuse_reply_lock(req, lock);
+  else
+    fuse_reply_err(req, -r);
+}
+
+static void fuse_ll_setlk(fuse_req_t req, fuse_ino_t ino,
+                         struct fuse_file_info *fi, struct flock *lock, int sleep)
+{
+  CephFuse::Handle *cfuse = (CephFuse::Handle *)fuse_req_userdata(req);
+  Fh *fh = (Fh*)fi->fh;
+
+  // must use multithread if operation may block
+  if (!cfuse->client->cct->_conf->fuse_multithreaded &&
+      sleep && lock->l_type != F_UNLCK) {
+    fuse_reply_err(req, EDEADLK);
+    return;
+  }
+
+  int r = cfuse->client->ll_setlk(fh, lock, fi->lock_owner, sleep);
+  fuse_reply_err(req, -r);
+}
+
+#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 9)
+static void fuse_ll_flock(fuse_req_t req, fuse_ino_t ino,
+                         struct fuse_file_info *fi, int cmd)
+{
+  CephFuse::Handle *cfuse = (CephFuse::Handle *)fuse_req_userdata(req);
+  Fh *fh = (Fh*)fi->fh;
+
+  // must use multithread if operation may block
+  if (!cfuse->client->cct->_conf->fuse_multithreaded &&
+      !(cmd & (LOCK_NB | LOCK_UN))) {
+    fuse_reply_err(req, EDEADLK);
+    return;
+  }
+
+  int r = cfuse->client->ll_flock(fh, cmd, fi->lock_owner);
+  fuse_reply_err(req, -r);
+}
+#endif
+
 #if 0
 static int getgroups_cb(void *handle, uid_t uid, gid_t **sgids)
 {
@@ -742,8 +791,8 @@ const static struct fuse_lowlevel_ops fuse_ll_oper = {
  removexattr: fuse_ll_removexattr,
  access: fuse_ll_access,
  create: fuse_ll_create,
- getlk: 0,
- setlk: 0,
+ getlk: fuse_ll_getlk,
+ setlk: fuse_ll_setlk,
  bmap: 0,
 #if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8)
 #ifdef FUSE_IOCTL_COMPAT
@@ -752,13 +801,15 @@ const static struct fuse_lowlevel_ops fuse_ll_oper = {
  ioctl: 0,
 #endif
  poll: 0,
-#if FUSE_VERSION > FUSE_MAKE_VERSION(2, 9)
+#endif
+#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 9)
  write_buf: 0,
  retrieve_reply: 0,
  forget_multi: 0,
- flock: 0,
- fallocate: fuse_ll_fallocate
+ flock: fuse_ll_flock,
 #endif
+#if FUSE_VERSION > FUSE_MAKE_VERSION(2, 9)
+ fallocate: fuse_ll_fallocate
 #endif
 };
 
index 184cf7042877355205d6e5b2b974f6fd67fb612d..73f1ba208c01ce68da17014cb2e5be22d9e1adae 100644 (file)
@@ -703,7 +703,8 @@ struct cap_reconnect_t {
   cap_reconnect_t() {
     memset(&capinfo, 0, sizeof(capinfo));
   }
-  cap_reconnect_t(uint64_t cap_id, inodeno_t pino, const string& p, int w, int i, inodeno_t sr) : 
+  cap_reconnect_t(uint64_t cap_id, inodeno_t pino, const string& p, int w, int i,
+                 inodeno_t sr, bufferlist& lb) :
     path(p) {
     capinfo.cap_id = cap_id;
     capinfo.wanted = w;
@@ -711,6 +712,7 @@ struct cap_reconnect_t {
     capinfo.snaprealm = sr;
     capinfo.pathbase = pino;
     capinfo.flock_len = 0;
+    flockbl.claim(lb);
   }
   void encode(bufferlist& bl) const;
   void decode(bufferlist::iterator& bl);
index 4e2839cab695ab90b65b189f6fb4fa135ecc899b..1b072a35d5533aa75c2abb4c4e6fc4dee935e390 100644 (file)
@@ -40,9 +40,9 @@ public:
   }
 
   void add_cap(inodeno_t ino, uint64_t cap_id, inodeno_t pathbase, const string& path,
-              int wanted, int issued,
-              inodeno_t sr) {
-    caps[ino] = cap_reconnect_t(cap_id, pathbase, path, wanted, issued, sr);
+              int wanted, int issued, inodeno_t sr, bufferlist& lb)
+  {
+    caps[ino] = cap_reconnect_t(cap_id, pathbase, path, wanted, issued, sr, lb);
   }
   void add_snaprealm(inodeno_t ino, snapid_t seq, inodeno_t parent) {
     ceph_mds_snaprealm_reconnect r;