From c6d0c035b2be29b5f80765038604628eb81b55db Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 27 Mar 2018 21:09:43 +0800 Subject: [PATCH] client: LAZY_IO support 'client_force_lazyio' option enables LAZY_IO globally for libcephfs and ceph-fuse mount. 'ceph_lazyio(...)' and 'ceph_ll_lazyio(...)' enable LAZY_IO for open files in libcephfs. 'ioctl(fd, CEPH_IOC_LAZYIO, 1UL)' enables LAZY_IO for open files in ceph-fuse mount. Fixes: http://tracker.ceph.com/issues/20598 Signed-off-by: "Yan, Zheng" --- src/client/Client.cc | 133 +++++++++++++++++++++++++++------ src/client/Client.h | 3 + src/client/Inode.cc | 4 +- src/common/ceph_fs.cc | 5 +- src/common/options.cc | 4 + src/include/ceph_fs.h | 1 + src/include/cephfs/libcephfs.h | 12 +++ src/libcephfs.cc | 12 +++ src/mds/locks.c | 8 +- 9 files changed, 152 insertions(+), 30 deletions(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index e585ed90164e..cd694b361c51 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -3360,6 +3360,35 @@ static bool is_max_size_approaching(Inode *in) return false; } +static int adjust_caps_used_for_lazyio(int used, int issued, int implemented) +{ + if (!(used & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_BUFFER))) + return used; + if (!(implemented & CEPH_CAP_FILE_LAZYIO)) + return used; + + if (issued & CEPH_CAP_FILE_LAZYIO) { + if (!(issued & CEPH_CAP_FILE_CACHE)) { + used &= ~CEPH_CAP_FILE_CACHE; + used |= CEPH_CAP_FILE_LAZYIO; + } + if (!(issued & CEPH_CAP_FILE_BUFFER)) { + used &= ~CEPH_CAP_FILE_BUFFER; + used |= CEPH_CAP_FILE_LAZYIO; + } + } else { + if (!(implemented & CEPH_CAP_FILE_CACHE)) { + used &= ~CEPH_CAP_FILE_CACHE; + used |= CEPH_CAP_FILE_LAZYIO; + } + if (!(implemented & CEPH_CAP_FILE_BUFFER)) { + used &= ~CEPH_CAP_FILE_BUFFER; + used |= CEPH_CAP_FILE_LAZYIO; + } + } + return used; +} + /** * check_caps * @@ -3386,6 +3415,9 @@ void Client::check_caps(Inode *in, unsigned flags) int issued = in->caps_issued(&implemented); int revoking = implemented & 
~issued; + int orig_used = used; + used = adjust_caps_used_for_lazyio(used, issued, implemented); + int retain = wanted | used | CEPH_CAP_PIN; if (!unmounting) { if (wanted) @@ -3408,10 +3440,10 @@ void Client::check_caps(Inode *in, unsigned flags) if (in->caps.empty()) return; // guard if at end of func - if ((revoking & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) && - (used & CEPH_CAP_FILE_CACHE) && !(used & CEPH_CAP_FILE_BUFFER)) { + if (!(orig_used & CEPH_CAP_FILE_BUFFER) && + (revoking & used & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO))) { if (_release(in)) - used &= ~CEPH_CAP_FILE_CACHE; + used &= ~(CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO); } if (!in->cap_snaps.empty()) @@ -5135,10 +5167,11 @@ void Client::handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, MClient else if (revoked & ceph_deleg_caps_for_type(CEPH_DELEGATION_WR)) in->recall_deleg(true); - if ((used & revoked & CEPH_CAP_FILE_BUFFER) && + used = adjust_caps_used_for_lazyio(used, cap->issued, cap->implemented); + if ((used & revoked & (CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO)) && !_flush(in, new C_Client_FlushComplete(this, in))) { // waitin' for flush - } else if (revoked & CEPH_CAP_FILE_CACHE) { + } else if (used & revoked & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) { if (_release(in)) check = true; } else { @@ -8596,9 +8629,11 @@ int Client::_open(Inode *in, int flags, mode_t mode, Fh **fhp, } // use normalized flags to generate cmode - int cmode = ceph_flags_to_mode(ceph_flags_sys2wire(flags)); - if (cmode < 0) - return -EINVAL; + int cflags = ceph_flags_sys2wire(flags); + if (cct->_conf.get_val("client_force_lazyio")) + cflags |= CEPH_O_LAZY; + + int cmode = ceph_flags_to_mode(cflags); int want = ceph_caps_for_mode(cmode); int result = 0; @@ -8613,7 +8648,7 @@ int Client::_open(Inode *in, int flags, mode_t mode, Fh **fhp, filepath path; in->make_nosnap_relative_path(path); req->set_filepath(path); - req->head.args.open.flags = ceph_flags_sys2wire(flags & ~O_CREAT); + 
req->head.args.open.flags = cflags & ~CEPH_O_CREAT; req->head.args.open.mode = mode; req->head.args.open.pool = -1; if (cct->_conf->client_debug_getattr_caps) @@ -8898,7 +8933,7 @@ int Client::preadv(int fd, const struct iovec *iov, int iovcnt, loff_t offset) int64_t Client::_read(Fh *f, int64_t offset, uint64_t size, bufferlist *bl) { - int have = 0; + int want, have = 0; bool movepos = false; std::unique_ptr onuninline; int64_t r = 0; @@ -8925,13 +8960,16 @@ int64_t Client::_read(Fh *f, int64_t offset, uint64_t size, bufferlist *bl) } retry: - have = 0; - r = get_caps(in, CEPH_CAP_FILE_RD, CEPH_CAP_FILE_CACHE, &have, -1); + if (f->mode & CEPH_FILE_MODE_LAZY) + want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; + else + want = CEPH_CAP_FILE_CACHE; + r = get_caps(in, CEPH_CAP_FILE_RD, want, &have, -1); if (r < 0) { goto done; } if (f->flags & O_DIRECT) - have &= ~CEPH_CAP_FILE_CACHE; + have &= ~(CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO); if (in->inline_version < CEPH_INLINE_NONE) { if (!(have & CEPH_CAP_FILE_CACHE)) { @@ -8962,7 +9000,8 @@ retry: } if (!conf->client_debug_force_sync_read && - (conf->client_oc && (have & CEPH_CAP_FILE_CACHE))) { + conf->client_oc && + (have & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO))) { if (f->flags & O_RSYNC) { _flush_range(in, offset, size); @@ -9350,9 +9389,12 @@ int64_t Client::_write(Fh *f, int64_t offset, uint64_t size, const char *buf, utime_t lat; uint64_t totalwritten; - int have; - int r = get_caps(in, CEPH_CAP_FILE_WR|CEPH_CAP_AUTH_SHARED, - CEPH_CAP_FILE_BUFFER, &have, endoff); + int want, have; + if (f->mode & CEPH_FILE_MODE_LAZY) + want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; + else + want = CEPH_CAP_FILE_BUFFER; + int r = get_caps(in, CEPH_CAP_FILE_WR|CEPH_CAP_AUTH_SHARED, want, &have, endoff); if (r < 0) return r; @@ -9369,7 +9411,7 @@ int64_t Client::_write(Fh *f, int64_t offset, uint64_t size, const char *buf, } if (f->flags & O_DIRECT) - have &= ~CEPH_CAP_FILE_BUFFER; + have &= ~(CEPH_CAP_FILE_BUFFER 
| CEPH_CAP_FILE_LAZYIO); ldout(cct, 10) << " snaprealm " << *in->snaprealm << dendl; @@ -9403,7 +9445,8 @@ int64_t Client::_write(Fh *f, int64_t offset, uint64_t size, const char *buf, } } - if (cct->_conf->client_oc && (have & CEPH_CAP_FILE_BUFFER)) { + if (cct->_conf->client_oc && + (have & (CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO))) { // do buffered write if (!in->oset.dirty_or_tx) get_cap_ref(in, CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_BUFFER); @@ -10277,6 +10320,48 @@ int64_t Client::drop_caches() return objectcacher->release_all(); } +int Client::_lazyio(Fh *fh, int enable) +{ + Inode *in = fh->inode.get(); + ldout(cct, 20) << __func__ << " " << *in << " " << !!enable << dendl; + + if (!!(fh->mode & CEPH_FILE_MODE_LAZY) == !!enable) + return 0; + + int orig_mode = fh->mode; + if (enable) { + fh->mode |= CEPH_FILE_MODE_LAZY; + in->get_open_ref(fh->mode); + in->put_open_ref(orig_mode); + check_caps(in, CHECK_CAPS_NODELAY); + } else { + fh->mode &= ~CEPH_FILE_MODE_LAZY; + in->get_open_ref(fh->mode); + in->put_open_ref(orig_mode); + check_caps(in, 0); + } + + return 0; +} + +int Client::lazyio(int fd, int enable) +{ + Mutex::Locker l(client_lock); + Fh *f = get_filehandle(fd); + if (!f) + return -EBADF; + + return _lazyio(f, enable); +} + +int Client::ll_lazyio(Fh *fh, int enable) +{ + Mutex::Locker lock(client_lock); + ldout(cct, 3) << __func__ << " " << fh << " " << fh->inode->ino << " " << !!enable << dendl; + tout(cct) << __func__ << std::endl; + + return _lazyio(fh, enable); +} int Client::lazyio_propogate(int fd, loff_t offset, size_t count) { @@ -11882,9 +11967,11 @@ int Client::_create(Inode *dir, const char *name, int flags, mode_t mode, } // use normalized flags to generate cmode - int cmode = ceph_flags_to_mode(ceph_flags_sys2wire(flags)); - if (cmode < 0) - return -EINVAL; + int cflags = ceph_flags_sys2wire(flags); + if (cct->_conf.get_val("client_force_lazyio")) + cflags |= CEPH_O_LAZY; + + int cmode = ceph_flags_to_mode(cflags); int64_t pool_id = 
-1; if (data_pool && *data_pool) { @@ -11903,7 +11990,7 @@ int Client::_create(Inode *dir, const char *name, int flags, mode_t mode, path.push_dentry(name); req->set_filepath(path); req->set_inode(dir); - req->head.args.open.flags = ceph_flags_sys2wire(flags | O_CREAT); + req->head.args.open.flags = cflags | CEPH_O_CREAT; req->head.args.open.stripe_unit = stripe_unit; req->head.args.open.stripe_count = stripe_count; diff --git a/src/client/Client.h b/src/client/Client.h index 5e005082dca5..208ff92e5d94 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -848,6 +848,7 @@ private: int _getlk(Fh *fh, struct flock *fl, uint64_t owner); int _setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep); int _flock(Fh *fh, int cmd, uint64_t owner); + int _lazyio(Fh *fh, int enable); int get_or_create(Inode *dir, const char* name, Dentry **pdn, bool expect_null=false); @@ -1108,6 +1109,7 @@ public: int64_t drop_caches(); // hpc lazyio + int lazyio(int fd, int enable); int lazyio_propogate(int fd, loff_t offset, size_t count); int lazyio_synchronize(int fd, loff_t offset, size_t count); @@ -1239,6 +1241,7 @@ public: int ll_getlk(Fh *fh, struct flock *fl, uint64_t owner); int ll_setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep); int ll_flock(Fh *fh, int cmd, uint64_t owner); + int ll_lazyio(Fh *fh, int enable); int ll_file_layout(Fh *fh, file_layout_t *layout); void ll_interrupt(void *d); bool ll_handle_umask() { diff --git a/src/client/Inode.cc b/src/client/Inode.cc index a4b09538790c..6e76fcfddaab 100644 --- a/src/client/Inode.cc +++ b/src/client/Inode.cc @@ -134,7 +134,9 @@ void Inode::get_open_ref(int mode) bool Inode::put_open_ref(int mode) { //cout << "open_by_mode[" << mode << "] " << open_by_mode[mode] << " -> " << (open_by_mode[mode]-1) << std::endl; - if (--open_by_mode[mode] == 0) + auto& ref = open_by_mode.at(mode); + assert(ref > 0); + if (--ref == 0) return true; return false; } diff --git a/src/common/ceph_fs.cc b/src/common/ceph_fs.cc index 
ce9873121c2c..7a4b59f872a9 100644 --- a/src/common/ceph_fs.cc +++ b/src/common/ceph_fs.cc @@ -14,10 +14,8 @@ int ceph_flags_to_mode(int flags) /* because CEPH_FILE_MODE_PIN is zero, so mode = -1 is error */ int mode = -1; -#ifdef O_DIRECTORY /* fixme */ if ((flags & CEPH_O_DIRECTORY) == CEPH_O_DIRECTORY) return CEPH_FILE_MODE_PIN; -#endif switch (flags & O_ACCMODE) { case CEPH_O_WRONLY: @@ -32,6 +30,9 @@ int ceph_flags_to_mode(int flags) break; } + if (flags & CEPH_O_LAZY) + mode |= CEPH_FILE_MODE_LAZY; + return mode; } diff --git a/src/common/options.cc b/src/common/options.cc index 01aab9e47fa7..475ed855c0e4 100644 --- a/src/common/options.cc +++ b/src/common/options.cc @@ -7297,6 +7297,10 @@ std::vector