return false;
}
+/*
+ * Rewrite a "used" cap mask for an inode on which lazyio is implemented.
+ *
+ * `used` is returned untouched unless it contains CEPH_CAP_FILE_CACHE or
+ * CEPH_CAP_FILE_BUFFER and `implemented` contains CEPH_CAP_FILE_LAZYIO.
+ * Otherwise a reference mask is chosen -- `issued` when lazyio is currently
+ * issued, `implemented` when it is not -- and for each of CACHE and BUFFER
+ * that the reference mask lacks, that bit is cleared from `used` and
+ * CEPH_CAP_FILE_LAZYIO is set in its place.
+ */
+static int adjust_caps_used_for_lazyio(int used, int issued, int implemented)
+{
+  const int cache_or_buffer = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_BUFFER;
+  if (!(used & cache_or_buffer) || !(implemented & CEPH_CAP_FILE_LAZYIO))
+    return used;
+
+  // mask that decides whether CACHE/BUFFER usage is backed by a real cap
+  const int backing = (issued & CEPH_CAP_FILE_LAZYIO) ? issued : implemented;
+
+  if (!(backing & CEPH_CAP_FILE_CACHE))
+    used = (used & ~CEPH_CAP_FILE_CACHE) | CEPH_CAP_FILE_LAZYIO;
+  if (!(backing & CEPH_CAP_FILE_BUFFER))
+    used = (used & ~CEPH_CAP_FILE_BUFFER) | CEPH_CAP_FILE_LAZYIO;
+
+  return used;
+}
+
/**
* check_caps
*
int issued = in->caps_issued(&implemented);
int revoking = implemented & ~issued;
+ int orig_used = used;
+ used = adjust_caps_used_for_lazyio(used, issued, implemented);
+
int retain = wanted | used | CEPH_CAP_PIN;
if (!unmounting) {
if (wanted)
if (in->caps.empty())
return; // guard if at end of func
- if ((revoking & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) &&
- (used & CEPH_CAP_FILE_CACHE) && !(used & CEPH_CAP_FILE_BUFFER)) {
+ if (!(orig_used & CEPH_CAP_FILE_BUFFER) &&
+ (revoking & used & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO))) {
if (_release(in))
- used &= ~CEPH_CAP_FILE_CACHE;
+ used &= ~(CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO);
}
if (!in->cap_snaps.empty())
else if (revoked & ceph_deleg_caps_for_type(CEPH_DELEGATION_WR))
in->recall_deleg(true);
- if ((used & revoked & CEPH_CAP_FILE_BUFFER) &&
+ used = adjust_caps_used_for_lazyio(used, cap->issued, cap->implemented);
+ if ((used & revoked & (CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO)) &&
!_flush(in, new C_Client_FlushComplete(this, in))) {
// waitin' for flush
- } else if (revoked & CEPH_CAP_FILE_CACHE) {
+ } else if (used & revoked & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) {
if (_release(in))
check = true;
} else {
}
// use normalized flags to generate cmode
- int cmode = ceph_flags_to_mode(ceph_flags_sys2wire(flags));
- if (cmode < 0)
- return -EINVAL;
+ int cflags = ceph_flags_sys2wire(flags);
+ if (cct->_conf.get_val<bool>("client_force_lazyio"))
+ cflags |= CEPH_O_LAZY;
+
+ int cmode = ceph_flags_to_mode(cflags);
int want = ceph_caps_for_mode(cmode);
int result = 0;
filepath path;
in->make_nosnap_relative_path(path);
req->set_filepath(path);
- req->head.args.open.flags = ceph_flags_sys2wire(flags & ~O_CREAT);
+ req->head.args.open.flags = cflags & ~CEPH_O_CREAT;
req->head.args.open.mode = mode;
req->head.args.open.pool = -1;
if (cct->_conf->client_debug_getattr_caps)
int64_t Client::_read(Fh *f, int64_t offset, uint64_t size, bufferlist *bl)
{
- int have = 0;
+ int want, have = 0;
bool movepos = false;
std::unique_ptr<C_SaferCond> onuninline;
int64_t r = 0;
}
retry:
- have = 0;
- r = get_caps(in, CEPH_CAP_FILE_RD, CEPH_CAP_FILE_CACHE, &have, -1);
+ if (f->mode & CEPH_FILE_MODE_LAZY)
+ want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
+ else
+ want = CEPH_CAP_FILE_CACHE;
+ r = get_caps(in, CEPH_CAP_FILE_RD, want, &have, -1);
if (r < 0) {
goto done;
}
if (f->flags & O_DIRECT)
- have &= ~CEPH_CAP_FILE_CACHE;
+ have &= ~(CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO);
if (in->inline_version < CEPH_INLINE_NONE) {
if (!(have & CEPH_CAP_FILE_CACHE)) {
}
if (!conf->client_debug_force_sync_read &&
- (conf->client_oc && (have & CEPH_CAP_FILE_CACHE))) {
+ conf->client_oc &&
+ (have & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO))) {
if (f->flags & O_RSYNC) {
_flush_range(in, offset, size);
utime_t lat;
uint64_t totalwritten;
- int have;
- int r = get_caps(in, CEPH_CAP_FILE_WR|CEPH_CAP_AUTH_SHARED,
- CEPH_CAP_FILE_BUFFER, &have, endoff);
+ int want, have;
+ if (f->mode & CEPH_FILE_MODE_LAZY)
+ want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
+ else
+ want = CEPH_CAP_FILE_BUFFER;
+ int r = get_caps(in, CEPH_CAP_FILE_WR|CEPH_CAP_AUTH_SHARED, want, &have, endoff);
if (r < 0)
return r;
}
if (f->flags & O_DIRECT)
- have &= ~CEPH_CAP_FILE_BUFFER;
+ have &= ~(CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO);
ldout(cct, 10) << " snaprealm " << *in->snaprealm << dendl;
}
}
- if (cct->_conf->client_oc && (have & CEPH_CAP_FILE_BUFFER)) {
+ if (cct->_conf->client_oc &&
+ (have & (CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO))) {
// do buffered write
if (!in->oset.dirty_or_tx)
get_cap_ref(in, CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_BUFFER);
return objectcacher->release_all();
}
+/*
+ * Switch lazyio mode on or off for an open file handle.
+ *
+ * Returns 0 without doing anything when the handle's CEPH_FILE_MODE_LAZY
+ * bit already matches `enable`.  Otherwise the bit is flipped, the inode's
+ * open reference is moved from the old mode to the new mode, and
+ * check_caps() is run: with CHECK_CAPS_NODELAY when enabling, with no
+ * flags when disabling.  Always returns 0.
+ */
+int Client::_lazyio(Fh *fh, int enable)
+{
+  Inode *in = fh->inode.get();
+  ldout(cct, 20) << __func__ << " " << *in << " " << !!enable << dendl;
+
+  const bool turn_on = !!enable;
+  const bool is_lazy = !!(fh->mode & CEPH_FILE_MODE_LAZY);
+  if (is_lazy == turn_on)
+    return 0;
+
+  const int prev_mode = fh->mode;
+  if (turn_on)
+    fh->mode |= CEPH_FILE_MODE_LAZY;
+  else
+    fh->mode &= ~CEPH_FILE_MODE_LAZY;
+
+  // take the reference for the new mode before dropping the old one
+  in->get_open_ref(fh->mode);
+  in->put_open_ref(prev_mode);
+
+  check_caps(in, turn_on ? CHECK_CAPS_NODELAY : 0);
+  return 0;
+}
+
+/*
+ * fd-based entry point for toggling lazyio.
+ *
+ * Takes client_lock, resolves `fd` to a file handle and forwards to
+ * _lazyio().  Returns -EBADF when `fd` does not map to a file handle.
+ */
+int Client::lazyio(int fd, int enable)
+{
+  Mutex::Locker l(client_lock);
+  if (Fh *handle = get_filehandle(fd))
+    return _lazyio(handle, enable);
+
+  return -EBADF;
+}
+
+/*
+ * Low-level (Fh-based) entry point for toggling lazyio.
+ *
+ * Takes client_lock, logs and traces the call, then forwards to _lazyio().
+ * `fh` is dereferenced without a null check.
+ */
+int Client::ll_lazyio(Fh *fh, int enable)
+{
+  Mutex::Locker guard(client_lock);
+  const bool want_lazy = !!enable;
+  ldout(cct, 3) << __func__ << " " << fh << " " << fh->inode->ino << " " << want_lazy << dendl;
+  tout(cct) << __func__ << std::endl;
+
+  const int rc = _lazyio(fh, enable);
+  return rc;
+}
int Client::lazyio_propogate(int fd, loff_t offset, size_t count)
{
}
// use normalized flags to generate cmode
- int cmode = ceph_flags_to_mode(ceph_flags_sys2wire(flags));
- if (cmode < 0)
- return -EINVAL;
+ int cflags = ceph_flags_sys2wire(flags);
+ if (cct->_conf.get_val<bool>("client_force_lazyio"))
+ cflags |= CEPH_O_LAZY;
+
+ int cmode = ceph_flags_to_mode(cflags);
int64_t pool_id = -1;
if (data_pool && *data_pool) {
path.push_dentry(name);
req->set_filepath(path);
req->set_inode(dir);
- req->head.args.open.flags = ceph_flags_sys2wire(flags | O_CREAT);
+ req->head.args.open.flags = cflags | CEPH_O_CREAT;
req->head.args.open.stripe_unit = stripe_unit;
req->head.args.open.stripe_count = stripe_count;
int _getlk(Fh *fh, struct flock *fl, uint64_t owner);
int _setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep);
int _flock(Fh *fh, int cmd, uint64_t owner);
+ int _lazyio(Fh *fh, int enable);
int get_or_create(Inode *dir, const char* name,
Dentry **pdn, bool expect_null=false);
int64_t drop_caches();
// hpc lazyio
+ int lazyio(int fd, int enable);
int lazyio_propogate(int fd, loff_t offset, size_t count);
int lazyio_synchronize(int fd, loff_t offset, size_t count);
int ll_getlk(Fh *fh, struct flock *fl, uint64_t owner);
int ll_setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep);
int ll_flock(Fh *fh, int cmd, uint64_t owner);
+ int ll_lazyio(Fh *fh, int enable);
int ll_file_layout(Fh *fh, file_layout_t *layout);
void ll_interrupt(void *d);
bool ll_handle_umask() {
bool Inode::put_open_ref(int mode)
{
//cout << "open_by_mode[" << mode << "] " << open_by_mode[mode] << " -> " << (open_by_mode[mode]-1) << std::endl;
- if (--open_by_mode[mode] == 0)
+ auto& ref = open_by_mode.at(mode);
+ assert(ref > 0);
+ if (--ref == 0)
return true;
return false;
}
/* because CEPH_FILE_MODE_PIN is zero, so mode = -1 is error */
int mode = -1;
-#ifdef O_DIRECTORY /* fixme */
if ((flags & CEPH_O_DIRECTORY) == CEPH_O_DIRECTORY)
return CEPH_FILE_MODE_PIN;
-#endif
switch (flags & O_ACCMODE) {
case CEPH_O_WRONLY:
break;
}
+ if (flags & CEPH_O_LAZY)
+ mode |= CEPH_FILE_MODE_LAZY;
+
return mode;
}
.set_description("set the directory size as the number of file bytes recursively used")
.set_long_description("This option enables a CephFS feature that stores the recursive directory size (the bytes used by files in the directory and its descendents) in the st_size field of the stat structure."),
+ // When true, the client ORs CEPH_O_LAZY into the flags of every open and create.
+ Option("client_force_lazyio", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+ .set_default(false)
+ .set_description("force lazyio mode (CEPH_O_LAZY) on every file the client opens or creates"),
+
// note: the max amount of "in flight" dirty data is roughly (max - target)
Option("fuse_use_invalidate_cb", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(true)
#define CEPH_O_CREAT 00000100
#define CEPH_O_EXCL 00000200
#define CEPH_O_TRUNC 00001000
+#define CEPH_O_LAZY 00020000
#define CEPH_O_DIRECTORY 00200000
#define CEPH_O_NOFOLLOW 00400000
int ceph_fallocate(struct ceph_mount_info *cmount, int fd, int mode,
int64_t offset, int64_t length);
+/**
+ * Enable/disable lazyio for the file.
+ *
+ * @param cmount the ceph mount handle to use for the operation.
+ * @param fd the file descriptor of the file to enable or disable lazyio on.
+ * @param enable non-zero to enable lazyio, zero to disable it.
+ * @returns 0 on success or a negative error code on failure.
+ */
+int ceph_lazyio(struct ceph_mount_info *cmount, int fd, int enable);
+
/** @} file */
/**
int ceph_ll_setlk(struct ceph_mount_info *cmount,
Fh *fh, struct flock *fl, uint64_t owner, int sleep);
+int ceph_ll_lazyio(struct ceph_mount_info *cmount, Fh *fh, int enable);
+
/*
* Delegation support
*
return cmount->get_client()->fallocate(fd, mode, offset, length);
}
+/*
+ * C API: enable (non-zero) or disable (zero) lazyio on an open fd.
+ *
+ * Returns -ENOTCONN when the mount handle is not mounted; otherwise
+ * returns whatever Client::lazyio() returns.
+ */
+extern "C" int ceph_lazyio(class ceph_mount_info *cmount,
+			   int fd, int enable)
+{
+  // same guard as the other fd-based wrappers (e.g. ceph_sync_fs): do not
+  // touch the client through a handle that has not been mounted
+  if (!cmount->is_mounted())
+    return -ENOTCONN;
+  return (cmount->get_client()->lazyio(fd, enable));
+}
+
extern "C" int ceph_sync_fs(struct ceph_mount_info *cmount)
{
if (!cmount->is_mounted())
return (cmount->get_client()->ll_setlk(fh, fl, owner, sleep));
}
+/*
+ * C API: low-level (Fh-based) lazyio toggle; forwards straight to
+ * Client::ll_lazyio() and returns its result.
+ */
+extern "C" int ceph_ll_lazyio(class ceph_mount_info *cmount,
+			      Fh *fh, int enable)
+{
+  auto *client = cmount->get_client();
+  return client->ll_lazyio(fh, enable);
+}
+
extern "C" int ceph_ll_delegation(struct ceph_mount_info *cmount, Fh *fh,
unsigned cmd, ceph_deleg_cb_t cb, void *priv)
{
const struct sm_state_t filelock[LOCK_MAX] = {
// stable loner rep state r rp rd wr fwr l x caps(any,loner,xlocker,replica)
- [LOCK_SYNC] = { 0, false, LOCK_SYNC, ANY, 0, ANY, 0, 0, ANY, 0, CEPH_CAP_GSHARED|CEPH_CAP_GCACHE|CEPH_CAP_GRD,0,0,CEPH_CAP_GSHARED|CEPH_CAP_GCACHE|CEPH_CAP_GRD },
+ [LOCK_SYNC] = { 0, false, LOCK_SYNC, ANY, 0, ANY, 0, 0, ANY, 0, CEPH_CAP_GSHARED|CEPH_CAP_GCACHE|CEPH_CAP_GRD|CEPH_CAP_GLAZYIO,0,0,CEPH_CAP_GSHARED|CEPH_CAP_GCACHE|CEPH_CAP_GRD },
[LOCK_LOCK_SYNC] = { LOCK_SYNC, false, LOCK_LOCK, AUTH, 0, 0, 0, 0, 0, 0, CEPH_CAP_GCACHE,0,0,0 },
[LOCK_EXCL_SYNC] = { LOCK_SYNC, true, LOCK_LOCK, 0, 0, 0, 0, XCL, 0, 0, 0,CEPH_CAP_GSHARED|CEPH_CAP_GCACHE|CEPH_CAP_GRD,0,0 },
[LOCK_MIX_SYNC] = { LOCK_SYNC, false, LOCK_MIX_SYNC2,0,0, 0, 0, 0, 0, 0, CEPH_CAP_GRD|CEPH_CAP_GLAZYIO,0,0,CEPH_CAP_GRD },
[LOCK_MIX] = { 0, false, LOCK_MIX, 0, 0, REQ, ANY, 0, 0, 0, CEPH_CAP_GRD|CEPH_CAP_GWR|CEPH_CAP_GLAZYIO,0,0,CEPH_CAP_GRD },
[LOCK_SYNC_MIX] = { LOCK_MIX, false, LOCK_SYNC_MIX2,ANY,0, 0, 0, 0, 0, 0, CEPH_CAP_GRD|CEPH_CAP_GLAZYIO,0,0,CEPH_CAP_GRD },
[LOCK_SYNC_MIX2] = { LOCK_MIX, false, 0, 0, 0, 0, 0, 0, 0, 0, CEPH_CAP_GRD|CEPH_CAP_GLAZYIO,0,0,CEPH_CAP_GRD },
- [LOCK_EXCL_MIX] = { LOCK_MIX, true, LOCK_LOCK, 0, 0, 0, XCL, 0, 0, 0, 0,CEPH_CAP_GRD|CEPH_CAP_GWR|CEPH_CAP_GLAZYIO,0,0 },
+ [LOCK_EXCL_MIX] = { LOCK_MIX, true, LOCK_LOCK, 0, 0, 0, XCL, 0, 0, 0, 0,CEPH_CAP_GRD|CEPH_CAP_GWR,0,0 },
[LOCK_XSYN_MIX] = { LOCK_MIX, true, LOCK_LOCK, 0, 0, 0, XCL, 0, 0, 0, 0,0,0,0 },
- [LOCK_EXCL] = { 0, true, LOCK_LOCK, 0, 0, XCL, XCL, 0, 0, 0, 0,CEPH_CAP_GSHARED|CEPH_CAP_GEXCL|CEPH_CAP_GCACHE|CEPH_CAP_GRD|CEPH_CAP_GWR|CEPH_CAP_GBUFFER|CEPH_CAP_GLAZYIO,0,0 },
+ [LOCK_EXCL] = { 0, true, LOCK_LOCK, 0, 0, XCL, XCL, 0, 0, 0, 0,CEPH_CAP_GSHARED|CEPH_CAP_GEXCL|CEPH_CAP_GCACHE|CEPH_CAP_GRD|CEPH_CAP_GWR|CEPH_CAP_GBUFFER,0,0 },
[LOCK_SYNC_EXCL] = { LOCK_EXCL, true, LOCK_LOCK, ANY, 0, 0, 0, 0, 0, 0, 0,CEPH_CAP_GSHARED|CEPH_CAP_GCACHE|CEPH_CAP_GRD,0,0 },
- [LOCK_MIX_EXCL] = { LOCK_EXCL, true, LOCK_LOCK, 0, 0, 0, XCL, 0, 0, 0, 0,CEPH_CAP_GRD|CEPH_CAP_GWR|CEPH_CAP_GLAZYIO,0,0 },
+ [LOCK_MIX_EXCL] = { LOCK_EXCL, true, LOCK_LOCK, 0, 0, 0, XCL, 0, 0, 0, 0,CEPH_CAP_GRD|CEPH_CAP_GWR,0,0 },
[LOCK_LOCK_EXCL] = { LOCK_EXCL, true, LOCK_LOCK, AUTH, 0, 0, 0, 0, 0, 0, 0,CEPH_CAP_GCACHE|CEPH_CAP_GBUFFER,0,0 },
[LOCK_XSYN_EXCL] = { LOCK_EXCL, true, LOCK_LOCK, AUTH, 0, XCL, 0, 0, 0, 0, 0,CEPH_CAP_GCACHE|CEPH_CAP_GBUFFER,0,0 },