From fff600cf73e48bfb0b09b2ff91751149ec038afc Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Wed, 11 Nov 2009 16:09:52 -0800
Subject: [PATCH] filestore: flusher thread; commit snaps (disabled)

---
 src/config.cc       |   2 +
 src/config.h        |   2 +
 src/os/FileStore.cc | 161 +++++++++++++++++++++++++++++++++++++++-----
 src/os/FileStore.h  |  21 ++++++-
 4 files changed, 170 insertions(+), 16 deletions(-)

diff --git a/src/config.cc b/src/config.cc
index c5dd3dabe9ad9..152e5e6ae979e 100644
--- a/src/config.cc
+++ b/src/config.cc
@@ -498,6 +498,8 @@ static struct config_option config_optionsp[] = {
 	OPTION(filestore_fake_collections, 0, OPT_BOOL, false),
 	OPTION(filestore_dev, 0, OPT_STR, 0),
 	OPTION(filestore_btrfs_trans, 0, OPT_BOOL, true),
+	OPTION(filestore_flusher, 0, OPT_BOOL, true),  // _write hands fds to the flusher thread
+	OPTION(filestore_sync_flush, 0, OPT_BOOL, false),  // only consulted when filestore_flusher is off
 	OPTION(ebofs, 0, OPT_BOOL, false),
 	OPTION(ebofs_cloneable, 0, OPT_BOOL, true),
 	OPTION(ebofs_verify, 0, OPT_BOOL, false),
diff --git a/src/config.h b/src/config.h
index 1abf65b571678..089b3134a2630 100644
--- a/src/config.h
+++ b/src/config.h
@@ -310,6 +310,8 @@ struct md_config_t {
   bool filestore_fake_collections;
   const char *filestore_dev;
   bool filestore_btrfs_trans;
+  bool filestore_flusher;
+  bool filestore_sync_flush;
 
   // ebofs
   bool ebofs;
diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc
index 0b81a74591a83..82ca311d9249d 100644
--- a/src/os/FileStore.cc
+++ b/src/os/FileStore.cc
@@ -47,6 +47,9 @@
 
 #define ATTR_MAX 80
 
+#define COMMIT_SNAP_DIR "commit_snaps"
+#define COMMIT_SNAP_ITEM "%llu"  /* always used with long long unsigned values */
+
 #ifndef __CYGWIN__
 # ifndef DARWIN
 #  include "btrfs_ioctl.h"
@@ -469,12 +472,40 @@ int FileStore::mount()
   }
   journal_start();
   sync_thread.create();
+  flusher_thread.create();
 
 
   // is this btrfs?
   Transaction empty;
   btrfs = 1;
-  btrfs_usertrans = true;
+
+  btrfs_snap = false;  // hard-wired off: the snapshot code below is currently never run
+  if (btrfs_snap) {
+    char dirname[100];
+    snprintf(dirname, sizeof(dirname), "%s/%s", basedir.c_str(), COMMIT_SNAP_DIR);  // bounded: basedir length is not limited here
+    ::mkdir(dirname, 0755);
+    snapdir_fd = ::open(dirname, O_RDONLY);
+
+    // get snap list
+    DIR *dir = ::opendir(dirname);
+    if (!dir)
+      return -errno;
+
+    struct dirent sde, *de;
+    while (::readdir_r(dir, &sde, &de) == 0) {
+      if (!de)
+        break;
+      long long unsigned c;
+      if (sscanf(de->d_name, COMMIT_SNAP_ITEM, &c) == 1)  // entries that do not parse as a number are skipped
+        snaps.push_back(c);
+    }
+
+    ::closedir(dir);
+
+    dout(0) << " found snaps " << snaps << dendl;
+  }
+
+  btrfs_usertrans = false;
   btrfs_trans_start_end = true;  // trans start/end interface
   r = apply_transaction(empty, 0);
   if (r == 0) {
@@ -538,8 +569,10 @@ int FileStore::umount()
   lock.Lock();
   stop = true;
   sync_cond.Signal();
+  flusher_cond.Signal();  // wake the flusher so it sees 'stop'
   lock.Unlock();
   sync_thread.join();
+  flusher_thread.join();
 
   journal_stop();
 
@@ -1417,8 +1450,14 @@ int FileStore::_write(coll_t cid, const sobject_t& oid,
     if (did < 0) {
       derr(0) << "couldn't write to " << fn << " len " << len << " off " << offset << " errno " << errno << " " << strerror_r(errno, buf, sizeof(buf)) << dendl;
     }
-    
-    ::close(fd);
+
+    if (g_conf.filestore_flusher)
+      queue_flusher(fd, offset, len);  // flusher thread closes fd
+    else {
+      if (g_conf.filestore_sync_flush)
+        ::sync_file_range(fd, offset, len, SYNC_FILE_RANGE_WRITE);
+      ::close(fd);
+    }
     r = did;
   }
 
@@ -1547,6 +1586,65 @@ int FileStore::_clone_range(coll_t cid, const sobject_t& oldoid, const sobject_t
 }
 
 
+// Queue an fd plus the byte range just written for the flusher thread.
+// Entries are stored in flusher_queue as three consecutive values
+// (fd, offset, length), which flusher_entry() pops in the same order.
+// The fd is not closed here; flusher_entry() closes it after processing.
+// Takes 'lock' while touching the queue and signals flusher_cond.
+void FileStore::queue_flusher(int fd, __u64 off, __u64 len)
+{
+  lock.Lock();
+  dout(10) << "queue_flusher fd " << fd << " " << off << "~" << len << dendl;
+  flusher_queue.push_back(fd);
+  flusher_queue.push_back(off);
+  flusher_queue.push_back(len);
+  flusher_cond.Signal();
+  lock.Unlock();
+}
+
+// Body of the flusher thread.  Under 'lock', swaps the whole pending queue
+// into a local list, then drops the lock while it works through the
+// (fd, offset, length) triples: sync_file_range(SYNC_FILE_RANGE_WRITE) on
+// each range, followed by close(fd).  When 'stop' is set the flush is
+// skipped and the fd is only closed.  With an empty queue the thread
+// exits if 'stop' is set and otherwise waits on flusher_cond.
+void FileStore::flusher_entry()
+{
+  lock.Lock();
+  dout(20) << "flusher_entry start" << dendl;
+  while (true) {
+    if (!flusher_queue.empty()) {
+      list<__u64> q;
+      q.swap(flusher_queue);  // take everything queued so far in one step
+
+      lock.Unlock();
+      while (!q.empty()) {
+        int fd = q.front();
+        q.pop_front();
+        __u64 off = q.front();
+        q.pop_front();
+        __u64 len = q.front();
+        q.pop_front();
+        if (!stop) {  // NOTE(review): 'stop' is read here without 'lock' held
+          dout(10) << "flusher_entry flushing+closing " << fd << dendl;
+          ::sync_file_range(fd, off, len, SYNC_FILE_RANGE_WRITE);
+        } else
+          dout(10) << "flusher_entry JUST closing " << fd << dendl;
+        ::close(fd);
+      }
+      lock.Lock();
+    } else {
+      if (stop)
+        break;
+      dout(20) << "flusher_entry sleeping" << dendl;
+      flusher_cond.Wait(lock);
+      dout(20) << "flusher_entry awoke" << dendl;
+    }
+  }
+  dout(20) << "flusher_entry finish" << dendl;
+  lock.Unlock();
+}
+
 void FileStore::sync_entry()
 {
   Cond othercond;
@@ -1575,29 +1673,64 @@ void FileStore::sync_entry()
     }
 
     lock.Unlock();
-    
+
     if (commit_start()) {
       dout(15) << "sync_entry committing " << op_seq << dendl;
+      utime_t start = g_clock.now();
       __u64 cp = op_seq;
+
+      if (btrfs_snap) {
+        btrfs_ioctl_vol_args snapargs;
+        snapargs.fd = snapdir_fd;
+        sprintf(snapargs.name, COMMIT_SNAP_ITEM, (long long unsigned)cp);  // snapshot is named after the op_seq being committed
+        dout(0) << "taking snap '" << snapargs.name << "'" << dendl;
+        int r = ::ioctl(snapargs.fd, BTRFS_IOC_SNAP_CREATE, &snapargs);
+        char buf[100];
+        dout(0) << "snap create '" << snapargs.name << "' got " << r
+                << " " << strerror_r(r < 0 ? errno : 0, buf, sizeof(buf)) << dendl;
+        snaps.push_back(cp);  // NOTE(review): recorded even when the ioctl returned an error
+      }
 
       commit_started();
-      
-      if (btrfs) {
-        // do a full btrfs commit
-        ::ioctl(op_fd, BTRFS_IOC_SYNC);
-      } else {
-        // make the file system's journal commit.
-        //  this works with ext3, but NOT ext4
-        ::fsync(op_fd);
+
+      if (!btrfs_snap) {
+        if (btrfs) {
+          dout(15) << "sync_entry doing btrfs sync" << dendl;
+          // do a full btrfs commit
+          ::ioctl(op_fd, BTRFS_IOC_SYNC);
+        } else {
+          // make the file system's journal commit.
+          //  this works with ext3, but NOT ext4
+          ::fsync(op_fd);
+        }
       }
+      utime_t done = g_clock.now();
+      done -= start;  // elapsed time since 'start' was sampled above
+      dout(10) << "sync_entry commit took " << done << dendl;
 
       commit_finish();
+
+      // remove old snaps?
+      if (false && btrfs_snap) {  // disabled: snapshot pruning never runs
+        while (snaps.size() > 2) {
+          btrfs_ioctl_vol_args snapargs;
+          snapargs.fd = snapdir_fd;
+          sprintf(snapargs.name, COMMIT_SNAP_ITEM, (long long unsigned)snaps.front());
+          snaps.pop_front();
+          dout(0) << "removing snap '" << snapargs.name << "'" << dendl;
+          int r = ::ioctl(snapargs.fd, BTRFS_IOC_SNAP_DESTROY, &snapargs);
+          char buf[100];
+          dout(0) << "snap destroyed '" << snapargs.name << "' got " << r
+                  << " " << strerror_r(r < 0 ? errno : 0, buf, sizeof(buf)) << dendl;
+        }
+      }
+
       dout(15) << "sync_entry committed to op_seq " << cp << dendl;
     }
-    
+
     lock.Lock();
-    
+
   }
   lock.Unlock();
 }
diff --git a/src/os/FileStore.h b/src/os/FileStore.h
index f91bbbd867e99..3433fc5a99909 100644
--- a/src/os/FileStore.h
+++ b/src/os/FileStore.h
@@ -38,10 +38,14 @@ class FileStore : public JournalingObjectStore {
   __u64 fsid;
 
   int btrfs;
+  bool btrfs_snap;
   bool btrfs_usertrans;
   bool btrfs_trans_start_end;
   int fsid_fd, op_fd;
 
+  int snapdir_fd;  // only assigned in mount() when btrfs_snap is set
+  deque<__u64> snaps;  // op_seq values of known commit snapshots, oldest at the front
+
   // fake attrs?
   FakeAttrs attrs;
   bool fake_attrs;
@@ -76,17 +80,30 @@ class FileStore : public JournalingObjectStore {
 
   void sync_fs(); // actuall sync underlying fs
 
+  // flusher thread
+  Cond flusher_cond;
+  list<__u64> flusher_queue;  // flat sequence of (fd, offset, length) triples
+  void flusher_entry();
+  struct FlusherThread : public Thread {
+    FileStore *fs;
+    FlusherThread(FileStore *f) : fs(f) {}
+    void *entry() {
+      fs->flusher_entry();
+      return 0;
+    }
+  } flusher_thread;
+  void queue_flusher(int fd, __u64 off, __u64 len);
   int open_journal();
 
  public:
   FileStore(const char *base, const char *jdev = 0) : 
     basedir(base), journalpath(jdev ? jdev:""),
-    btrfs(false), btrfs_trans_start_end(false),
+    btrfs(false), btrfs_snap(false), btrfs_usertrans(false), btrfs_trans_start_end(false),
     fsid_fd(-1), op_fd(-1),
     attrs(this), fake_attrs(false), 
     collections(this), fake_collections(false),
     lock("FileStore::lock"),
-    stop(false), sync_thread(this) { }
+    stop(false), sync_thread(this), flusher_thread(this) { }
 
   int mount();
   int umount();
-- 
2.39.5