OPTION(filestore_fake_collections, 0, OPT_BOOL, false),
OPTION(filestore_dev, 0, OPT_STR, 0),
OPTION(filestore_btrfs_trans, 0, OPT_BOOL, true),
+ OPTION(filestore_flusher, 0, OPT_BOOL, true),
+ OPTION(filestore_sync_flush, 0, OPT_BOOL, false),
OPTION(ebofs, 0, OPT_BOOL, false),
OPTION(ebofs_cloneable, 0, OPT_BOOL, true),
OPTION(ebofs_verify, 0, OPT_BOOL, false),
#define ATTR_MAX 80
+#define COMMIT_SNAP_DIR "commit_snaps"
+#define COMMIT_SNAP_ITEM "%lld"
+
#ifndef __CYGWIN__
# ifndef DARWIN
# include "btrfs_ioctl.h"
}
journal_start();
sync_thread.create();
+ flusher_thread.create();
// is this btrfs?
Transaction empty;
btrfs = 1;
- btrfs_usertrans = true;
+
+ btrfs_snap = false;
+ if (btrfs_snap) {
+ char dirname[100];
+ sprintf(dirname, "%s/%s", basedir.c_str(), COMMIT_SNAP_DIR);
+ ::mkdir(dirname, 0755);
+ snapdir_fd = ::open(dirname, O_RDONLY);
+
+ // get snap list
+ DIR *dir = ::opendir(dirname);
+ if (!dir)
+ return -errno;
+
+ struct dirent sde, *de;
+ while (::readdir_r(dir, &sde, &de) == 0) {
+ if (!de)
+ break;
+ long long unsigned c;
+ if (sscanf(de->d_name, COMMIT_SNAP_ITEM, &c) == 1)
+ snaps.push_back(c);
+ }
+
+ ::closedir(dir);
+
+ dout(0) << " found snaps " << snaps << dendl;
+ }
+
+ btrfs_usertrans = false;
btrfs_trans_start_end = true; // trans start/end interface
r = apply_transaction(empty, 0);
if (r == 0) {
lock.Lock();
stop = true;
sync_cond.Signal();
+ flusher_cond.Signal();
lock.Unlock();
sync_thread.join();
+ flusher_thread.join();
journal_stop();
if (did < 0) {
derr(0) << "couldn't write to " << fn << " len " << len << " off " << offset << " errno " << errno << " " << strerror_r(errno, buf, sizeof(buf)) << dendl;
}
-
- ::close(fd);
+
+ if (g_conf.filestore_flusher)
+ queue_flusher(fd, offset, len);
+ else {
+ if (g_conf.filestore_sync_flush)
+ ::sync_file_range(fd, offset, len, SYNC_FILE_RANGE_WRITE);
+ ::close(fd);
+ }
r = did;
}
}
+/*
+ * Hand one written extent over to the flusher thread.
+ *
+ * Each request occupies three consecutive flusher_queue entries, in the
+ * order fd, offset, length; flusher_entry() pops them in that same order
+ * and close()s the fd afterwards, so the caller must not close it.
+ * The queue is modified and flusher_cond signalled while holding 'lock'.
+ */
+void FileStore::queue_flusher(int fd, __u64 off, __u64 len)
+{
+  // fd, offset and length travel together as one triple
+  const __u64 request[3] = { (__u64)fd, off, len };
+
+  lock.Lock();
+  dout(10) << "queue_flusher fd " << fd << " " << off << "~" << len << dendl;
+  flusher_queue.insert(flusher_queue.end(), request, request + 3);
+  flusher_cond.Signal();
+  lock.Unlock();
+}
+
+/*
+ * Main loop of the flusher thread.
+ *
+ * Waits on flusher_cond until queue_flusher() has appended work, then
+ * takes the whole flusher_queue in one swap and processes it with 'lock'
+ * released.  Every request is an (fd, offset, length) triple: the range
+ * is handed to sync_file_range(SYNC_FILE_RANGE_WRITE) unless the store
+ * is stopping, and the fd is always close()d.  The loop exits once
+ * 'stop' is set and the queue is empty.
+ */
+void FileStore::flusher_entry()
+{
+  lock.Lock();
+  dout(20) << "flusher_entry start" << dendl;
+  while (true) {
+    if (!flusher_queue.empty()) {
+      // grab the entire backlog so the lock is not held during I/O
+      list<__u64> q;
+      q.swap(flusher_queue);
+
+      // 'stop' is written with 'lock' held, so sample it here while we
+      // still own the lock instead of reading it unsynchronised below.
+      const bool stopping = stop;
+
+      lock.Unlock();
+      while (!q.empty()) {
+        // entries were pushed as fd, offset, length -- pop in that order
+        int fd = q.front();
+        q.pop_front();
+        __u64 off = q.front();
+        q.pop_front();
+        __u64 len = q.front();
+        q.pop_front();
+        if (!stopping) {
+          dout(10) << "flusher_entry flushing+closing " << fd << dendl;
+          ::sync_file_range(fd, off, len, SYNC_FILE_RANGE_WRITE);
+        } else
+          dout(10) << "flusher_entry JUST closing " << fd << dendl;
+        // the fd was handed to us by queue_flusher(); we own closing it
+        ::close(fd);
+      }
+      lock.Lock();
+    } else {
+      // nothing queued: leave if shutting down, otherwise sleep
+      if (stop)
+        break;
+      dout(20) << "flusher_entry sleeping" << dendl;
+      flusher_cond.Wait(lock);
+      dout(20) << "flusher_entry awoke" << dendl;
+    }
+  }
+  dout(20) << "flusher_entry finish" << dendl;
+  lock.Unlock();
+}
+
void FileStore::sync_entry()
{
Cond othercond;
}
lock.Unlock();
-
+
if (commit_start()) {
dout(15) << "sync_entry committing " << op_seq << dendl;
+ utime_t start = g_clock.now();
__u64 cp = op_seq;
+
+ if (btrfs_snap) {
+ btrfs_ioctl_vol_args snapargs;
+ snapargs.fd = snapdir_fd;
+ sprintf(snapargs.name, COMMIT_SNAP_ITEM, (long long unsigned)cp);
+ dout(0) << "taking snap '" << snapargs.name << "'" << dendl;
+ int r = ::ioctl(snapargs.fd, BTRFS_IOC_SNAP_CREATE, &snapargs);
+ char buf[100];
+ dout(0) << "snap create '" << snapargs.name << "' got " << r
+ << " " << strerror_r(r < 0 ? errno : 0, buf, sizeof(buf)) << dendl;
+ snaps.push_back(cp);
+ }
commit_started();
-
- if (btrfs) {
- // do a full btrfs commit
- ::ioctl(op_fd, BTRFS_IOC_SYNC);
- } else {
- // make the file system's journal commit.
- // this works with ext3, but NOT ext4
- ::fsync(op_fd);
+
+ if (!btrfs_snap) {
+ if (btrfs) {
+ dout(15) << "sync_entry doing btrfs sync" << dendl;
+ // do a full btrfs commit
+ ::ioctl(op_fd, BTRFS_IOC_SYNC);
+ } else {
+ // make the file system's journal commit.
+ // this works with ext3, but NOT ext4
+ ::fsync(op_fd);
+ }
}
+ utime_t done = g_clock.now();
+ done -= start;
+ dout(10) << "sync_entry commit took " << done << dendl;
commit_finish();
+
+ // remove old snaps?
+ if (false && btrfs_snap) {
+ while (snaps.size() > 2) {
+ btrfs_ioctl_vol_args snapargs;
+ snapargs.fd = snapdir_fd;
+ sprintf(snapargs.name, COMMIT_SNAP_ITEM, (long long unsigned)snaps.front());
+ snaps.pop_front();
+ dout(0) << "removing snap '" << snapargs.name << "'" << dendl;
+ int r = ::ioctl(snapargs.fd, BTRFS_IOC_SNAP_DESTROY, &snapargs);
+ char buf[100];
+ dout(0) << "snap destroyed '" << snapargs.name << "' got " << r
+ << " " << strerror_r(r < 0 ? errno : 0, buf, sizeof(buf)) << dendl;
+ }
+ }
+
dout(15) << "sync_entry committed to op_seq " << cp << dendl;
}
-
+
lock.Lock();
-
+
}
lock.Unlock();
}
__u64 fsid;
int btrfs;
+ bool btrfs_snap;
bool btrfs_usertrans;
bool btrfs_trans_start_end;
int fsid_fd, op_fd;
+ int snapdir_fd;
+ deque<__u64> snaps;
+
// fake attrs?
FakeAttrs attrs;
bool fake_attrs;
void sync_fs(); // actuall sync underlying fs
+ // flusher thread
+ Cond flusher_cond;
+ list<__u64> flusher_queue;
+ void flusher_entry();
+ // Thread subclass whose entry() simply runs the owning store's
+ // flusher_entry(); flusher_thread is the single instance of it.
+ struct FlusherThread : public Thread {
+   FileStore *fs;  // store whose flusher loop this thread executes
+
+   FlusherThread(FileStore *store) : fs(store) {}
+
+   void *entry() {
+     fs->flusher_entry();
+     return 0;
+   }
+ } flusher_thread;
+ void queue_flusher(int fd, __u64 off, __u64 len);
int open_journal();
public:
FileStore(const char *base, const char *jdev = 0) :
basedir(base), journalpath(jdev ? jdev:""),
- btrfs(false), btrfs_trans_start_end(false),
+ btrfs(false), btrfs_snap(false), btrfs_usertrans(false), btrfs_trans_start_end(false),
fsid_fd(-1), op_fd(-1),
attrs(this), fake_attrs(false),
collections(this), fake_collections(false),
lock("FileStore::lock"),
- stop(false), sync_thread(this) { }
+ stop(false), sync_thread(this), flusher_thread(this) { }
int mount();
int umount();