From 7349d668916dda141fa78a80cce41c2ab9b58801 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 10 Nov 2009 07:51:44 -0800 Subject: [PATCH] filestore: clean up btrfs ioctls; use actual btrfs ioctl.h --- src/Makefile.am | 1 + src/os/FileStore.cc | 922 +++++++++++++++++++---------------------- src/os/FileStore.h | 5 +- src/os/btrfs_ioctl.h | 124 ++++++ src/push_to_kclient.pl | 3 +- 5 files changed, 548 insertions(+), 507 deletions(-) create mode 100644 src/os/btrfs_ioctl.h diff --git a/src/Makefile.am b/src/Makefile.am index 1f1cb1561dc46..385c89b07cb9e 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -641,6 +641,7 @@ noinst_HEADERS = \ msg/tcp.cc\ msg/tcp.h\ objclass/objclass.h\ + os/btrfs_ioctl.h\ os/BDBMap.h\ os/Fake.h\ os/FakeStoreBDBCollections.h\ diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc index 888c63650e11b..140d8c7229ea4 100644 --- a/src/os/FileStore.cc +++ b/src/os/FileStore.cc @@ -48,59 +48,11 @@ #define ATTR_MAX 80 #ifndef __CYGWIN__ -#ifndef DARWIN -# include -# define BTRFS_IOCTL_MAGIC 0x94 -struct btrfs_ioctl_trans_resv_start { - __u64 bytes, ops; -}; -# define BTRFS_IOC_TRANS_RESV_START _IOW(BTRFS_IOCTL_MAGIC, 5, \ - struct btrfs_ioctl_trans_resv_start) -# define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6) -# define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7) -# define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8) -# define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int) -#define BTRFS_IOC_WAIT_FOR_SYNC _IO(BTRFS_IOCTL_MAGIC, 5) -struct btrfs_ioctl_clone_range_args { - __s64 src_fd; - __u64 src_offset, src_length; - __u64 dest_offset; -}; - -#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \ - struct btrfs_ioctl_clone_range_args) - -// alternate usertrans interface... -#define BTRFS_IOC_USERTRANS_OPEN 1 -#define BTRFS_IOC_USERTRANS_CLOSE 2 -#define BTRFS_IOC_USERTRANS_SEEK 3 -#define BTRFS_IOC_USERTRANS_WRITE 5 -#define BTRFS_IOC_USERTRANS_UNLINK 6 -#define BTRFS_IOC_USERTRANS_MKDIR 7 -#define BTRFS_IOC_USERTRANS_RMDIR 8 -#define BTRFS_IOC_USERTRANS_TRUNCATE 9 -#define BTRFS_IOC_USERTRANS_SETXATTR 10 -#define BTRFS_IOC_USERTRANS_REMOVEXATTR 11 -#define BTRFS_IOC_USERTRANS_CLONE 12 - -struct btrfs_ioctl_usertrans_op { - __u64 op; - __s64 args[5]; - __s64 rval; -}; - -struct btrfs_ioctl_usertrans { - __u64 len; - struct btrfs_ioctl_usertrans_op ops[0]; -}; - -#define BTRFS_IOC_USERTRANS _IOW(BTRFS_IOCTL_MAGIC, 13, \ - struct btrfs_ioctl_usertrans) - -#endif +# ifndef DARWIN +# include "btrfs_ioctl.h" +# endif #endif - #include "config.h" #define DOUT_SUBSYS filestore @@ -495,30 +447,33 @@ int FileStore::mount() // is this btrfs? Transaction empty; btrfs = 1; - btrfs_trans_resv_start = true; + btrfs_usertrans = true; btrfs_trans_start_end = true; // trans start/end interface r = apply_transaction(empty, 0); - if (r != 0) { - dout(0) << "mount btrfs TRANS_RESV_START ioctl NOT supported: " << strerror_r(-r, buf, sizeof(buf)) << dendl; - btrfs_trans_resv_start = false; - r = apply_transaction(empty, 0); + if (r == 0) { + dout(0) << "mount btrfs USERTRANS ioctl is supported" << dendl; } else { - dout(0) << "mount btrfs TRANS_RESV_START ioctl is supported" << dendl; + dout(0) << "mount btrfs USERTRANS ioctl is NOT supported: " << strerror_r(-r, buf, sizeof(buf)) << dendl; + btrfs_usertrans = false; + r = apply_transaction(empty, 0); + if (r == 0) { + dout(0) << "mount btrfs TRANS_START ioctl is supported" << dendl; + } else { + dout(0) << "mount btrfs TRANS_START ioctl is NOT supported: " << strerror_r(-r, buf, sizeof(buf)) << dendl; + } } if (r == 0) { - dout(0) << "mount btrfs RESV_START ioctl is supported" << dendl; - // do we have the shiny new CLONE_RANGE ioctl? btrfs = 2; int r = _do_clone_range(fsid_fd, -1, 0, 1); if (r == -EBADF) { - dout(0) << "mount detected btrfs" << dendl; + dout(0) << "mount btrfs CLONE_RANGE ioctl is supported" << dendl; } else { - dout(0) << "mount detected dingey old btrfs (r=" << r << " " << strerror_r(-r, buf, sizeof(buf)) << ")" << dendl; + dout(0) << "mount btrfs CLONE_RANGE ioctl is NOT supported: " << strerror_r(-r, buf, sizeof(buf)) << dendl; btrfs = 1; } + dout(0) << "mount detected btrfs" << dendl; } else { - dout(0) << "mount btrfs RESV_START ioctl is NOT supported: " << strerror_r(-r, buf, sizeof(buf)) << dendl; dout(0) << "mount did NOT detect btrfs" << dendl; btrfs = 0; } @@ -562,40 +517,16 @@ unsigned FileStore::apply_transaction(Transaction &t, Context *onjournal, Context *ondisk) { - op_start(); - - // non-atomic implementation - int id = _transaction_start(t.get_num_bytes(), t.get_num_ops()); - if (id < 0) { - op_journal_start(); - op_finish(); - return id; - } - - int r = _apply_transaction(t); - - _transaction_finish(id); - - op_journal_start(); - dout(10) << "op_seq is " << op_seq << dendl; - if (r >= 0) { - journal_transaction(t, onjournal, ondisk); - - ::pwrite(op_fd, &op_seq, sizeof(op_seq), 0); - - } else { - delete onjournal; - delete ondisk; - } - - op_finish(); - return r; + list tls; + tls.push_back(&t); + return apply_transactions(tls, onjournal, ondisk); } unsigned FileStore::apply_transactions(list &tls, Context *onjournal, Context *ondisk) { + int r = 0; op_start(); __u64 bytes = 0, ops = 0; @@ -606,24 +537,27 @@ unsigned FileStore::apply_transactions(list &tls, ops += (*p)->get_num_ops(); } - int id = _transaction_start(bytes, ops); - if (id < 0) { - op_journal_start(); - op_finish(); - return id; - } - - int r = 0; - for (list::iterator p = tls.begin(); - p != tls.end(); - p++) { - r = _apply_transaction(**p); - if (r < 0) - break; + if (btrfs_usertrans) { + r = _do_usertrans(tls); + } else { + int id = _transaction_start(bytes, ops); + if (id < 0) { + op_journal_start(); + op_finish(); + return id; + } + + for (list::iterator p = tls.begin(); + p != tls.end(); + p++) { + r = _apply_transaction(**p); + if (r < 0) + break; + } + + _transaction_finish(id); } - _transaction_finish(id); - op_journal_start(); dout(10) << "op_seq is " << op_seq << dendl; if (r >= 0) { @@ -660,15 +594,7 @@ int FileStore::_transaction_start(__u64 bytes, __u64 ops) assert(0); } - int r; - if (btrfs_trans_resv_start) { - btrfs_ioctl_trans_resv_start resv; - resv.bytes = bytes; - resv.ops = ops; - r = ::ioctl(fd, BTRFS_IOC_TRANS_RESV_START, (unsigned long)&resv); - } else { - r = ::ioctl(fd, BTRFS_IOC_TRANS_START); - } + int r = ::ioctl(fd, BTRFS_IOC_TRANS_START); if (r < 0) { derr(0) << "transaction_start got " << strerror_r(errno, buf, sizeof(buf)) << " from btrfs ioctl" << dendl; @@ -848,425 +774,413 @@ unsigned FileStore::_apply_transaction(Transaction& t) /*********************************************/ - -#if 0 -/* - * compound btrfs usertrans thinger version - */ -unsigned FileStore::apply_transaction(Transaction &t, Context *onsafe) +int FileStore::_do_usertrans(list& ls) { -#ifdef DARWIN - return ObjectStore::apply_transaction(t, onsafe); -#else - - // no btrfs transaction support? - // or, use trans start/end ioctls? - if (!btrfs || btrfs_trans_start_end) { - bufferlist tbl; - t.encode(tbl); // apply_transaction modifies t; encode first - op_start(); - int r = ObjectStore::apply_transaction(t); - dout(10) << "op_seq is " << op_seq << dendl; - if (r >= 0) - journal_transaction(tbl, onsafe); - else - delete onsafe; - op_finish(); - return r; - } - - // create transaction - int len = t.get_btrfs_len(); - dout(20) << "apply_transaction allocation btrfs usertrans len " << len << dendl; - btrfs_ioctl_usertrans *trans = - (btrfs_ioctl_usertrans *)new char[sizeof(*trans) + len * sizeof(trans->ops[0])]; - - trans->len = 0; - - list str; - - while (t.have_op()) { - int op = t.get_op(); - - switch (op) { - - case Transaction::OP_WRITE: - case Transaction::OP_ZERO: // write actual zeros. - { - coll_t cid; - t.get_cid(cid); - sobject_t oid; - t.get_oid(oid); - __u64 offset, len; - t.get_length(offset); - t.get_length(len); - bufferlist bl; - if (op == Transaction::OP_WRITE) - t.get_bl(bl); - else { - bufferptr bp(len); - bp.zero(); - bl.push_back(bp); + btrfs_ioctl_usertrans ut; + vector ops; + list str; + bool start_sync = false; + btrfs_ioctl_usertrans_op op; + + memset(&ut, 0, sizeof(ut)); + + for (list::iterator p = ls.begin(); p != ls.end(); p++) { + Transaction *t = *p; + + while (t->have_op()) { + int opcode = t->get_op(); + + memset(&op, 0, sizeof(op)); + + switch (opcode) { + case Transaction::OP_TOUCH: + { + char *fn = new char[PATH_MAX]; + str.push_back(fn); + get_coname(t->get_cid(), t->get_oid(), fn); + + memset(&op, 0, sizeof(op)); + op.op = BTRFS_IOC_UT_OP_OPEN; + op.args[0] = (unsigned long)fn; + op.args[1] = O_WRONLY | O_CREAT; + op.args[2] = 0644; + op.flags = BTRFS_IOC_UT_OP_FLAG_FD_SAVE; + op.fd_num = 0; + ops.push_back(op); + + memset(&op, 0, sizeof(op)); + op.op = BTRFS_IOC_UT_OP_CLOSE; + op.args[0] = 0; + op.flags = BTRFS_IOC_UT_OP_FLAG_FD_ARG0; + ops.push_back(op); } - - dout(10) << "write" << dendl; - //write(cid, oid, offset, len, bl, 0); - char *fn = new char[PATH_MAX]; - str.push_back(fn); - get_coname(cid, oid, fn); - trans->ops[trans->len].op = BTRFS_IOC_USERTRANS_OPEN; - trans->ops[trans->len].args[0] = (__s64)fn; - trans->ops[trans->len].args[1] = O_WRONLY|O_CREAT; - trans->len++; - trans->ops[trans->len].op = BTRFS_IOC_USERTRANS_SEEK; - trans->ops[trans->len].args[0] = -1; - trans->ops[trans->len].args[1] = offset; - trans->ops[trans->len].args[2] = (__s64)&trans->ops[trans->len].args[4]; // whatever. - trans->ops[trans->len].args[3] = SEEK_SET; - trans->len++; - for (list::const_iterator it = bl.buffers().begin(); - it != bl.buffers().end(); - it++) { - trans->ops[trans->len].op = BTRFS_IOC_USERTRANS_WRITE; - trans->ops[trans->len].args[0] = -1; - trans->ops[trans->len].args[1] = (__s64)(*it).c_str(); - trans->ops[trans->len].args[2] = (__s64)(*it).length(); - trans->len++; + break; + + case Transaction::OP_WRITE: + case Transaction::OP_ZERO: // write actual zeros. + { + __u64 off = t->get_length(); + __u64 len = t->get_length(); + bufferlist bl; + if (opcode == Transaction::OP_WRITE) + bl = t->get_bl(); + else { + bufferptr bp(len); + bp.zero(); + bl.push_back(bp); + } + char *fn = new char[PATH_MAX]; + str.push_back(fn); + get_coname(t->get_cid(), t->get_oid(), fn); + + memset(&op, 0, sizeof(op)); + op.op = BTRFS_IOC_UT_OP_OPEN; + op.args[0] = (__s64)fn; + op.args[1] = O_WRONLY|O_CREAT; + op.args[2] = 0644; + op.flags = BTRFS_IOC_UT_OP_FLAG_FD_SAVE; + op.fd_num = 0; + ops.push_back(op); + + assert(len == bl.length()); + for (list::const_iterator it = bl.buffers().begin(); + it != bl.buffers().end(); + it++) { + memset(&op, 0, sizeof(op)); + op.op = BTRFS_IOC_UT_OP_PWRITE; + op.args[0] = 0; + op.args[1] = (__s64)(*it).c_str(); + op.args[2] = (__s64)(*it).length(); + op.args[3] = off; + op.flags = BTRFS_IOC_UT_OP_FLAG_FD_ARG0; + ops.push_back(op); + off += op.args[2]; + } + + memset(&op, 0, sizeof(op)); + op.op = BTRFS_IOC_UT_OP_CLOSE; + op.args[0] = 0; + op.flags = BTRFS_IOC_UT_OP_FLAG_FD_ARG0; + ops.push_back(op); } - trans->ops[trans->len].op = BTRFS_IOC_USERTRANS_CLOSE; - trans->ops[trans->len].args[0] = -1; - trans->len++; - } - break; - - case Transaction::OP_TRIMCACHE: - { - coll_t cid; - t.get_cid(cid); - sobject_t oid; - t.get_oid(oid); - __u64 offset, len; - t.get_length(offset); - t.get_length(len); - trim_from_cache(cid, oid, offset, len); - } - break; + break; - case Transaction::OP_TRUNCATE: - { - coll_t cid; - t.get_cid(cid); - sobject_t oid; - t.get_oid(oid); - __u64 len; - t.get_length(len); - //truncate(cid, oid, len, 0); - - dout(10) << "truncate" << dendl; - char *fn = new char[PATH_MAX]; - str.push_back(fn); - get_coname(cid, oid, fn); - trans->ops[trans->len].op = BTRFS_IOC_USERTRANS_TRUNCATE; - trans->ops[trans->len].args[0] = (__s64)fn; - trans->ops[trans->len].args[1] = len; - trans->len++; - } - break; + case Transaction::OP_TRUNCATE: + { + char *fn = new char[PATH_MAX]; + str.push_back(fn); + get_coname(t->get_cid(), t->get_oid(), fn); + + memset(&op, 0, sizeof(op)); + op.op = BTRFS_IOC_UT_OP_TRUNCATE; + op.args[0] = (__s64)fn; + op.args[1] = t->get_length(); + ops.push_back(op); + } + break; - case Transaction::OP_REMOVE: - { - coll_t cid; - t.get_cid(cid); - sobject_t oid; - t.get_oid(oid); - //remove(cid, oid, 0); - - dout(10) << "remove " << cid << " " << oid << dendl; - char *fn = new char[PATH_MAX]; - str.push_back(fn); - get_coname(cid, oid, fn); - trans->ops[trans->len].op = BTRFS_IOC_USERTRANS_UNLINK; - trans->ops[trans->len].args[0] = (__u64)fn; - trans->len++; - } - break; + case Transaction::OP_COLL_REMOVE: + case Transaction::OP_REMOVE: + { + char *fn = new char[PATH_MAX]; + str.push_back(fn); + get_coname(t->get_cid(), t->get_oid(), fn); + + memset(&op, 0, sizeof(op)); + op.op = BTRFS_IOC_UT_OP_UNLINK; + op.args[0] = (__u64)fn; + ops.push_back(op); + } + break; - case Transaction::OP_SETATTR: - { - coll_t cid; - t.get_cid(cid); - sobject_t oid; - t.get_oid(oid); - const char *attrname; - t.get_attrname(attrname); - bufferlist bl; - t.get_bl(bl); - //setattr(cid, oid, attrname, bl.c_str(), bl.length(), 0); - dout(10) << "setattr " << cid << " " << oid << dendl; - char *fn = new char[PATH_MAX]; - str.push_back(fn); - get_coname(cid, oid, fn); - trans->ops[trans->len].op = BTRFS_IOC_USERTRANS_SETXATTR; - trans->ops[trans->len].args[0] = (__u64)fn; - char aname[ATTR_MAX]; - sprintf(aname, "user.ceph.%s", attrname); - trans->ops[trans->len].args[1] = (__u64)aname; - trans->ops[trans->len].args[2] = (__u64)bl.c_str(); - trans->ops[trans->len].args[3] = bl.length(); - trans->ops[trans->len].args[4] = 0; - trans->len++; - } - break; + case Transaction::OP_SETATTR: + case Transaction::OP_COLL_SETATTR: + { + bufferlist bl = t->get_bl(); + char *fn = new char[PATH_MAX]; + str.push_back(fn); + + if (opcode == Transaction::OP_SETATTR) + get_coname(t->get_cid(), t->get_oid(), fn); + else + get_cdir(t->get_cid(), fn); + char *aname = new char[ATTR_MAX]; + str.push_back(aname); + sprintf(aname, "user.ceph.%s", aname); + + memset(&op, 0, sizeof(op)); + op.op = BTRFS_IOC_UT_OP_SETXATTR; + op.args[0] = (__u64)fn; + op.args[1] = (__u64)aname; + op.args[2] = (__u64)bl.c_str(); + op.args[3] = bl.length(); + op.args[4] = 0; + ops.push_back(op); + } + break; - case Transaction::OP_SETATTRS: - case Transaction::OP_COLL_SETATTRS: - { - // make note of old attrs - map oldattrs; - char *fn = new char[PATH_MAX]; - str.push_back(fn); - - if (op == Transaction::OP_SETATTRS) { - coll_t cid; - t.get_cid(cid); - sobject_t oid; - t.get_oid(oid); - getattrs(cid, oid, oldattrs); - get_coname(cid, oid, fn); - } else { - coll_t cid; - t.get_cid(cid); - collection_getattrs(cid, oldattrs); - get_cdir(cid, fn); + case Transaction::OP_SETATTRS: + case Transaction::OP_COLL_SETATTRS: + { + char *fn = new char[PATH_MAX]; + str.push_back(fn); + + if (opcode == Transaction::OP_SETATTRS) + get_coname(t->get_cid(), t->get_oid(), fn); + else + get_cdir(t->get_cid(), fn); + + const map& aset = t->get_attrset(); + for (map::const_iterator p = aset.begin(); + p != aset.end(); + p++) { + char *aname = new char[ATTR_MAX]; + str.push_back(aname); + sprintf(aname, "user.ceph.%s", p->first.c_str()); + + memset(&op, 0, sizeof(op)); + op.op = BTRFS_IOC_UT_OP_SETXATTR; + op.args[0] = (__u64)fn; + op.args[1] = (__u64)aname; + op.args[2] = (__u64)p->second.c_str(); + op.args[3] = p->second.length(); + op.args[4] = 0; + ops.push_back(op); + } } - map *pattrset; - t.get_pattrset(pattrset); - - for (map::iterator p = pattrset->begin(); - p != pattrset->end(); - p++) { - trans->ops[trans->len].op = BTRFS_IOC_USERTRANS_SETXATTR; - trans->ops[trans->len].args[0] = (__u64)fn; + break; + + case Transaction::OP_RMATTR: + case Transaction::OP_COLL_RMATTR: + { + char *fn = new char[PATH_MAX]; + str.push_back(fn); + if (opcode == Transaction::OP_RMATTR) + get_coname(t->get_cid(), t->get_oid(), fn); + else + get_cdir(t->get_cid(), fn); + char *aname = new char[ATTR_MAX]; str.push_back(aname); - sprintf(aname, "user.ceph.%s", p->first.c_str()); - trans->ops[trans->len].args[1] = (__u64)aname; - trans->ops[trans->len].args[2] = (__u64)p->second.c_str(); - trans->ops[trans->len].args[3] = p->second.length(); - trans->ops[trans->len].args[4] = 0; - trans->len++; - oldattrs.erase(p->first); + sprintf(aname, "user.ceph.%s", t->get_attrname()); + + memset(&op, 0, sizeof(op)); + op.op = BTRFS_IOC_UT_OP_REMOVEXATTR; + op.args[0] = (__u64)fn; + op.args[1] = (__u64)aname; + ops.push_back(op); } - - // and remove any leftovers - for (map::iterator p = oldattrs.begin(); - p != oldattrs.end(); - p++) { - trans->ops[trans->len].op = BTRFS_IOC_USERTRANS_REMOVEXATTR; - trans->ops[trans->len].args[0] = (__u64)fn; - trans->ops[trans->len].args[1] = (__u64)p->first.c_str(); - trans->len++; + break; + + case Transaction::OP_RMATTRS: + { + char *fn = new char[PATH_MAX]; + str.push_back(fn); + get_coname(t->get_cid(), t->get_oid(), fn); + + map aset; + _getattrs(fn, aset); + + for (map::iterator p = aset.begin(); p != aset.end(); p++) { + char *aname = new char[ATTR_MAX]; + str.push_back(aname); + sprintf(aname, "user.ceph.%s", p->first.c_str()); + + memset(&op, 0, sizeof(op)); + op.op = BTRFS_IOC_UT_OP_REMOVEXATTR; + op.args[0] = (__u64)fn; + op.args[1] = (__u64)aname; + ops.push_back(op); + } } - } - break; - - case Transaction::OP_RMATTR: - { - coll_t cid; - t.get_cid(cid); - sobject_t oid; - t.get_oid(oid); - const char *attrname; - t.get_attrname(attrname); - //rmattr(cid, oid, attrname, 0); - - dout(10) << "rmattr " << cid << " " << oid << dendl; - char *fn = new char[PATH_MAX]; - str.push_back(fn); - get_coname(cid, oid, fn); - trans->ops[trans->len].op = BTRFS_IOC_USERTRANS_REMOVEXATTR; - trans->ops[trans->len].args[0] = (__u64)fn; - trans->ops[trans->len].args[1] = (__u64)attrname; - trans->len++; - } - break; + break; - case Transaction::OP_CLONE: - { - coll_t cid; - t.get_cid(cid); - sobject_t oid; - t.get_oid(oid); - sobject_t noid; - t.get_oid(noid); - clone(cid, oid, noid); - - dout(10) << "clone " << cid << " " << oid << dendl; - char *ofn = new char[PATH_MAX]; - str.push_back(ofn); - char *nfn = new char[PATH_MAX]; - str.push_back(nfn); - get_coname(cid, oid, ofn); - get_coname(cid, noid, nfn); - - trans->ops[trans->len].op = BTRFS_IOC_USERTRANS_OPEN; - trans->ops[trans->len].args[0] = (__u64)nfn; - trans->ops[trans->len].args[1] = O_WRONLY; - trans->len++; - trans->ops[trans->len].op = BTRFS_IOC_USERTRANS_OPEN; - trans->ops[trans->len].args[0] = (__u64)ofn; - trans->ops[trans->len].args[1] = O_RDONLY; - trans->len++; - trans->ops[trans->len].op = BTRFS_IOC_USERTRANS_CLONE; - trans->ops[trans->len].args[0] = -2; - trans->ops[trans->len].args[1] = -1; - trans->len++; - trans->ops[trans->len].op = BTRFS_IOC_USERTRANS_CLOSE; - trans->ops[trans->len].args[0] = -1; - trans->len++; - trans->ops[trans->len].op = BTRFS_IOC_USERTRANS_CLOSE; - trans->ops[trans->len].args[0] = -2; - trans->len++; - } - break; + case Transaction::OP_CLONE: + { + coll_t cid = t->get_cid(); + + char *fn = new char[PATH_MAX]; + str.push_back(fn); + get_coname(cid, t->get_oid(), fn); + + char *fn2 = new char[PATH_MAX]; + str.push_back(fn2); + get_coname(cid, t->get_oid(), fn2); + + memset(&op, 0, sizeof(op)); + op.op = BTRFS_IOC_UT_OP_OPEN; + op.args[0] = (__u64)fn; + op.args[1] = O_RDONLY; + op.fd_num = 0; + ops.push_back(op); + + memset(&op, 0, sizeof(op)); + op.op = BTRFS_IOC_UT_OP_OPEN; + op.args[0] = (__u64)fn2; + op.args[1] = O_WRONLY|O_CREAT|O_TRUNC; + op.args[2] = 0644; + op.fd_num = 1; + ops.push_back(op); + + memset(&op, 0, sizeof(op)); + op.op = BTRFS_IOC_UT_OP_CLONERANGE; + op.args[0] = 1; + op.args[1] = 0; + op.args[2] = 0; + op.args[3] = 0; + op.flags = BTRFS_IOC_UT_OP_FLAG_FD_ARG0 | BTRFS_IOC_UT_OP_FLAG_FD_ARG1; + ops.push_back(op); + + memset(&op, 0, sizeof(op)); + op.op = BTRFS_IOC_UT_OP_CLOSE; + op.args[0] = 0; + op.flags = BTRFS_IOC_UT_OP_FLAG_FD_ARG0; + ops.push_back(op); + + op.args[0] = 1; + op.flags = BTRFS_IOC_UT_OP_FLAG_FD_ARG0; + ops.push_back(op); + } + break; + + case Transaction::OP_CLONERANGE: + { + coll_t cid = t->get_cid(); + + char *fn = new char[PATH_MAX]; + str.push_back(fn); + get_coname(cid, t->get_oid(), fn); + + char *fn2 = new char[PATH_MAX]; + str.push_back(fn2); + get_coname(cid, t->get_oid(), fn2); + + memset(&op, 0, sizeof(op)); + op.op = BTRFS_IOC_UT_OP_OPEN; + op.args[0] = (__u64)fn; + op.args[1] = O_RDONLY; + op.fd_num = 0; + ops.push_back(op); + + memset(&op, 0, sizeof(op)); + op.op = BTRFS_IOC_UT_OP_OPEN; + op.args[0] = (__u64)fn2; + op.args[1] = O_WRONLY|O_CREAT|O_TRUNC; + op.args[2] = 0644; + op.fd_num = 1; + ops.push_back(op); + + memset(&op, 0, sizeof(op)); + op.op = BTRFS_IOC_UT_OP_CLONERANGE; + op.args[0] = 1; + op.args[1] = 0; + op.args[2] = t->get_length(); // offset + op.args[3] = t->get_length(); // length + op.flags = BTRFS_IOC_UT_OP_FLAG_FD_ARG0 | BTRFS_IOC_UT_OP_FLAG_FD_ARG1; + ops.push_back(op); + + memset(&op, 0, sizeof(op)); + op.op = BTRFS_IOC_UT_OP_CLOSE; + op.args[0] = 0; + op.flags = BTRFS_IOC_UT_OP_FLAG_FD_ARG0; + ops.push_back(op); + + op.args[0] = 1; + ops.push_back(op); + } + break; - case Transaction::OP_MKCOLL: - { - coll_t cid; - t.get_cid(cid); - //create_collection(cid, 0); - dout(10) << "mkcoll " << cid << dendl; - char *fn = new char[PATH_MAX]; - str.push_back(fn); - get_cdir(cid, fn); - trans->ops[trans->len].op = BTRFS_IOC_USERTRANS_MKDIR; - trans->ops[trans->len].args[0] = (__u64)fn; - trans->ops[trans->len].args[1] = 0644; - trans->len++; - } - break; + case Transaction::OP_MKCOLL: + { + char *fn = new char[PATH_MAX]; + str.push_back(fn); + get_cdir(t->get_cid(), fn); + + memset(&op, 0, sizeof(op)); + op.op = BTRFS_IOC_UT_OP_MKDIR; + op.args[0] = (__u64)fn; + op.args[1] = 0755; + ops.push_back(op); + } + break; - case Transaction::OP_RMCOLL: - { - coll_t cid; - t.get_cid(cid); - //destroy_collection(cid, 0); - dout(10) << "rmcoll " << cid << dendl; - char *fn = new char[PATH_MAX]; - str.push_back(fn); - get_cdir(cid, fn); - trans->ops[trans->len].op = BTRFS_IOC_USERTRANS_RMDIR; - trans->ops[trans->len].args[0] = (__u64)fn; - trans->ops[trans->len].args[1] = 0644; - trans->len++; - } - break; + case Transaction::OP_RMCOLL: + { + char *fn = new char[PATH_MAX]; + str.push_back(fn); + get_cdir(t->get_cid(), fn); + + memset(&op, 0, sizeof(op)); + op.op = BTRFS_IOC_UT_OP_RMDIR; + op.args[0] = (__u64)fn; + ops.push_back(op); + } + break; - case Transaction::OP_COLL_ADD: - { - coll_t cid, ocid; - t.get_cid(cid); - t.get_cid(ocid); - sobject_t oid; - t.get_oid(oid); - collection_add(cid, ocid, oid, 0); - assert(0); - } - break; + case Transaction::OP_COLL_ADD: + { + const sobject_t& oid = t->get_oid(); + + char *fn = new char[PATH_MAX]; + str.push_back(fn); + get_coname(t->get_cid(), oid, fn); + + char *nfn = new char[PATH_MAX]; + str.push_back(nfn); + get_coname(t->get_cid(), oid, nfn); + + memset(&op, 0, sizeof(op)); + op.op = BTRFS_IOC_UT_OP_LINK; + op.args[0] = (__u64)fn; + op.args[1] = (__u64)nfn; + ops.push_back(op); + } + break; - case Transaction::OP_COLL_REMOVE: - { - coll_t cid; - t.get_cid(cid); - sobject_t oid; - t.get_oid(oid); - collection_remove(cid, oid, 0); + case Transaction::OP_STARTSYNC: + { + start_sync = true; + } + break; + + default: + cerr << "bad op " << opcode << std::endl; assert(0); } - break; - - case Transaction::OP_COLL_SETATTR: - { - coll_t cid; - t.get_cid(cid); - const char *attrname; - t.get_attrname(attrname); - bufferlist bl; - t.get_bl(bl); - dout(10) << "coll_setattr " << cid << dendl; - //collection_setattr(cid, attrname, bl.c_str(), bl.length(), 0); - char *fn = new char[PATH_MAX]; - str.push_back(fn); - get_cdir(cid, fn); - trans->ops[trans->len].op = BTRFS_IOC_USERTRANS_SETXATTR; - trans->ops[trans->len].args[0] = (__u64)fn; - char aname[ATTR_MAX]; - sprintf(aname, "user.ceph.%s", attrname); - trans->ops[trans->len].args[1] = (__u64)aname; - trans->ops[trans->len].args[2] = (__u64)bl.c_str(); - trans->ops[trans->len].args[3] = bl.length(); - trans->ops[trans->len].args[4] = 0; - trans->len++; - } - break; - - case Transaction::OP_COLL_RMATTR: - { - coll_t cid; - t.get_cid(cid); - const char *attrname; - t.get_attrname(attrname); - dout(10) << "coll_rmattr " << cid << dendl; - //collection_rmattr(cid, attrname, 0); - char *fn = new char[PATH_MAX]; - str.push_back(fn); - get_cdir(cid, fn); - trans->ops[trans->len].op = BTRFS_IOC_USERTRANS_REMOVEXATTR; - trans->ops[trans->len].args[0] = (__u64)fn; - trans->ops[trans->len].args[1] = (__u64)attrname; - trans->len++; - } - break; + } - - default: - cerr << "bad op " << op << std::endl; - assert(0); - } - } + ut.data_bytes += t->get_num_bytes(); + } - dout(20) << "apply_transaction final btrfs usertrans len is " << trans->len << dendl; - assert((int)trans->len <= (int)len); + ut.num_ops = ops.size(); + ut.ops_ptr = (unsigned long long)&ops[0]; + ut.num_fds = 2; + ut.metadata_ops = ops.size(); + ut.flags = 0; - // apply - int r = 0; - if (trans->len) { - r = ::ioctl(fsid_fd, BTRFS_IOC_USERTRANS, (unsigned long)trans); - if (r < 0) { - derr(0) << "apply_transaction_end got " << strerror_r(errno, buf, sizeof(buf)) - << " from btrfs usertrans ioctl" << dendl; - r = -errno; - } - } - delete[] (char *)trans; + dout(10) << "do_usertrans on " << ops.size() << " ops" << dendl; + int r = ::ioctl(op_fd, BTRFS_IOC_USERTRANS, &ut); + dout(10) << "do_usertrans on " << ops.size() << " ops = " << r << dendl; + if (r >= 0) { + assert(r == (int)ops.size()); + for (unsigned i=0; i= 0) - journal_transaction(t, onsafe); - else - delete onsafe; + } return r; -#endif /* DARWIN */ } -#endif - // -------------------- diff --git a/src/os/FileStore.h b/src/os/FileStore.h index 1372d9ca6976e..f91bbbd867e99 100644 --- a/src/os/FileStore.h +++ b/src/os/FileStore.h @@ -38,7 +38,7 @@ class FileStore : public JournalingObjectStore { __u64 fsid; int btrfs; - bool btrfs_trans_resv_start; + bool btrfs_usertrans; bool btrfs_trans_start_end; int fsid_fd, op_fd; @@ -81,7 +81,7 @@ class FileStore : public JournalingObjectStore { public: FileStore(const char *base, const char *jdev = 0) : basedir(base), journalpath(jdev ? jdev:""), - btrfs(false), btrfs_trans_resv_start(false), btrfs_trans_start_end(false), + btrfs(false), btrfs_trans_start_end(false), fsid_fd(-1), op_fd(-1), attrs(this), fake_attrs(false), collections(this), fake_collections(false), @@ -99,6 +99,7 @@ class FileStore : public JournalingObjectStore { int _transaction_start(__u64 bytes, __u64 ops); void _transaction_finish(int id); unsigned _apply_transaction(Transaction& t); + int _do_usertrans(list& tls); // ------------------ // objects diff --git a/src/os/btrfs_ioctl.h b/src/os/btrfs_ioctl.h new file mode 100644 index 0000000000000..138e8979ef9cf --- /dev/null +++ b/src/os/btrfs_ioctl.h @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __IOCTL_ +#define __IOCTL_ +#include + +#define BTRFS_IOCTL_MAGIC 0x94 +#define BTRFS_VOL_NAME_MAX 255 +#define BTRFS_PATH_NAME_MAX 4087 + +/* this should be 4k */ +struct btrfs_ioctl_vol_args { + __s64 fd; + char name[BTRFS_PATH_NAME_MAX + 1]; +}; + +struct btrfs_ioctl_clone_range_args { + __s64 src_fd; + __u64 src_offset, src_length; + __u64 dest_offset; +}; + +#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ + struct btrfs_ioctl_vol_args) +/* trans start and trans end are dangerous, and only for + * use by applications that know how to avoid the + * resulting deadlocks + */ +#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6) +#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7) +#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8) + +#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int) +#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ + struct btrfs_ioctl_vol_args) + +#define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \ + struct btrfs_ioctl_clone_range_args) + +#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ + struct btrfs_ioctl_vol_args) + +/* usertrans ops */ +#define BTRFS_IOC_UT_OP_OPEN 1 +#define BTRFS_IOC_UT_OP_CLOSE 2 +#define BTRFS_IOC_UT_OP_PWRITE 3 +#define BTRFS_IOC_UT_OP_UNLINK 4 +#define BTRFS_IOC_UT_OP_LINK 5 +#define BTRFS_IOC_UT_OP_MKDIR 6 +#define BTRFS_IOC_UT_OP_RMDIR 7 +#define BTRFS_IOC_UT_OP_TRUNCATE 8 +#define BTRFS_IOC_UT_OP_SETXATTR 9 +#define BTRFS_IOC_UT_OP_REMOVEXATTR 10 +#define BTRFS_IOC_UT_OP_CLONERANGE 11 + +#define BTRFS_IOC_UT_OP_FLAG_FD_SAVE (1<< 1) +#define BTRFS_IOC_UT_OP_FLAG_FD_ARG0 (1<< 2) +#define BTRFS_IOC_UT_OP_FLAG_FD_ARG1 (1<< 3) +#define BTRFS_IOC_UT_OP_FLAG_FD_ARG2 (1<< 4) +#define BTRFS_IOC_UT_OP_FLAG_FD_ARG3 (1<< 5) +#define BTRFS_IOC_UT_OP_FLAG_FD_ARG4 (1<< 6) +#define BTRFS_IOC_UT_OP_FLAG_FAIL_ON_NE (1<< 7) +#define BTRFS_IOC_UT_OP_FLAG_FAIL_ON_EQ (1<< 8) +#define BTRFS_IOC_UT_OP_FLAG_FAIL_ON_LT (1<< 9) +#define BTRFS_IOC_UT_OP_FLAG_FAIL_ON_GT (1<<10) +#define BTRFS_IOC_UT_OP_FLAG_FAIL_ON_LTE (1<<11) +#define BTRFS_IOC_UT_OP_FLAG_FAIL_ON_GTE (1<<12) + +struct btrfs_ioctl_usertrans_op { + __u64 op; + __s64 args[5]; + __s64 rval; + __u64 flags; + __u64 fd_num; +}; + +/* + * If an op fails and we cannot complete the transaction, we may want + * to lock up the file system (requiring a reboot) to prevent a + * partial result from committing. + */ +#define BTRFS_IOC_UT_FLAG_WEDGEONFAIL (1<<13) + +struct btrfs_ioctl_usertrans { + __u64 num_ops; /* in: # ops */ + __u64 ops_ptr; /* in: usertrans_op array */ + __u64 num_fds; /* in: max fds we use */ + __u64 data_bytes, metadata_ops; /* in: for space reservation */ + __u64 flags; /* in: flags */ + __u64 ops_completed; /* out: # ops completed */ +}; + +#define BTRFS_IOC_USERTRANS _IOW(BTRFS_IOCTL_MAGIC, 16, \ + struct btrfs_ioctl_usertrans) + +#endif diff --git a/src/push_to_kclient.pl b/src/push_to_kclient.pl index 8c0d501c6958b..f76a2bb91f409 100755 --- a/src/push_to_kclient.pl +++ b/src/push_to_kclient.pl @@ -24,8 +24,9 @@ for (@files) { system "cp -v $dir/$orig $kernel/$new"; } -print "pulling changed shared files from $dir to $kernel...\n"; +print "pulling changed shared files from $kernel to $dir...\n"; system "cp -v $kernel/fs/ceph/ioctl.h $dir/src/client/ioctl.h"; +system "cp -v $kernel/fs/btrfs/ioctl.h $dir/src/os/btrfs_ioctl.h"; print "done.\n"; -- 2.39.5