From: Roald J. van Loon Date: Sat, 7 Sep 2013 13:36:07 +0000 (+0200) Subject: automake cleanup: moving rados tool to tools subdir X-Git-Tag: v0.71~163^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=417f1f2a9d92fd1ad36089b1bf722f192d3cadb5;p=ceph.git automake cleanup: moving rados tool to tools subdir Signed-off-by: Roald J. van Loon --- diff --git a/src/Makefile.am b/src/Makefile.am index 634d5245ac6..7ed2d8828bd 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -479,7 +479,12 @@ librbd_la_LDFLAGS = ${AM_LDFLAGS} -version-info 1:0:0 \ -export-symbols-regex '^rbd_.*' $(PTHREAD_LIBS) $(EXTRALIBS) lib_LTLIBRARIES += librbd.la -rados_SOURCES = rados.cc rados_import.cc rados_export.cc rados_sync.cc common/obj_bencher.cc +rados_SOURCES = \ + tools/rados/rados.cc \ + tools/rados/rados_import.cc \ + tools/rados/rados_export.cc \ + tools/rados/rados_sync.cc \ + common/obj_bencher.cc rados_LDADD = libcls_lock_client.a librados.la $(LIBGLOBAL_LDA) bin_PROGRAMS += rados @@ -1784,7 +1789,7 @@ python_PYTHON = pybind/rados.py \ # headers... and everything else we want to include in a 'make dist' # that autotools doesn't magically identify. noinst_HEADERS = \ - rados_sync.h \ + tools/rados/rados_sync.h \ arch/probe.h \ arch/intel.h \ arch/neon.h \ diff --git a/src/rados.cc b/src/rados.cc deleted file mode 100644 index 0b7cc2b9cb7..00000000000 --- a/src/rados.cc +++ /dev/null @@ -1,2352 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2004-2006 Sage Weil - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. 
- * - */ - -#include "include/types.h" - -#include "include/rados/librados.hpp" -#include "include/rados/rados_types.hpp" -#include "rados_sync.h" -using namespace librados; - -#include "common/config.h" -#include "common/ceph_argparse.h" -#include "global/global_init.h" -#include "common/Cond.h" -#include "common/debug.h" -#include "common/errno.h" -#include "common/Formatter.h" -#include "common/obj_bencher.h" -#include "mds/inode_backtrace.h" -#include "auth/Crypto.h" -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cls/lock/cls_lock_client.h" - -int rados_tool_sync(const std::map < std::string, std::string > &opts, - std::vector &args); - -// two steps seem to be necessary to do this right -#define STR(x) _STR(x) -#define _STR(x) #x - -void usage(ostream& out) -{ - out << \ -"usage: rados [options] [commands]\n" -"POOL COMMANDS\n" -" lspools list pools\n" -" mkpool [123[ 4]] create pool '\n" -" [with auid 123[and using crush rule 4]]\n" -" cppool copy content of a pool\n" -" rmpool [ --yes-i-really-really-mean-it]\n" -" remove pool '\n" -" df show per-pool and total usage\n" -" ls list objects in pool\n\n" -" chown 123 change the pool owner to auid 123\n" -"\n" -"OBJECT COMMANDS\n" -" get [outfile] fetch object\n" -" put [infile] write object\n" -" truncate length truncate object\n" -" create [category] create object\n" -" rm ... 
remove object(s)\n" -" cp [target-obj] copy object\n" -" clonedata clone object data\n" -" listxattr \n" -" getxattr attr\n" -" setxattr attr val\n" -" rmxattr attr\n" -" stat objname stat the named object\n" -" mapext \n" -" lssnap list snaps\n" -" mksnap create snap \n" -" rmsnap remove snap \n" -" rollback roll back object to snap \n" -"\n" -" listsnaps list the snapshots of this object\n" -" bench write|seq|rand [-t concurrent_operations] [--no-cleanup]\n" -" default is 16 concurrent IOs and 4 MB ops\n" -" default is to clean up after write benchmark\n" -" cleanup clean up a previous benchmark operation\n" -" load-gen [options] generate load on the cluster\n" -" listomapkeys list the keys in the object map\n" -" listomapvals list the keys and vals in the object map \n" -" getomapval show the value for the specified key\n" -" in the object's object map\n" -" setomapval \n" -" rmomapkey \n" -" getomapheader \n" -" setomapheader \n" -" listwatchers list the watchers of this object\n" -"\n" -"IMPORT AND EXPORT\n" -" import [options] \n" -" Upload to \n" -" export [options] rados-pool> \n" -" Download to \n" -" options:\n" -" -f / --force Copy everything, even if it hasn't changed.\n" -" -d / --delete-after After synchronizing, delete unreferenced\n" -" files or objects from the target bucket\n" -" or directory.\n" -" --workers Number of worker threads to spawn \n" -" (default " STR(DEFAULT_NUM_RADOS_WORKER_THREADS) ")\n" -"\n" -"ADVISORY LOCKS\n" -" lock list \n" -" List all advisory locks on an object\n" -" lock get \n" -" Try to acquire a lock\n" -" lock break \n" -" Try to break a lock acquired by another client\n" -" lock info \n" -" Show lock information\n" -" options:\n" -" --lock-tag Lock tag, all locks operation should use\n" -" the same tag\n" -" --lock-cookie Locker cookie\n" -" --lock-description Description of lock\n" -" --lock-duration Lock duration (in seconds)\n" -" --lock-type Lock type (shared, exclusive)\n" -"\n" -"GLOBAL OPTIONS:\n" -" 
--object_locator object_locator\n" -" set object_locator for operation\n" -" -p pool\n" -" --pool=pool\n" -" select given pool by name\n" -" --target-pool=pool\n" -" select target pool by name\n" -" -b op_size\n" -" set the size of write ops for put or benchmarking\n" -" -s name\n" -" --snap name\n" -" select given snap name for (read) IO\n" -" -i infile\n" -" -o outfile\n" -" specify input or output file (for certain commands)\n" -" --create\n" -" create the pool or directory that was specified\n" -" -N namespace\n" -" --namespace=namespace\n" -" specify the namespace to use for the object\n" -"\n" -"BENCH OPTIONS:\n" -" -t N\n" -" --concurrent-ios=N\n" -" Set number of concurrent I/O operations\n" -" --show-time\n" -" prefix output with date/time\n" -"\n" -"LOAD GEN OPTIONS:\n" -" --num-objects total number of objects\n" -" --min-object-size min object size\n" -" --max-object-size max object size\n" -" --min-ops min number of operations\n" -" --max-ops max number of operations\n" -" --max-backlog max backlog (in MB)\n" -" --percent percent of operations that are read\n" -" --target-throughput target throughput (in MB)\n" -" --run-length total time (in seconds)\n"; - -} - -static void usage_exit() -{ - usage(cerr); - exit(1); -} - -static int do_get(IoCtx& io_ctx, const char *objname, const char *outfile, unsigned op_size) -{ - string oid(objname); - - int fd; - if (strcmp(outfile, "-") == 0) { - fd = 1; - } else { - fd = TEMP_FAILURE_RETRY(::open(outfile, O_WRONLY|O_CREAT|O_TRUNC, 0644)); - if (fd < 0) { - int err = errno; - cerr << "failed to open file: " << cpp_strerror(err) << std::endl; - return -err; - } - } - - uint64_t offset = 0; - int ret; - while (true) { - bufferlist outdata; - ret = io_ctx.read(oid, outdata, op_size, offset); - if (ret <= 0) { - goto out; - } - ret = outdata.write_fd(fd); - if (ret < 0) { - cerr << "error writing to file: " << cpp_strerror(ret) << std::endl; - goto out; - } - if (outdata.length() < op_size) - break; - offset += 
outdata.length(); - } - ret = 0; - - out: - if (fd != 1) - TEMP_FAILURE_RETRY(::close(fd)); - return ret; -} - -static int do_copy(IoCtx& io_ctx, const char *objname, IoCtx& target_ctx, const char *target_obj) -{ - string oid(objname); - bufferlist outdata; - librados::ObjectReadOperation read_op; - string start_after; - -#define COPY_CHUNK_SIZE (4 * 1024 * 1024) - read_op.read(0, COPY_CHUNK_SIZE, &outdata, NULL); - - map attrset; - read_op.getxattrs(&attrset, NULL); - - bufferlist omap_header; - read_op.omap_get_header(&omap_header, NULL); - -#define OMAP_CHUNK 1000 - map omap; - read_op.omap_get_vals(start_after, OMAP_CHUNK, &omap, NULL); - - bufferlist opbl; - int ret = io_ctx.operate(oid, &read_op, &opbl); - if (ret < 0) { - return ret; - } - - librados::ObjectWriteOperation write_op; - string target_oid(target_obj); - - /* reset dest if exists */ - write_op.create(false); - write_op.remove(); - - write_op.write_full(outdata); - write_op.omap_set_header(omap_header); - - map::iterator iter; - for (iter = attrset.begin(); iter != attrset.end(); ++iter) { - write_op.setxattr(iter->first.c_str(), iter->second); - } - if (!omap.empty()) { - write_op.omap_set(omap); - } - ret = target_ctx.operate(target_oid, &write_op); - if (ret < 0) { - return ret; - } - - uint64_t off = 0; - - while (outdata.length() == COPY_CHUNK_SIZE) { - off += outdata.length(); - outdata.clear(); - ret = io_ctx.read(oid, outdata, COPY_CHUNK_SIZE, off); - if (ret < 0) - goto err; - - ret = target_ctx.write(target_oid, outdata, outdata.length(), off); - if (ret < 0) - goto err; - } - - /* iterate through source omap and update target. 
This is not atomic */ - while (omap.size() == OMAP_CHUNK) { - /* now start_after should point at the last entry */ - map::iterator iter = omap.end(); - --iter; - start_after = iter->first; - - omap.clear(); - ret = io_ctx.omap_get_vals(oid, start_after, OMAP_CHUNK, &omap); - if (ret < 0) - goto err; - - if (omap.empty()) - break; - - ret = target_ctx.omap_set(target_oid, omap); - if (ret < 0) - goto err; - } - - return 0; - -err: - target_ctx.remove(target_oid); - return ret; -} - -static int do_clone_data(IoCtx& io_ctx, const char *objname, IoCtx& target_ctx, const char *target_obj) -{ - string oid(objname); - - // get size - uint64_t size; - int r = target_ctx.stat(oid, &size, NULL); - if (r < 0) - return r; - - librados::ObjectWriteOperation write_op; - string target_oid(target_obj); - - /* reset data stream only */ - write_op.create(false); - write_op.truncate(0); - write_op.clone_range(0, oid, 0, size); - return target_ctx.operate(target_oid, &write_op); -} - -static int do_copy_pool(Rados& rados, const char *src_pool, const char *target_pool) -{ - IoCtx src_ctx, target_ctx; - int ret = rados.ioctx_create(src_pool, src_ctx); - if (ret < 0) { - cerr << "cannot open source pool: " << src_pool << std::endl; - return ret; - } - ret = rados.ioctx_create(target_pool, target_ctx); - if (ret < 0) { - cerr << "cannot open target pool: " << target_pool << std::endl; - return ret; - } - librados::ObjectIterator i = src_ctx.objects_begin(); - librados::ObjectIterator i_end = src_ctx.objects_end(); - for (; i != i_end; ++i) { - string oid = i->first; - string locator = i->second; - if (i->second.size()) - cout << src_pool << ":" << oid << "(@" << locator << ")" << " => " - << target_pool << ":" << oid << "(@" << locator << ")" << std::endl; - else - cout << src_pool << ":" << oid << " => " - << target_pool << ":" << oid << std::endl; - - - target_ctx.locator_set_key(locator); - ret = do_copy(src_ctx, oid.c_str(), target_ctx, oid.c_str()); - if (ret < 0) { - char buf[64]; - 
cerr << "error copying object: " << strerror_r(errno, buf, sizeof(buf)) << std::endl; - return ret; - } - } - - return 0; -} - -static int do_put(IoCtx& io_ctx, const char *objname, const char *infile, int op_size) -{ - string oid(objname); - bufferlist indata; - bool stdio = false; - if (strcmp(infile, "-") == 0) - stdio = true; - - int ret; - int fd = 0; - if (!stdio) - fd = open(infile, O_RDONLY); - if (fd < 0) { - char buf[80]; - cerr << "error reading input file " << infile << ": " << strerror_r(errno, buf, sizeof(buf)) << std::endl; - return 1; - } - char *buf = new char[op_size]; - int count = op_size; - uint64_t offset = 0; - while (count != 0) { - count = read(fd, buf, op_size); - if (count < 0) { - ret = -errno; - cerr << "error reading input file " << infile << ": " << cpp_strerror(ret) << std::endl; - goto out; - } - if (count == 0) { - if (!offset) { - ret = io_ctx.create(oid, true); - if (ret < 0) { - cerr << "WARNING: could not create object: " << oid << std::endl; - goto out; - } - } - continue; - } - indata.append(buf, count); - if (offset == 0) - ret = io_ctx.write_full(oid, indata); - else - ret = io_ctx.write(oid, indata, count, offset); - indata.clear(); - - if (ret < 0) { - goto out; - } - offset += count; - } - ret = 0; - out: - TEMP_FAILURE_RETRY(close(fd)); - delete[] buf; - return ret; -} - -class RadosWatchCtx : public librados::WatchCtx { - string name; -public: - RadosWatchCtx(const char *imgname) : name(imgname) {} - virtual ~RadosWatchCtx() {} - virtual void notify(uint8_t opcode, uint64_t ver, bufferlist& bl) { - string s; - try { - bufferlist::iterator iter = bl.begin(); - ::decode(s, iter); - } catch (buffer::error *err) { - cout << "could not decode bufferlist, buffer length=" << bl.length() << std::endl; - } - cout << name << " got notification opcode=" << (int)opcode << " ver=" << ver << " msg='" << s << "'" << std::endl; - } -}; - -static const char 
alphanum_table[]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; - -int gen_rand_alphanumeric(char *dest, int size) /* size should be the required string size + 1 */ -{ - int ret = get_random_bytes(dest, size); - if (ret < 0) { - cerr << "cannot get random bytes: " << cpp_strerror(-ret) << std::endl; - return -1; - } - - int i; - for (i=0; i objs; - - utime_t start_time; - - bool going_down; - -public: - int read_percent; - int num_objs; - size_t min_obj_len; - uint64_t max_obj_len; - size_t min_op_len; - size_t max_op_len; - size_t max_ops; - size_t max_backlog; - size_t target_throughput; - int run_length; - - enum { - OP_READ, - OP_WRITE, - }; - - struct LoadGenOp { - int id; - int type; - string oid; - size_t off; - size_t len; - bufferlist bl; - LoadGen *lg; - librados::AioCompletion *completion; - - LoadGenOp() {} - LoadGenOp(LoadGen *_lg) : lg(_lg), completion(NULL) {} - }; - - int max_op; - - map pending_ops; - - void gen_op(LoadGenOp *op); - uint64_t gen_next_op(); - void run_op(LoadGenOp *op); - - uint64_t cur_sent_rate() { - return total_sent / time_passed(); - } - - uint64_t cur_completed_rate() { - return total_completed / time_passed(); - } - - uint64_t total_expected() { - return target_throughput * time_passed(); - } - - float time_passed() { - utime_t now = ceph_clock_now(g_ceph_context); - now -= start_time; - uint64_t ns = now.nsec(); - float total = ns / 1000000000; - total += now.sec(); - return total; - } - - Mutex lock; - Cond cond; - - LoadGen(Rados *_rados) : rados(_rados), going_down(false), lock("LoadGen") { - read_percent = 80; - min_obj_len = 1024; - max_obj_len = 5ull * 1024ull * 1024ull * 1024ull; - min_op_len = 1024; - target_throughput = 5 * 1024 * 1024; // B/sec - max_op_len = 2 * 1024 * 1024; - max_backlog = target_throughput * 2; - run_length = 60; - - total_sent = 0; - total_completed = 0; - num_objs = 200; - max_op = 16; - } - int bootstrap(const char *pool); - int run(); - void cleanup(); - - void 
io_cb(completion_t c, LoadGenOp *op) { - total_completed += op->len; - - Mutex::Locker l(lock); - - double rate = (double)cur_completed_rate() / (1024 * 1024); - cout.precision(3); - cout << "op " << op->id << " completed, throughput=" << rate << "MB/sec" << std::endl; - - map::iterator iter = pending_ops.find(op->id); - if (iter != pending_ops.end()) - pending_ops.erase(iter); - - if (!going_down) - op->completion->release(); - - delete op; - - cond.Signal(); - } -}; - -static void _load_gen_cb(completion_t c, void *param) -{ - LoadGen::LoadGenOp *op = (LoadGen::LoadGenOp *)param; - op->lg->io_cb(c, op); -} - -int LoadGen::bootstrap(const char *pool) -{ - char buf[128]; - int i; - - if (!pool) { - cerr << "ERROR: pool name was not specified" << std::endl; - return -EINVAL; - } - - int ret = rados->ioctx_create(pool, io_ctx); - if (ret < 0) { - cerr << "error opening pool " << pool << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - return ret; - } - - int buf_len = 1; - bufferptr p = buffer::create(buf_len); - bufferlist bl; - memset(p.c_str(), 0, buf_len); - bl.push_back(p); - - list completions; - for (i = 0; i < num_objs; i++) { - obj_info info; - gen_rand_alphanumeric(buf, 16); - info.name = "obj-"; - info.name.append(buf); - info.len = get_random(min_obj_len, max_obj_len); - - // throttle... 
- while (completions.size() > max_ops) { - AioCompletion *c = completions.front(); - c->wait_for_complete(); - ret = c->get_return_value(); - c->release(); - completions.pop_front(); - if (ret < 0) { - cerr << "aio_write failed" << std::endl; - return ret; - } - } - - librados::AioCompletion *c = rados->aio_create_completion(NULL, NULL, NULL); - completions.push_back(c); - // generate object - ret = io_ctx.aio_write(info.name, c, bl, buf_len, info.len - buf_len); - if (ret < 0) { - cerr << "couldn't write obj: " << info.name << " ret=" << ret << std::endl; - return ret; - } - objs[i] = info; - } - - list::iterator iter; - for (iter = completions.begin(); iter != completions.end(); ++iter) { - AioCompletion *c = *iter; - c->wait_for_complete(); - ret = c->get_return_value(); - c->release(); - if (ret < 0) { // yes, we leak. - cerr << "aio_write failed" << std::endl; - return ret; - } - } - return 0; -} - -void LoadGen::run_op(LoadGenOp *op) -{ - op->completion = rados->aio_create_completion(op, _load_gen_cb, NULL); - - switch (op->type) { - case OP_READ: - io_ctx.aio_read(op->oid, op->completion, &op->bl, op->len, op->off); - break; - case OP_WRITE: - bufferptr p = buffer::create(op->len); - memset(p.c_str(), 0, op->len); - op->bl.push_back(p); - - io_ctx.aio_write(op->oid, op->completion, op->bl, op->len, op->off); - break; - } - - total_sent += op->len; -} - -void LoadGen::gen_op(LoadGenOp *op) -{ - int i = get_random(0, objs.size() - 1); - obj_info& info = objs[i]; - op->oid = info.name; - - size_t len = get_random(min_op_len, max_op_len); - if (len > info.len) - len = info.len; - size_t off = get_random(0, info.len); - - if (off + len > info.len) - off = info.len - len; - - op->off = off; - op->len = len; - - i = get_random(1, 100); - if (i > read_percent) - op->type = OP_WRITE; - else - op->type = OP_READ; - - cout << (op->type == OP_READ ? 
"READ" : "WRITE") << " : oid=" << op->oid << " off=" << op->off << " len=" << op->len << std::endl; -} - -uint64_t LoadGen::gen_next_op() -{ - lock.Lock(); - - LoadGenOp *op = new LoadGenOp(this); - gen_op(op); - op->id = max_op++; - pending_ops[op->id] = op; - - lock.Unlock(); - - run_op(op); - - return op->len; -} - -int LoadGen::run() -{ - start_time = ceph_clock_now(g_ceph_context); - utime_t end_time = start_time; - end_time += run_length; - utime_t stamp_time = start_time; - uint32_t total_sec = 0; - - while (1) { - lock.Lock(); - utime_t one_second(1, 0); - cond.WaitInterval(g_ceph_context, lock, one_second); - lock.Unlock(); - utime_t now = ceph_clock_now(g_ceph_context); - - if (now > end_time) - break; - - uint64_t expected = total_expected(); - lock.Lock(); - uint64_t sent = total_sent; - uint64_t completed = total_completed; - lock.Unlock(); - - if (now - stamp_time >= utime_t(1, 0)) { - double rate = (double)cur_completed_rate() / (1024 * 1024); - ++total_sec; - cout.precision(3); - cout << setw(5) << total_sec << ": throughput=" << rate << "MB/sec" << " pending data=" << sent - completed << std::endl; - stamp_time = now; - } - - while (sent < expected && - sent - completed < max_backlog && - pending_ops.size() < max_ops) { - sent += gen_next_op(); - } - } - - // get a reference to all pending requests - vector completions; - lock.Lock(); - going_down = true; - map::iterator iter; - for (iter = pending_ops.begin(); iter != pending_ops.end(); ++iter) { - LoadGenOp *op = iter->second; - completions.push_back(op->completion); - } - lock.Unlock(); - - cout << "waiting for all operations to complete" << std::endl; - - // now wait on all the pending requests - for (vector::iterator citer = completions.begin(); citer != completions.end(); ++citer) { - librados::AioCompletion *c = *citer; - c->wait_for_complete(); - c->release(); - } - - return 0; -} - -void LoadGen::cleanup() -{ - cout << "cleaning up objects" << std::endl; - map::iterator iter; - for (iter = 
objs.begin(); iter != objs.end(); ++iter) { - obj_info& info = iter->second; - int ret = io_ctx.remove(info.name); - if (ret < 0) - cerr << "couldn't remove obj: " << info.name << " ret=" << ret << std::endl; - } -} - - -class RadosBencher : public ObjBencher { - librados::AioCompletion **completions; - librados::Rados& rados; - librados::IoCtx& io_ctx; - librados::ObjectIterator oi; - bool iterator_valid; -protected: - int completions_init(int concurrentios) { - completions = new librados::AioCompletion *[concurrentios]; - return 0; - } - void completions_done() { - delete[] completions; - completions = NULL; - } - int create_completion(int slot, void (*cb)(void *, void*), void *arg) { - completions[slot] = rados.aio_create_completion((void *) arg, 0, cb); - - if (!completions[slot]) - return -EINVAL; - - return 0; - } - void release_completion(int slot) { - completions[slot]->release(); - completions[slot] = 0; - } - - int aio_read(const std::string& oid, int slot, bufferlist *pbl, size_t len) { - return io_ctx.aio_read(oid, completions[slot], pbl, len, 0); - } - - int aio_write(const std::string& oid, int slot, bufferlist& bl, size_t len) { - return io_ctx.aio_write(oid, completions[slot], bl, len, 0); - } - - int aio_remove(const std::string& oid, int slot) { - return io_ctx.aio_remove(oid, completions[slot]); - } - - int sync_read(const std::string& oid, bufferlist& bl, size_t len) { - return io_ctx.read(oid, bl, len, 0); - } - int sync_write(const std::string& oid, bufferlist& bl, size_t len) { - return io_ctx.write(oid, bl, len, 0); - } - - int sync_remove(const std::string& oid) { - return io_ctx.remove(oid); - } - - bool completion_is_done(int slot) { - return completions[slot]->is_safe(); - } - - int completion_wait(int slot) { - return completions[slot]->wait_for_safe_and_cb(); - } - int completion_ret(int slot) { - return completions[slot]->get_return_value(); - } - - bool get_objects(std::list* objects, int num) { - int count = 0; - - if 
(!iterator_valid) { - oi = io_ctx.objects_begin(); - iterator_valid = true; - } - - librados::ObjectIterator ei = io_ctx.objects_end(); - - if (oi == ei) { - iterator_valid = false; - return false; - } - - objects->clear(); - for ( ; oi != ei && count < num; ++oi) { - objects->push_back(oi->first); - ++count; - } - - return true; - } - -public: - RadosBencher(CephContext *cct_, librados::Rados& _r, librados::IoCtx& _i) - : ObjBencher(cct), completions(NULL), rados(_r), io_ctx(_i), iterator_valid(false) {} - ~RadosBencher() { } -}; - -static int do_lock_cmd(std::vector &nargs, - const std::map < std::string, std::string > &opts, - IoCtx *ioctx, - Formatter *formatter) -{ - char buf[128]; - - if (nargs.size() < 3) - usage_exit(); - - string cmd(nargs[1]); - string oid(nargs[2]); - - string lock_tag; - string lock_cookie; - string lock_description; - int lock_duration = 0; - ClsLockType lock_type = LOCK_EXCLUSIVE; - - map::const_iterator i; - i = opts.find("lock-tag"); - if (i != opts.end()) { - lock_tag = i->second; - } - i = opts.find("lock-cookie"); - if (i != opts.end()) { - lock_cookie = i->second; - } - i = opts.find("lock-description"); - if (i != opts.end()) { - lock_description = i->second; - } - i = opts.find("lock-duration"); - if (i != opts.end()) { - lock_duration = strtol(i->second.c_str(), NULL, 10); - } - i = opts.find("lock-type"); - if (i != opts.end()) { - const string& type_str = i->second; - if (type_str.compare("exclusive") == 0) { - lock_type = LOCK_EXCLUSIVE; - } else if (type_str.compare("shared") == 0) { - lock_type = LOCK_SHARED; - } else { - cerr << "unknown lock type was specified, aborting" << std::endl; - return -EINVAL; - } - } - - if (cmd.compare("list") == 0) { - list locks; - int ret = rados::cls::lock::list_locks(ioctx, oid, &locks); - if (ret < 0) { - cerr << "ERROR: rados_list_locks(): " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - return ret; - } - - formatter->open_object_section("object"); - 
formatter->dump_string("objname", oid); - formatter->open_array_section("locks"); - list::iterator iter; - for (iter = locks.begin(); iter != locks.end(); ++iter) { - formatter->open_object_section("lock"); - formatter->dump_string("name", *iter); - formatter->close_section(); - } - formatter->close_section(); - formatter->close_section(); - formatter->flush(cout); - return 0; - } - - if (nargs.size() < 4) - usage_exit(); - - string lock_name(nargs[3]); - - if (cmd.compare("info") == 0) { - map lockers; - ClsLockType type = LOCK_NONE; - string tag; - int ret = rados::cls::lock::get_lock_info(ioctx, oid, lock_name, &lockers, &type, &tag); - if (ret < 0) { - cerr << "ERROR: rados_lock_get_lock_info(): " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - return ret; - } - - formatter->open_object_section("lock"); - formatter->dump_string("name", lock_name); - formatter->dump_string("type", cls_lock_type_str(type)); - formatter->dump_string("tag", tag); - formatter->open_array_section("lockers"); - map::iterator iter; - for (iter = lockers.begin(); iter != lockers.end(); ++iter) { - const rados::cls::lock::locker_id_t& id = iter->first; - const rados::cls::lock::locker_info_t& info = iter->second; - formatter->open_object_section("locker"); - formatter->dump_stream("name") << id.locker; - formatter->dump_string("cookie", id.cookie); - formatter->dump_string("description", info.description); - formatter->dump_stream("expiration") << info.expiration; - formatter->dump_stream("addr") << info.addr; - formatter->close_section(); - } - formatter->close_section(); - formatter->close_section(); - formatter->flush(cout); - - return ret; - } else if (cmd.compare("get") == 0) { - rados::cls::lock::Lock l(lock_name); - l.set_cookie(lock_cookie); - l.set_tag(lock_tag); - l.set_duration(utime_t(lock_duration, 0)); - l.set_description(lock_description); - int ret; - switch (lock_type) { - case LOCK_SHARED: - ret = l.lock_shared(ioctx, oid); - break; - default: - ret = 
l.lock_exclusive(ioctx, oid); - } - if (ret < 0) { - cerr << "ERROR: failed locking: " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - return ret; - } - - return ret; - } - - if (nargs.size() < 5) - usage_exit(); - - if (cmd.compare("break") == 0) { - string locker(nargs[4]); - rados::cls::lock::Lock l(lock_name); - l.set_cookie(lock_cookie); - l.set_tag(lock_tag); - entity_name_t name; - if (!name.parse(locker)) { - cerr << "ERROR: failed to parse locker name (" << locker << ")" << std::endl; - return -EINVAL; - } - int ret = l.break_lock(ioctx, oid, name); - if (ret < 0) { - cerr << "ERROR: failed breaking lock: " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - return ret; - } - } else { - usage_exit(); - } - - return 0; -} - -/********************************************** - -**********************************************/ -static int rados_tool_common(const std::map < std::string, std::string > &opts, - std::vector &nargs) -{ - int ret; - bool create_pool = false; - const char *pool_name = NULL; - const char *target_pool_name = NULL; - string oloc, target_oloc, nspace; - int concurrent_ios = 16; - int op_size = 1 << 22; - bool cleanup = true; - const char *snapname = NULL; - snap_t snapid = CEPH_NOSNAP; - std::map::const_iterator i; - std::string category; - - uint64_t min_obj_len = 0; - uint64_t max_obj_len = 0; - uint64_t min_op_len = 0; - uint64_t max_op_len = 0; - uint64_t max_ops = 0; - uint64_t max_backlog = 0; - uint64_t target_throughput = 0; - int64_t read_percent = -1; - uint64_t num_objs = 0; - int run_length = 0; - - bool show_time = false; - - Formatter *formatter = NULL; - bool pretty_format = false; - - Rados rados; - IoCtx io_ctx; - - i = opts.find("create"); - if (i != opts.end()) { - create_pool = true; - } - i = opts.find("pool"); - if (i != opts.end()) { - pool_name = i->second.c_str(); - } - i = opts.find("target_pool"); - if (i != opts.end()) { - target_pool_name = i->second.c_str(); - } - i = opts.find("object_locator"); - if 
(i != opts.end()) { - oloc = i->second; - } - i = opts.find("target_locator"); - if (i != opts.end()) { - target_oloc = i->second; - } - i = opts.find("category"); - if (i != opts.end()) { - category = i->second; - } - i = opts.find("concurrent-ios"); - if (i != opts.end()) { - concurrent_ios = strtol(i->second.c_str(), NULL, 10); - } - i = opts.find("block-size"); - if (i != opts.end()) { - op_size = strtol(i->second.c_str(), NULL, 10); - } - i = opts.find("snap"); - if (i != opts.end()) { - snapname = i->second.c_str(); - } - i = opts.find("snapid"); - if (i != opts.end()) { - snapid = strtoll(i->second.c_str(), NULL, 10); - } - i = opts.find("min-object-size"); - if (i != opts.end()) { - min_obj_len = strtoll(i->second.c_str(), NULL, 10); - } - i = opts.find("max-object-size"); - if (i != opts.end()) { - max_obj_len = strtoll(i->second.c_str(), NULL, 10); - } - i = opts.find("min-op-len"); - if (i != opts.end()) { - min_op_len = strtoll(i->second.c_str(), NULL, 10); - } - i = opts.find("max-op-len"); - if (i != opts.end()) { - max_op_len = strtoll(i->second.c_str(), NULL, 10); - } - i = opts.find("max-ops"); - if (i != opts.end()) { - max_ops = strtoll(i->second.c_str(), NULL, 10); - } - i = opts.find("max-backlog"); - if (i != opts.end()) { - max_backlog = strtoll(i->second.c_str(), NULL, 10); - } - i = opts.find("target-throughput"); - if (i != opts.end()) { - target_throughput = strtoll(i->second.c_str(), NULL, 10); - } - i = opts.find("read-percent"); - if (i != opts.end()) { - read_percent = strtoll(i->second.c_str(), NULL, 10); - } - i = opts.find("num-objects"); - if (i != opts.end()) { - num_objs = strtoll(i->second.c_str(), NULL, 10); - } - i = opts.find("run-length"); - if (i != opts.end()) { - run_length = strtol(i->second.c_str(), NULL, 10); - } - i = opts.find("show-time"); - if (i != opts.end()) { - show_time = true; - } - i = opts.find("no-cleanup"); - if (i != opts.end()) { - cleanup = false; - } - i = opts.find("pretty-format"); - if (i != 
opts.end()) { - pretty_format = true; - } - i = opts.find("format"); - if (i != opts.end()) { - const char *format = i->second.c_str(); - if (strcmp(format, "xml") == 0) - formatter = new XMLFormatter(pretty_format); - else if (strcmp(format, "json") == 0) - formatter = new JSONFormatter(pretty_format); - else { - cerr << "unrecognized format: " << format << std::endl; - return -EINVAL; - } - } - i = opts.find("namespace"); - if (i != opts.end()) { - nspace = i->second; - } - - - // open rados - ret = rados.init_with_context(g_ceph_context); - if (ret) { - cerr << "couldn't initialize rados! error " << ret << std::endl; - ret = -1; - goto out; - } - - ret = rados.connect(); - if (ret) { - cerr << "couldn't connect to cluster! error " << ret << std::endl; - ret = -1; - goto out; - } - char buf[80]; - - if (create_pool && !pool_name) { - cerr << "--create-pool requested but pool_name was not specified!" << std::endl; - usage_exit(); - } - - if (create_pool) { - ret = rados.pool_create(pool_name, 0, 0); - if (ret < 0) { - cerr << "error creating pool " << pool_name << ": " - << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } - } - - // open io context. - if (pool_name) { - ret = rados.ioctx_create(pool_name, io_ctx); - if (ret < 0) { - cerr << "error opening pool " << pool_name << ": " - << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } - } - - // snapname? 
- if (snapname) { - ret = io_ctx.snap_lookup(snapname, &snapid); - if (ret < 0) { - cerr << "error looking up snap '" << snapname << "': " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } - } - if (oloc.size()) { - io_ctx.locator_set_key(oloc); - } - if (!nspace.empty()) { - io_ctx.set_namespace(nspace); - } - if (snapid != CEPH_NOSNAP) { - string name; - ret = io_ctx.snap_get_name(snapid, &name); - if (ret < 0) { - cerr << "snapid " << snapid << " doesn't exist in pool " - << io_ctx.get_pool_name() << std::endl; - goto out; - } - io_ctx.snap_set_read(snapid); - cout << "selected snap " << snapid << " '" << snapname << "'" << std::endl; - } - - assert(!nargs.empty()); - - // list pools? - if (strcmp(nargs[0], "lspools") == 0) { - list vec; - rados.pool_list(vec); - for (list::iterator i = vec.begin(); i != vec.end(); ++i) - cout << *i << std::endl; - } - else if (strcmp(nargs[0], "df") == 0) { - // pools - list vec; - - if (!pool_name) - rados.pool_list(vec); - else - vec.push_back(pool_name); - - map > stats; - rados.get_pool_stats(vec, category, stats); - - if (!formatter) { - printf("%-15s %-15s" - "%12s %12s %12s %12s " - "%12s %12s %12s %12s %12s\n", - "pool name", - "category", - "KB", "objects", "clones", "degraded", - "unfound", "rd", "rd KB", "wr", "wr KB"); - } else { - formatter->open_object_section("stats"); - formatter->open_array_section("pools"); - } - for (map::iterator c = stats.begin(); c != stats.end(); ++c) { - const char *pool_name = c->first.c_str(); - stats_map& m = c->second; - if (formatter) { - formatter->open_object_section("pool"); - int64_t pool_id = rados.pool_lookup(pool_name); - formatter->dump_string("name", pool_name); - if (pool_id >= 0) - formatter->dump_format("id", "%lld", pool_id); - else - cerr << "ERROR: lookup_pg_pool_name for name=" << pool_name << " returned " << pool_id << std::endl; - formatter->open_array_section("categories"); - } - for (stats_map::iterator i = m.begin(); i != m.end(); ++i) { - 
const char *category = (i->first.size() ? i->first.c_str() : ""); - pool_stat_t& s = i->second; - if (!formatter) { - if (!*category) - category = "-"; - printf("%-15s " - "%-15s " - "%12lld %12lld %12lld %12lld" - "%12lld %12lld %12lld %12lld %12lld\n", - pool_name, - category, - (long long)s.num_kb, - (long long)s.num_objects, - (long long)s.num_object_clones, - (long long)s.num_objects_degraded, - (long long)s.num_objects_unfound, - (long long)s.num_rd, (long long)s.num_rd_kb, - (long long)s.num_wr, (long long)s.num_wr_kb); - } else { - formatter->open_object_section("category"); - if (category) - formatter->dump_string("name", category); - formatter->dump_format("size_bytes", "%lld", s.num_bytes); - formatter->dump_format("size_kb", "%lld", s.num_kb); - formatter->dump_format("num_objects", "%lld", s.num_objects); - formatter->dump_format("num_object_clones", "%lld", s.num_object_clones); - formatter->dump_format("num_object_copies", "%lld", s.num_object_copies); - formatter->dump_format("num_objects_missing_on_primary", "%lld", s.num_objects_missing_on_primary); - formatter->dump_format("num_objects_unfound", "%lld", s.num_objects_unfound); - formatter->dump_format("num_objects_degraded", "%lld", s.num_objects_degraded); - formatter->dump_format("read_bytes", "%lld", s.num_rd); - formatter->dump_format("read_kb", "%lld", s.num_rd_kb); - formatter->dump_format("write_bytes", "%lld", s.num_wr); - formatter->dump_format("write_kb", "%lld", s.num_wr_kb); - formatter->flush(cout); - } - if (formatter) { - formatter->close_section(); - } - } - if (formatter) { - formatter->close_section(); - formatter->close_section(); - formatter->flush(cout); - } - } - - // total - cluster_stat_t tstats; - rados.cluster_stat(tstats); - if (!formatter) { - printf(" total used %12lld %12lld\n", (long long unsigned)tstats.kb_used, - (long long unsigned)tstats.num_objects); - printf(" total avail %12lld\n", (long long unsigned)tstats.kb_avail); - printf(" total space %12lld\n", (long 
long unsigned)tstats.kb); - } else { - formatter->close_section(); - formatter->dump_format("total_objects", "%lld", (long long unsigned)tstats.num_objects); - formatter->dump_format("total_used", "%lld", (long long unsigned)tstats.kb_used); - formatter->dump_format("total_avail", "%lld", (long long unsigned)tstats.kb_avail); - formatter->dump_format("total_space", "%lld", (long long unsigned)tstats.kb); - formatter->close_section(); - formatter->flush(cout); - } - } - - else if (strcmp(nargs[0], "ls") == 0) { - if (!pool_name) { - cerr << "pool name was not specified" << std::endl; - ret = -1; - goto out; - } - - bool stdout = (nargs.size() < 2) || (strcmp(nargs[1], "-") == 0); - ostream *outstream; - if(stdout) - outstream = &cout; - else - outstream = new ofstream(nargs[1]); - - { - try { - librados::ObjectIterator i = io_ctx.objects_begin(); - librados::ObjectIterator i_end = io_ctx.objects_end(); - for (; i != i_end; ++i) { - if (i->second.size()) - *outstream << i->first << "\t" << i->second << std::endl; - else - *outstream << i->first << std::endl; - } - } - catch (const std::runtime_error& e) { - cerr << e.what() << std::endl; - ret = -1; - goto out; - } - } - if (!stdout) - delete outstream; - } - else if (strcmp(nargs[0], "chown") == 0) { - if (!pool_name || nargs.size() < 2) - usage_exit(); - - uint64_t new_auid = strtol(nargs[1], 0, 10); - ret = io_ctx.set_auid(new_auid); - if (ret < 0) { - cerr << "error changing auid on pool " << io_ctx.get_pool_name() << ':' - << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - } else cerr << "changed auid on pool " << io_ctx.get_pool_name() - << " to " << new_auid << std::endl; - } - else if (strcmp(nargs[0], "mapext") == 0) { - if (!pool_name || nargs.size() < 2) - usage_exit(); - string oid(nargs[1]); - std::map m; - ret = io_ctx.mapext(oid, 0, -1, m); - if (ret < 0) { - cerr << "mapext error on " << pool_name << "/" << oid << ": " << cpp_strerror(ret) << std::endl; - goto out; - } - std::map::iterator iter; - 
for (iter = m.begin(); iter != m.end(); ++iter) { - cout << hex << iter->first << "\t" << iter->second << dec << std::endl; - } - } - else if (strcmp(nargs[0], "stat") == 0) { - if (!pool_name || nargs.size() < 2) - usage_exit(); - string oid(nargs[1]); - uint64_t size; - time_t mtime; - ret = io_ctx.stat(oid, &size, &mtime); - if (ret < 0) { - cerr << " error stat-ing " << pool_name << "/" << oid << ": " - << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } else { - cout << pool_name << "/" << oid - << " mtime " << mtime << ", size " << size << std::endl; - } - } - else if (strcmp(nargs[0], "get") == 0) { - if (!pool_name || nargs.size() < 3) - usage_exit(); - ret = do_get(io_ctx, nargs[1], nargs[2], op_size); - if (ret < 0) { - cerr << "error getting " << pool_name << "/" << nargs[1] << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } - } - else if (strcmp(nargs[0], "put") == 0) { - if (!pool_name || nargs.size() < 3) - usage_exit(); - ret = do_put(io_ctx, nargs[1], nargs[2], op_size); - if (ret < 0) { - cerr << "error putting " << pool_name << "/" << nargs[1] << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } - } - else if (strcmp(nargs[0], "truncate") == 0) { - if (!pool_name || nargs.size() < 3) - usage_exit(); - - string oid(nargs[1]); - long size = atol(nargs[2]); - if (size < 0) { - cerr << "error, cannot truncate to negative value" << std::endl; - usage_exit(); - } - ret = io_ctx.trunc(oid, size); - if (ret < 0) { - cerr << "error truncating oid " - << oid << " to " << size << ": " - << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - } else { - ret = 0; - } - } - else if (strcmp(nargs[0], "setxattr") == 0) { - if (!pool_name || nargs.size() < 4) - usage_exit(); - - string oid(nargs[1]); - string attr_name(nargs[2]); - string attr_val(nargs[3]); - - bufferlist bl; - bl.append(attr_val.c_str(), attr_val.length()); - - ret = io_ctx.setxattr(oid, attr_name.c_str(), bl); - if (ret < 0) { - cerr 
<< "error setting xattr " << pool_name << "/" << oid << "/" << attr_name << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } - else - ret = 0; - } - else if (strcmp(nargs[0], "getxattr") == 0) { - if (!pool_name || nargs.size() < 3) - usage_exit(); - - string oid(nargs[1]); - string attr_name(nargs[2]); - - bufferlist bl; - ret = io_ctx.getxattr(oid, attr_name.c_str(), bl); - if (ret < 0) { - cerr << "error getting xattr " << pool_name << "/" << oid << "/" << attr_name << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } - else - ret = 0; - string s(bl.c_str(), bl.length()); - cout << s << std::endl; - } else if (strcmp(nargs[0], "rmxattr") == 0) { - if (!pool_name || nargs.size() < 3) - usage_exit(); - - string oid(nargs[1]); - string attr_name(nargs[2]); - - ret = io_ctx.rmxattr(oid, attr_name.c_str()); - if (ret < 0) { - cerr << "error removing xattr " << pool_name << "/" << oid << "/" << attr_name << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } - } else if (strcmp(nargs[0], "listxattr") == 0) { - if (!pool_name || nargs.size() < 2) - usage_exit(); - - string oid(nargs[1]); - map attrset; - bufferlist bl; - ret = io_ctx.getxattrs(oid, attrset); - if (ret < 0) { - cerr << "error getting xattr set " << pool_name << "/" << oid << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } - - for (map::iterator iter = attrset.begin(); - iter != attrset.end(); ++iter) { - cout << iter->first << std::endl; - } - } else if (strcmp(nargs[0], "getomapheader") == 0) { - if (!pool_name || nargs.size() < 2) - usage_exit(); - - string oid(nargs[1]); - - bufferlist header; - ret = io_ctx.omap_get_header(oid, &header); - if (ret < 0) { - cerr << "error getting omap header " << pool_name << "/" << oid - << ": " << cpp_strerror(ret) << std::endl; - goto out; - } else { - cout << "header (" << header.length() << " bytes) :\n"; - header.hexdump(cout); - cout << std::endl; - ret = 0; - } - } 
else if (strcmp(nargs[0], "setomapheader") == 0) { - if (!pool_name || nargs.size() < 3) - usage_exit(); - - string oid(nargs[1]); - string val(nargs[2]); - - bufferlist bl; - bl.append(val); - - ret = io_ctx.omap_set_header(oid, bl); - if (ret < 0) { - cerr << "error setting omap value " << pool_name << "/" << oid - << ": " << cpp_strerror(ret) << std::endl; - goto out; - } else { - ret = 0; - } - } else if (strcmp(nargs[0], "setomapval") == 0) { - if (!pool_name || nargs.size() < 4) - usage_exit(); - - string oid(nargs[1]); - string key(nargs[2]); - string val(nargs[3]); - - map values; - bufferlist bl; - bl.append(val); - values[key] = bl; - - ret = io_ctx.omap_set(oid, values); - if (ret < 0) { - cerr << "error setting omap value " << pool_name << "/" << oid << "/" - << key << ": " << cpp_strerror(ret) << std::endl; - goto out; - } else { - ret = 0; - } - } else if (strcmp(nargs[0], "getomapval") == 0) { - if (!pool_name || nargs.size() < 3) - usage_exit(); - - string oid(nargs[1]); - string key(nargs[2]); - set keys; - keys.insert(key); - - map values; - ret = io_ctx.omap_get_vals_by_keys(oid, keys, &values); - if (ret < 0) { - cerr << "error getting omap value " << pool_name << "/" << oid << "/" - << key << ": " << cpp_strerror(ret) << std::endl; - goto out; - } else { - ret = 0; - } - - if (values.size() && values.begin()->first == key) { - cout << " (length " << values.begin()->second.length() << ") : "; - values.begin()->second.hexdump(cout); - cout << std::endl; - } else { - cout << "No such key: " << pool_name << "/" << oid << "/" << key - << std::endl; - ret = -1; - goto out; - } - } else if (strcmp(nargs[0], "rmomapkey") == 0) { - if (!pool_name || nargs.size() < 3) - usage_exit(); - - string oid(nargs[1]); - string key(nargs[2]); - set keys; - keys.insert(key); - - ret = io_ctx.omap_rm_keys(oid, keys); - if (ret < 0) { - cerr << "error removing omap key " << pool_name << "/" << oid << "/" - << key << ": " << cpp_strerror(ret) << std::endl; - goto out; 
- } else { - ret = 0; - } - } else if (strcmp(nargs[0], "listomapvals") == 0) { - if (!pool_name || nargs.size() < 2) - usage_exit(); - - string oid(nargs[1]); - string last_read = ""; - int MAX_READ = 512; - do { - map values; - ret = io_ctx.omap_get_vals(oid, last_read, MAX_READ, &values); - if (ret < 0) { - cerr << "error getting omap keys " << pool_name << "/" << oid << ": " - << cpp_strerror(ret) << std::endl; - return 1; - } - for (map::const_iterator it = values.begin(); - it != values.end(); ++it) { - // dump key in hex if it contains nonprintable characters - if (std::count_if(it->first.begin(), it->first.end(), - (int (*)(int))isprint) < (int)it->first.length()) { - cout << "key: (" << it->first.length() << " bytes):\n"; - bufferlist keybl; - keybl.append(it->first); - keybl.hexdump(cout); - } else { - cout << it->first; - } - cout << std::endl; - cout << "value: (" << it->second.length() << " bytes) :\n"; - it->second.hexdump(cout); - cout << std::endl; - } - } while (ret == MAX_READ); - ret = 0; - } - else if (strcmp(nargs[0], "cp") == 0) { - if (!pool_name) - usage_exit(); - - if (nargs.size() < 2 || nargs.size() > 3) - usage_exit(); - - const char *target = target_pool_name; - if (!target) - target = pool_name; - - const char *target_obj; - if (nargs.size() < 3) { - if (strcmp(target, pool_name) == 0) { - cerr << "cannot copy object into itself" << std::endl; - ret = -1; - goto out; - } - target_obj = nargs[1]; - } else { - target_obj = nargs[2]; - } - - // open io context. 
- IoCtx target_ctx; - ret = rados.ioctx_create(target, target_ctx); - if (ret < 0) { - cerr << "error opening target pool " << target << ": " - << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } - if (target_oloc.size()) { - target_ctx.locator_set_key(target_oloc); - } - - ret = do_copy(io_ctx, nargs[1], target_ctx, target_obj); - if (ret < 0) { - cerr << "error copying " << pool_name << "/" << nargs[1] << " => " << target << "/" << target_obj << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } - } - else if (strcmp(nargs[0], "clonedata") == 0) { - if (!pool_name) - usage_exit(); - - if (nargs.size() < 2 || nargs.size() > 3) - usage_exit(); - - const char *target = target_pool_name; - if (!target) - target = pool_name; - - const char *target_obj; - if (nargs.size() < 3) { - if (strcmp(target, pool_name) == 0) { - cerr << "cannot copy object into itself" << std::endl; - ret = -1; - goto out; - } - target_obj = nargs[1]; - } else { - target_obj = nargs[2]; - } - - // open io context. 
- IoCtx target_ctx; - ret = rados.ioctx_create(target, target_ctx); - if (ret < 0) { - cerr << "error opening target pool " << target << ": " - << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } - if (oloc.size()) { - target_ctx.locator_set_key(oloc); - } else { - cerr << "must specify locator for clone" << std::endl; - ret = -1; - goto out; - } - - ret = do_clone_data(io_ctx, nargs[1], target_ctx, target_obj); - if (ret < 0) { - cerr << "error cloning " << pool_name << "/" << nargs[1] << " => " << target << "/" << target_obj << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } - } else if (strcmp(nargs[0], "rm") == 0) { - if (!pool_name || nargs.size() < 2) - usage_exit(); - vector::iterator iter = nargs.begin(); - ++iter; - for (; iter != nargs.end(); ++iter) { - const string & oid = *iter; - ret = io_ctx.remove(oid); - if (ret < 0) { - cerr << "error removing " << pool_name << "/" << oid << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } - } - } - else if (strcmp(nargs[0], "create") == 0) { - if (!pool_name || nargs.size() < 2) - usage_exit(); - string oid(nargs[1]); - if (nargs.size() > 2) { - string category(nargs[2]); - ret = io_ctx.create(oid, true, category); - } else { - ret = io_ctx.create(oid, true); - } - if (ret < 0) { - cerr << "error creating " << pool_name << "/" << oid << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } - } - - else if (strcmp(nargs[0], "tmap") == 0) { - if (nargs.size() < 3) - usage_exit(); - if (strcmp(nargs[1], "dump") == 0) { - bufferlist outdata; - string oid(nargs[2]); - ret = io_ctx.read(oid, outdata, 0, 0); - if (ret < 0) { - cerr << "error reading " << pool_name << "/" << oid << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } - bufferlist::iterator p = outdata.begin(); - bufferlist header; - map kv; - ::decode(header, p); - ::decode(kv, p); - cout << "header (" << header.length() << " bytes):\n"; - 
header.hexdump(cout); - cout << "\n"; - cout << kv.size() << " keys\n"; - for (map::iterator q = kv.begin(); q != kv.end(); ++q) { - cout << "key '" << q->first << "' (" << q->second.length() << " bytes):\n"; - q->second.hexdump(cout); - cout << "\n"; - } - } - else if (strcmp(nargs[1], "set") == 0 || - strcmp(nargs[1], "create") == 0) { - if (nargs.size() < 5) - usage_exit(); - string oid(nargs[2]); - string k(nargs[3]); - string v(nargs[4]); - bufferlist bl; - char c = (strcmp(nargs[1], "set") == 0) ? CEPH_OSD_TMAP_SET : CEPH_OSD_TMAP_CREATE; - ::encode(c, bl); - ::encode(k, bl); - ::encode(v, bl); - ret = io_ctx.tmap_update(oid, bl); - } - } - - else if (strcmp(nargs[0], "mkpool") == 0) { - int auid = 0; - __u8 crush_rule = 0; - if (nargs.size() < 2) - usage_exit(); - if (nargs.size() > 2) { - auid = strtol(nargs[2], 0, 10); - cerr << "setting auid:" << auid << std::endl; - if (nargs.size() > 3) { - crush_rule = (__u8)strtol(nargs[3], 0, 10); - cerr << "using crush rule " << (int)crush_rule << std::endl; - } - } - ret = rados.pool_create(nargs[1], auid, crush_rule); - if (ret < 0) { - cerr << "error creating pool " << nargs[1] << ": " - << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } - cout << "successfully created pool " << nargs[1] << std::endl; - } - else if (strcmp(nargs[0], "cppool") == 0) { - if (nargs.size() != 3) - usage_exit(); - const char *src_pool = nargs[1]; - const char *target_pool = nargs[2]; - - if (strcmp(src_pool, target_pool) == 0) { - cerr << "cannot copy pool into itself" << std::endl; - ret = -1; - goto out; - } - - ret = do_copy_pool(rados, src_pool, target_pool); - if (ret < 0) { - cerr << "error copying pool " << src_pool << " => " << target_pool << ": " - << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } - cout << "successfully copied pool " << nargs[1] << std::endl; - } - else if (strcmp(nargs[0], "rmpool") == 0) { - if (nargs.size() < 2) - usage_exit(); - if (nargs.size() < 4 || - 
strcmp(nargs[1], nargs[2]) != 0 || - strcmp(nargs[3], "--yes-i-really-really-mean-it") != 0) { - cerr << "WARNING:\n" - << " This will PERMANENTLY DESTROY an entire pool of objects with no way back.\n" - << " To confirm, pass the pool to remove twice, followed by\n" - << " --yes-i-really-really-mean-it" << std::endl; - ret = -1; - goto out; - } - ret = rados.pool_delete(nargs[1]); - if (ret >= 0) { - cout << "successfully deleted pool " << nargs[1] << std::endl; - } else { //error - cerr << "pool " << nargs[1] << " does not exist" << std::endl; - } - } - else if (strcmp(nargs[0], "lssnap") == 0) { - if (!pool_name || nargs.size() != 1) - usage_exit(); - - vector snaps; - io_ctx.snap_list(&snaps); - for (vector::iterator i = snaps.begin(); - i != snaps.end(); - ++i) { - string s; - time_t t; - if (io_ctx.snap_get_name(*i, &s) < 0) - continue; - if (io_ctx.snap_get_stamp(*i, &t) < 0) - continue; - struct tm bdt; - localtime_r(&t, &bdt); - cout << *i << "\t" << s << "\t"; - - cout.setf(std::ios::right); - cout.fill('0'); - cout << std::setw(4) << (bdt.tm_year+1900) - << '.' << std::setw(2) << (bdt.tm_mon+1) - << '.' 
<< std::setw(2) << bdt.tm_mday - << ' ' - << std::setw(2) << bdt.tm_hour - << ':' << std::setw(2) << bdt.tm_min - << ':' << std::setw(2) << bdt.tm_sec - << std::endl; - cout.unsetf(std::ios::right); - } - cout << snaps.size() << " snaps" << std::endl; - } - - else if (strcmp(nargs[0], "mksnap") == 0) { - if (!pool_name || nargs.size() < 2) - usage_exit(); - - ret = io_ctx.snap_create(nargs[1]); - if (ret < 0) { - cerr << "error creating pool " << pool_name << " snapshot " << nargs[1] - << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } - cout << "created pool " << pool_name << " snap " << nargs[1] << std::endl; - } - - else if (strcmp(nargs[0], "rmsnap") == 0) { - if (!pool_name || nargs.size() < 2) - usage_exit(); - - ret = io_ctx.snap_remove(nargs[1]); - if (ret < 0) { - cerr << "error removing pool " << pool_name << " snapshot " << nargs[1] - << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } - cout << "removed pool " << pool_name << " snap " << nargs[1] << std::endl; - } - - else if (strcmp(nargs[0], "rollback") == 0) { - if (!pool_name || nargs.size() < 3) - usage_exit(); - - ret = io_ctx.rollback(nargs[1], nargs[2]); - if (ret < 0) { - cerr << "error rolling back pool " << pool_name << " to snapshot " << nargs[1] - << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } - cout << "rolled back pool " << pool_name - << " to snapshot " << nargs[2] << std::endl; - } - else if (strcmp(nargs[0], "bench") == 0) { - if (!pool_name || nargs.size() < 3) - usage_exit(); - int seconds = atoi(nargs[1]); - int operation = 0; - if (strcmp(nargs[2], "write") == 0) - operation = OP_WRITE; - else if (strcmp(nargs[2], "seq") == 0) - operation = OP_SEQ_READ; - else if (strcmp(nargs[2], "rand") == 0) - operation = OP_RAND_READ; - else - usage_exit(); - RadosBencher bencher(g_ceph_context, rados, io_ctx); - bencher.set_show_time(show_time); - ret = bencher.aio_bench(operation, seconds, num_objs, - concurrent_ios, 
op_size, cleanup); - if (ret != 0) - cerr << "error during benchmark: " << ret << std::endl; - } - else if (strcmp(nargs[0], "cleanup") == 0) { - if (!pool_name || nargs.size() < 2) - usage_exit(); - const char *prefix = nargs[1]; - RadosBencher bencher(g_ceph_context, rados, io_ctx); - ret = bencher.clean_up(prefix, concurrent_ios); - if (ret != 0) - cerr << "error during cleanup: " << ret << std::endl; - } - else if (strcmp(nargs[0], "watch") == 0) { - if (!pool_name || nargs.size() < 2) - usage_exit(); - string oid(nargs[1]); - RadosWatchCtx ctx(oid.c_str()); - uint64_t cookie; - ret = io_ctx.watch(oid, 0, &cookie, &ctx); - if (ret != 0) - cerr << "error calling watch: " << ret << std::endl; - else { - cout << "press enter to exit..." << std::endl; - getchar(); - } - } - else if (strcmp(nargs[0], "notify") == 0) { - if (!pool_name || nargs.size() < 3) - usage_exit(); - string oid(nargs[1]); - string msg(nargs[2]); - bufferlist bl; - ::encode(msg, bl); - ret = io_ctx.notify(oid, 0, bl); - if (ret != 0) - cerr << "error calling notify: " << ret << std::endl; - } else if (strcmp(nargs[0], "load-gen") == 0) { - if (!pool_name) { - cerr << "error: must specify pool" << std::endl; - usage_exit(); - } - LoadGen lg(&rados); - if (min_obj_len) - lg.min_obj_len = min_obj_len; - if (max_obj_len) - lg.max_obj_len = max_obj_len; - if (min_op_len) - lg.min_op_len = min_op_len; - if (max_op_len) - lg.max_op_len = max_op_len; - if (max_ops) - lg.max_ops = max_ops; - if (max_backlog) - lg.max_backlog = max_backlog; - if (target_throughput) - lg.target_throughput = target_throughput << 20; - if (read_percent >= 0) - lg.read_percent = read_percent; - if (num_objs) - lg.num_objs = num_objs; - if (run_length) - lg.run_length = run_length; - - cout << "run length " << run_length << " seconds" << std::endl; - cout << "preparing " << lg.num_objs << " objects" << std::endl; - ret = lg.bootstrap(pool_name); - if (ret < 0) { - cerr << "load-gen bootstrap failed" << std::endl; - exit(1); - 
} - cout << "load-gen will run " << lg.run_length << " seconds" << std::endl; - lg.run(); - lg.cleanup(); - } else if (strcmp(nargs[0], "listomapkeys") == 0) { - if (!pool_name || nargs.size() < 2) - usage_exit(); - - librados::ObjectReadOperation read; - set out_keys; - read.omap_get_keys("", LONG_MAX, &out_keys, &ret); - io_ctx.operate(nargs[1], &read, NULL); - if (ret < 0) { - cerr << "error getting omap key set " << pool_name << "/" - << nargs[1] << ": " << cpp_strerror(ret) << std::endl; - goto out; - } - - for (set::iterator iter = out_keys.begin(); - iter != out_keys.end(); ++iter) { - cout << *iter << std::endl; - } - } else if (strcmp(nargs[0], "lock") == 0) { - if (!pool_name) - usage_exit(); - - if (!formatter) { - formatter = new JSONFormatter(pretty_format); - } - ret = do_lock_cmd(nargs, opts, &io_ctx, formatter); - } else if (strcmp(nargs[0], "listwatchers") == 0) { - if (!pool_name || nargs.size() < 2) - usage_exit(); - - string oid(nargs[1]); - std::list lw; - - ret = io_ctx.list_watchers(oid, &lw); - if (ret < 0) { - cerr << "error listing watchers " << pool_name << "/" << oid << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } - else - ret = 0; - - for (std::list::iterator i = lw.begin(); i != lw.end(); ++i) { - cout << "watcher=" << i->addr << " client." 
<< i->watcher_id << " cookie=" << i->cookie << std::endl; - } - } else if (strcmp(nargs[0], "listsnaps") == 0) { - if (!pool_name || nargs.size() < 2) - usage_exit(); - - string oid(nargs[1]); - snap_set_t ls; - - io_ctx.snap_set_read(LIBRADOS_SNAP_DIR); - ret = io_ctx.list_snaps(oid, &ls); - if (ret < 0) { - cerr << "error listing snap shots " << pool_name << "/" << oid << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; - goto out; - } - else - ret = 0; - - map snamemap; - if (formatter || pretty_format) { - vector snaps; - io_ctx.snap_list(&snaps); - for (vector::iterator i = snaps.begin(); - i != snaps.end(); ++i) { - string s; - if (io_ctx.snap_get_name(*i, &s) < 0) - continue; - snamemap.insert(pair(*i, s)); - } - } - - if (formatter) { - formatter->open_object_section("object"); - formatter->dump_string("name", oid); - formatter->open_array_section("clones"); - } else { - cout << oid << ":" << std::endl; - cout << "cloneid snaps size overlap" << std::endl; - } - - for (std::vector::iterator ci = ls.clones.begin(); - ci != ls.clones.end(); ++ci) { - - if (formatter) formatter->open_object_section("clone"); - - if (ci->cloneid == librados::SNAP_HEAD) { - if (formatter) - formatter->dump_string("id", "head"); - else - cout << "head"; - } else { - if (formatter) - formatter->dump_unsigned("id", ci->cloneid); - else - cout << ci->cloneid; - } - - if (formatter) - formatter->open_array_section("snapshots"); - else - cout << "\t"; - - if (!formatter && ci->snaps.empty()) { - cout << "-"; - } - for (std::vector::const_iterator snapindex = ci->snaps.begin(); - snapindex != ci->snaps.end(); ++snapindex) { - - map::iterator si; - - if (formatter || pretty_format) si = snamemap.find(*snapindex); - - if (formatter) { - formatter->open_object_section("snapshot"); - formatter->dump_unsigned("id", *snapindex); - if (si != snamemap.end()) - formatter->dump_string("name", si->second); - formatter->close_section(); //snapshot - } else { - if (snapindex != 
ci->snaps.begin()) cout << ","; - if (!pretty_format || (si == snamemap.end())) - cout << *snapindex; - else - cout << si->second << "(" << *snapindex << ")"; - } - } - - if (formatter) { - formatter->close_section(); //Snapshots - formatter->dump_unsigned("size", ci->size); - } else { - cout << "\t" << ci->size; - } - - if (ci->cloneid != librados::SNAP_HEAD) { - if (formatter) - formatter->open_array_section("overlaps"); - else - cout << "\t["; - - for (std::vector< std::pair >::iterator ovi = ci->overlap.begin(); - ovi != ci->overlap.end(); ++ovi) { - if (formatter) { - formatter->open_object_section("section"); - formatter->dump_unsigned("start", ovi->first); - formatter->dump_unsigned("length", ovi->second); - formatter->close_section(); //section - } else { - if (ovi != ci->overlap.begin()) cout << ","; - cout << ovi->first << "~" << ovi->second; - } - } - if (formatter) - formatter->close_section(); //overlaps - else - cout << "]" << std::endl; - } - if (formatter) formatter->close_section(); //clone - } - if (formatter) { - formatter->close_section(); //clones - formatter->close_section(); //object - formatter->flush(cout); - } else { - cout << std::endl; - } - - } else { - cerr << "unrecognized command " << nargs[0] << std::endl; - usage_exit(); - } - - if (ret < 0) - cerr << "error " << (-ret) << ": " << cpp_strerror(ret) << std::endl; - -out: - delete formatter; - return (ret < 0) ? 
1 : 0; -} - -int main(int argc, const char **argv) -{ - vector args; - argv_to_vec(argc, argv, args); - env_to_vec(args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - std::map < std::string, std::string > opts; - std::vector::iterator i; - std::string val; - for (i = args.begin(); i != args.end(); ) { - if (ceph_argparse_double_dash(args, i)) { - break; - } else if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) { - usage(cout); - exit(0); - } else if (ceph_argparse_flag(args, i, "-f", "--force", (char*)NULL)) { - opts["force"] = "true"; - } else if (ceph_argparse_flag(args, i, "-d", "--delete-after", (char*)NULL)) { - opts["delete-after"] = "true"; - } else if (ceph_argparse_flag(args, i, "-C", "--create", "--create-pool", - (char*)NULL)) { - opts["create"] = "true"; - } else if (ceph_argparse_flag(args, i, "--pretty-format", (char*)NULL)) { - opts["pretty-format"] = "true"; - } else if (ceph_argparse_flag(args, i, "--show-time", (char*)NULL)) { - opts["show-time"] = "true"; - } else if (ceph_argparse_flag(args, i, "--no-cleanup", (char*)NULL)) { - opts["no-cleanup"] = "true"; - } else if (ceph_argparse_witharg(args, i, &val, "-p", "--pool", (char*)NULL)) { - opts["pool"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--target-pool", (char*)NULL)) { - opts["target_pool"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--object-locator" , (char *)NULL)) { - opts["object_locator"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--target-locator" , (char *)NULL)) { - opts["target_locator"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--category", (char*)NULL)) { - opts["category"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "-t", "--concurrent-ios", (char*)NULL)) { - opts["concurrent-ios"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--block-size", (char*)NULL)) { - opts["block-size"] = val; - } else if 
(ceph_argparse_witharg(args, i, &val, "-b", (char*)NULL)) { - opts["block-size"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "-s", "--snap", (char*)NULL)) { - opts["snap"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "-S", "--snapid", (char*)NULL)) { - opts["snapid"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--min-object-size", (char*)NULL)) { - opts["min-object-size"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--max-object-size", (char*)NULL)) { - opts["max-object-size"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--min-op-len", (char*)NULL)) { - opts["min-op-len"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--max-op-len", (char*)NULL)) { - opts["max-op-len"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--max-ops", (char*)NULL)) { - opts["max-ops"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--max-backlog", (char*)NULL)) { - opts["max-backlog"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--target-throughput", (char*)NULL)) { - opts["target-throughput"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--read-percent", (char*)NULL)) { - opts["read-percent"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--num-objects", (char*)NULL)) { - opts["num-objects"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--run-length", (char*)NULL)) { - opts["run-length"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--workers", (char*)NULL)) { - opts["workers"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--format", (char*)NULL)) { - opts["format"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--lock-tag", (char*)NULL)) { - opts["lock-tag"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--lock-cookie", (char*)NULL)) { - opts["lock-cookie"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--lock-description", (char*)NULL)) { - opts["lock-description"] = val; - } else if 
(ceph_argparse_witharg(args, i, &val, "--lock-duration", (char*)NULL)) { - opts["lock-duration"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "--lock-type", (char*)NULL)) { - opts["lock-type"] = val; - } else if (ceph_argparse_witharg(args, i, &val, "-N", "--namespace", (char*)NULL)) { - opts["namespace"] = val; - } else { - if (val[0] == '-') - usage_exit(); - ++i; - } - } - - if (args.empty()) { - cerr << "rados: you must give an action. Try --help" << std::endl; - return 1; - } - if ((strcmp(args[0], "import") == 0) || (strcmp(args[0], "export") == 0)) - return rados_tool_sync(opts, args); - else - return rados_tool_common(opts, args); -} diff --git a/src/rados_export.cc b/src/rados_export.cc deleted file mode 100644 index bf6654114c5..00000000000 --- a/src/rados_export.cc +++ /dev/null @@ -1,229 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2011 New Dream Network - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. 
- * - */ -#include "include/int_types.h" - -#include "rados_sync.h" -#include "common/errno.h" -#include "common/strtol.h" -#include "include/rados/librados.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "include/compat.h" -#include "common/xattr.h" - -using namespace librados; - -class ExportLocalFileWQ : public RadosSyncWQ { -public: - ExportLocalFileWQ(IoCtxDistributor *io_ctx_dist, time_t ti, - ThreadPool *tp, ExportDir *export_dir, bool force) - : RadosSyncWQ(io_ctx_dist, ti, 0, tp), - m_export_dir(export_dir), - m_force(force) - { - } -private: - void _process(std::string *s) { - IoCtx &io_ctx(m_io_ctx_dist->get_ioctx()); - int flags = 0; - auto_ptr sobj; - auto_ptr dobj; - const std::string &rados_name(*s); - std::list < std::string > only_in_a; - std::list < std::string > only_in_b; - std::list < std::string > diff; - int ret = BackedUpObject::from_rados(io_ctx, rados_name.c_str(), sobj); - if (ret) { - cerr << ERR_PREFIX << "couldn't get '" << rados_name << "' from rados: error " - << ret << std::endl; - _exit(ret); - } - std::string obj_path(sobj->get_fs_path(m_export_dir)); - if (m_force) { - flags |= (CHANGED_CONTENTS | CHANGED_XATTRS); - } - else { - ret = BackedUpObject::from_path(obj_path.c_str(), dobj); - if (ret == ENOENT) { - sobj->get_xattrs(only_in_a); - flags |= CHANGED_CONTENTS; - } - else if (ret) { - cerr << ERR_PREFIX << "BackedUpObject::from_path returned " - << ret << std::endl; - _exit(ret); - } - else { - sobj->xattr_diff(dobj.get(), only_in_a, only_in_b, diff); - if ((sobj->get_rados_size() == dobj->get_rados_size()) && - (sobj->get_mtime() == dobj->get_mtime())) { - flags |= CHANGED_CONTENTS; - } - } - } - if (flags & CHANGED_CONTENTS) { - ret = sobj->download(io_ctx, obj_path.c_str()); - if (ret) { - cerr << ERR_PREFIX << "download error: " << ret << std::endl; - _exit(ret); - } - } - diff.splice(diff.begin(), only_in_a); - for (std::list < std::string 
>::const_iterator x = diff.begin(); - x != diff.end(); ++x) { - flags |= CHANGED_XATTRS; - const Xattr *xattr = sobj->get_xattr(*x); - if (xattr == NULL) { - cerr << ERR_PREFIX << "internal error on line: " << __LINE__ << std::endl; - _exit(ret); - } - std::string xattr_fs_name(USER_XATTR_PREFIX); - xattr_fs_name += x->c_str(); - ret = ceph_os_setxattr(obj_path.c_str(), xattr_fs_name.c_str(), - xattr->data, xattr->len); - if (ret) { - ret = errno; - cerr << ERR_PREFIX << "setxattr error: " << cpp_strerror(ret) << std::endl; - _exit(ret); - } - } - for (std::list < std::string >::const_iterator x = only_in_b.begin(); - x != only_in_b.end(); ++x) { - flags |= CHANGED_XATTRS; - ret = ceph_os_removexattr(obj_path.c_str(), x->c_str()); - if (ret) { - ret = errno; - cerr << ERR_PREFIX << "removexattr error: " << cpp_strerror(ret) << std::endl; - _exit(ret); - } - } - if (m_force) { - cout << "[force] " << rados_name << std::endl; - } - else if (flags & CHANGED_CONTENTS) { - cout << "[exported] " << rados_name << std::endl; - } - else if (flags & CHANGED_XATTRS) { - cout << "[xattr] " << rados_name << std::endl; - } - } - ExportDir *m_export_dir; - bool m_force; -}; - -class ExportValidateExistingWQ : public RadosSyncWQ { -public: - ExportValidateExistingWQ(IoCtxDistributor *io_ctx_dist, time_t ti, - ThreadPool *tp, const char *dir_name) - : RadosSyncWQ(io_ctx_dist, ti, 0, tp), - m_dir_name(dir_name) - { - } -private: - void _process(std::string *s) { - IoCtx &io_ctx(m_io_ctx_dist->get_ioctx()); - auto_ptr lobj; - const std::string &local_name(*s); - int ret = BackedUpObject::from_file(local_name.c_str(), m_dir_name, lobj); - if (ret) { - cout << ERR_PREFIX << "BackedUpObject::from_file: delete loop: " - << "got error " << ret << std::endl; - _exit(ret); - } - auto_ptr robj; - ret = BackedUpObject::from_rados(io_ctx, lobj->get_rados_name(), robj); - if (ret == -ENOENT) { - // The entry doesn't exist on the remote server; delete it locally - char path[strlen(m_dir_name) + 
local_name.size() + 2]; - snprintf(path, sizeof(path), "%s/%s", m_dir_name, local_name.c_str()); - if (unlink(path)) { - ret = errno; - cerr << ERR_PREFIX << "error unlinking '" << path << "': " - << cpp_strerror(ret) << std::endl; - _exit(ret); - } - cout << "[deleted] " << "removed '" << local_name << "'" << std::endl; - } - else if (ret) { - cerr << ERR_PREFIX << "BackedUpObject::from_rados: delete loop: " - << "got error " << ret << std::endl; - _exit(ret); - } - } - const char *m_dir_name; -}; - -int do_rados_export(ThreadPool *tp, IoCtx& io_ctx, - IoCtxDistributor *io_ctx_dist, const char *dir_name, - bool create, bool force, bool delete_after) -{ - librados::ObjectIterator oi = io_ctx.objects_begin(); - librados::ObjectIterator oi_end = io_ctx.objects_end(); - auto_ptr export_dir; - export_dir.reset(ExportDir::create_for_writing(dir_name, 1, create)); - if (!export_dir.get()) - return -EIO; - ExportLocalFileWQ export_object_wq(io_ctx_dist, time(NULL), - tp, export_dir.get(), force); - for (; oi != oi_end; ++oi) { - export_object_wq.queue(new std::string((*oi).first)); - } - export_object_wq.drain(); - - if (delete_after) { - ExportValidateExistingWQ export_val_wq(io_ctx_dist, time(NULL), - tp, dir_name); - DirHolder dh; - int err = dh.opendir(dir_name); - if (err) { - cerr << ERR_PREFIX << "opendir(" << dir_name << ") error: " - << cpp_strerror(err) << std::endl; - return err; - } - while (true) { - struct dirent *de = readdir(dh.dp); - if (!de) - break; - if ((strcmp(de->d_name, ".") == 0) || (strcmp(de->d_name, "..") == 0)) - continue; - if (is_suffix(de->d_name, RADOS_SYNC_TMP_SUFFIX)) { - char path[strlen(dir_name) + strlen(de->d_name) + 2]; - snprintf(path, sizeof(path), "%s/%s", dir_name, de->d_name); - if (unlink(path)) { - int ret = errno; - cerr << ERR_PREFIX << "error unlinking temporary file '" << path << "': " - << cpp_strerror(ret) << std::endl; - return ret; - } - cout << "[deleted] " << "removed temporary file '" << de->d_name << "'" << 
std::endl; - continue; - } - export_val_wq.queue(new std::string(de->d_name)); - } - export_val_wq.drain(); - } - cout << "[done]" << std::endl; - return 0; -} diff --git a/src/rados_import.cc b/src/rados_import.cc deleted file mode 100644 index a6a398d767b..00000000000 --- a/src/rados_import.cc +++ /dev/null @@ -1,239 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2011 New Dream Network - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - * - */ -#include "include/int_types.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "rados_sync.h" -#include "common/errno.h" -#include "common/strtol.h" -#include "include/rados/librados.hpp" - -using namespace librados; -using std::auto_ptr; - -class ImportLocalFileWQ : public RadosSyncWQ { -public: - ImportLocalFileWQ(const char *dir_name, bool force, - IoCtxDistributor *io_ctx_dist, time_t ti, ThreadPool *tp) - : RadosSyncWQ(io_ctx_dist, ti, 0, tp), - m_dir_name(dir_name), - m_force(force) - { - } -private: - void _process(std::string *s) { - IoCtx &io_ctx(m_io_ctx_dist->get_ioctx()); - const std::string &local_name(*s); - auto_ptr sobj; - auto_ptr dobj; - std::list < std::string > only_in_a; - std::list < std::string > only_in_b; - std::list < std::string > diff; - int flags = 0; - - int ret = BackedUpObject::from_file(local_name.c_str(), - m_dir_name.c_str(), sobj); - if (ret) { - cerr << ERR_PREFIX << "BackedUpObject::from_file: got error " - << ret << std::endl; - _exit(ret); - } - const char *rados_name(sobj->get_rados_name()); - if (m_force) { - flags |= (CHANGED_CONTENTS | CHANGED_XATTRS); - } - else { - ret = BackedUpObject::from_rados(io_ctx, rados_name, 
dobj); - if (ret == -ENOENT) { - flags |= CHANGED_CONTENTS; - sobj->get_xattrs(only_in_a); - } - else if (ret) { - cerr << ERR_PREFIX << "BackedUpObject::from_rados returned " - << ret << std::endl; - _exit(ret); - } - else { - sobj->xattr_diff(dobj.get(), only_in_a, only_in_b, diff); - if ((sobj->get_rados_size() == dobj->get_rados_size()) && - (sobj->get_mtime() == dobj->get_mtime())) { - flags |= CHANGED_CONTENTS; - } - } - } - if (flags & CHANGED_CONTENTS) { - ret = sobj->upload(io_ctx, local_name.c_str(), m_dir_name.c_str()); - if (ret) { - cerr << ERR_PREFIX << "upload error: " << ret << std::endl; - _exit(ret); - } - } - for (std::list < std::string >::const_iterator x = only_in_a.begin(); - x != only_in_a.end(); ++x) { - flags |= CHANGED_XATTRS; - const Xattr *xattr = sobj->get_xattr(*x); - if (xattr == NULL) { - cerr << ERR_PREFIX << "internal error on line: " << __LINE__ << std::endl; - _exit(ret); - } - bufferlist bl; - bl.append(xattr->data, xattr->len); - ret = io_ctx.setxattr(rados_name, x->c_str(), bl); - if (ret < 0) { - ret = errno; - cerr << ERR_PREFIX << "io_ctx.setxattr(rados_name='" << rados_name - << "', xattr_name='" << x->c_str() << "'): " << cpp_strerror(ret) - << std::endl; - _exit(ret); - } - } - for (std::list < std::string >::const_iterator x = diff.begin(); - x != diff.end(); ++x) { - flags |= CHANGED_XATTRS; - const Xattr *xattr = sobj->get_xattr(*x); - if (xattr == NULL) { - cerr << ERR_PREFIX << "internal error on line: " << __LINE__ << std::endl; - _exit(ret); - } - bufferlist bl; - bl.append(xattr->data, xattr->len); - ret = io_ctx.rmxattr(rados_name, x->c_str()); - if (ret < 0) { - cerr << ERR_PREFIX << "io_ctx.rmxattr error2: " << cpp_strerror(ret) - << std::endl; - _exit(ret); - } - ret = io_ctx.setxattr(rados_name, x->c_str(), bl); - if (ret < 0) { - ret = errno; - cerr << ERR_PREFIX << "io_ctx.setxattr(rados_name='" << rados_name - << "', xattr='" << x->c_str() << "'): " << cpp_strerror(ret) << std::endl; - _exit(ret); - } - 
} - for (std::list < std::string >::const_iterator x = only_in_b.begin(); - x != only_in_b.end(); ++x) { - flags |= CHANGED_XATTRS; - ret = io_ctx.rmxattr(rados_name, x->c_str()); - if (ret < 0) { - ret = errno; - cerr << ERR_PREFIX << "rmxattr error3: " << cpp_strerror(ret) << std::endl; - _exit(ret); - } - } - if (m_force) { - cout << "[force] " << rados_name << std::endl; - } - else if (flags & CHANGED_CONTENTS) { - cout << "[imported] " << rados_name << std::endl; - } - else if (flags & CHANGED_XATTRS) { - cout << "[xattr] " << rados_name << std::endl; - } - } - std::string m_dir_name; - bool m_force; -}; - -class ImportValidateExistingWQ : public RadosSyncWQ { -public: - ImportValidateExistingWQ(ExportDir *export_dir, - IoCtxDistributor *io_ctx_dist, time_t ti, ThreadPool *tp) - : RadosSyncWQ(io_ctx_dist, ti, 0, tp), - m_export_dir(export_dir) - { - } -private: - void _process(std::string *s) { - IoCtx &io_ctx(m_io_ctx_dist->get_ioctx()); - const std::string &rados_name(*s); - auto_ptr robj; - int ret = BackedUpObject::from_rados(io_ctx, rados_name.c_str(), robj); - if (ret) { - cerr << ERR_PREFIX << "BackedUpObject::from_rados in delete loop " - << "returned " << ret << std::endl; - _exit(ret); - } - std::string obj_path(robj->get_fs_path(m_export_dir)); - auto_ptr lobj; - ret = BackedUpObject::from_path(obj_path.c_str(), lobj); - if (ret == ENOENT) { - ret = io_ctx.remove(rados_name); - if (ret && ret != -ENOENT) { - cerr << ERR_PREFIX << "io_ctx.remove(" << obj_path << ") failed " - << "with error " << ret << std::endl; - _exit(ret); - } - cout << "[deleted] " << "removed '" << rados_name << "'" << std::endl; - } - else if (ret) { - cerr << ERR_PREFIX << "BackedUpObject::from_path in delete loop " - << "returned " << ret << std::endl; - _exit(ret); - } - } - ExportDir *m_export_dir; -}; - -int do_rados_import(ThreadPool *tp, IoCtx &io_ctx, IoCtxDistributor* io_ctx_dist, - const char *dir_name, bool force, bool delete_after) -{ - auto_ptr export_dir; - 
export_dir.reset(ExportDir::from_file_system(dir_name)); - if (!export_dir.get()) - return -EIO; - DirHolder dh; - int ret = dh.opendir(dir_name); - if (ret) { - cerr << ERR_PREFIX << "opendir(" << dir_name << ") error: " - << cpp_strerror(ret) << std::endl; - return ret; - } - ImportLocalFileWQ import_file_wq(dir_name, force, - io_ctx_dist, time(NULL), tp); - while (true) { - struct dirent *de = readdir(dh.dp); - if (!de) - break; - if ((strcmp(de->d_name, ".") == 0) || (strcmp(de->d_name, "..") == 0)) - continue; - if (is_suffix(de->d_name, RADOS_SYNC_TMP_SUFFIX)) - continue; - import_file_wq.queue(new std::string(de->d_name)); - } - import_file_wq.drain(); - - if (delete_after) { - ImportValidateExistingWQ import_val_wq(export_dir.get(), io_ctx_dist, - time(NULL), tp); - librados::ObjectIterator oi = io_ctx.objects_begin(); - librados::ObjectIterator oi_end = io_ctx.objects_end(); - for (; oi != oi_end; ++oi) { - import_val_wq.queue(new std::string((*oi).first)); - } - import_val_wq.drain(); - } - cout << "[done]" << std::endl; - return 0; -} diff --git a/src/rados_sync.cc b/src/rados_sync.cc deleted file mode 100644 index 03293d3402a..00000000000 --- a/src/rados_sync.cc +++ /dev/null @@ -1,901 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2011 New Dream Network - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. 
- * - */ -#include "include/int_types.h" - -#include "common/ceph_argparse.h" -#include "common/config.h" -#include "common/errno.h" -#include "common/strtol.h" -#include "global/global_context.h" -#include "global/global_init.h" -#include "include/rados/librados.hpp" -#include "rados_sync.h" -#include "include/compat.h" - -#include "common/xattr.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace librados; -using std::auto_ptr; - -static const char * const XATTR_RADOS_SYNC_VER = "user.rados_sync_ver"; -static const char * const XATTR_FULLNAME = "user.rados_full_name"; -const char USER_XATTR_PREFIX[] = "user.rados."; -static const size_t USER_XATTR_PREFIX_LEN = - sizeof(USER_XATTR_PREFIX) / sizeof(USER_XATTR_PREFIX[0]) - 1; -/* It's important that RADOS_SYNC_TMP_SUFFIX contain at least one character - * that we wouldn't normally alllow in a file name-- in this case, $ */ -const char RADOS_SYNC_TMP_SUFFIX[] = "$tmp"; -static const size_t RADOS_SYNC_TMP_SUFFIX_LEN = - sizeof(RADOS_SYNC_TMP_SUFFIX) / sizeof(RADOS_SYNC_TMP_SUFFIX[0]) - 1; - -std::string get_user_xattr_name(const char *fs_xattr_name) -{ - if (strncmp(fs_xattr_name, USER_XATTR_PREFIX, USER_XATTR_PREFIX_LEN)) - return ""; - return fs_xattr_name + USER_XATTR_PREFIX_LEN; -} - -bool is_suffix(const char *str, const char *suffix) -{ - size_t strlen_str = strlen(str); - size_t strlen_suffix = strlen(suffix); - if (strlen_str < strlen_suffix) - return false; - return (strcmp(str + (strlen_str - strlen_suffix), suffix) == 0); -} - -ExportDir* ExportDir::create_for_writing(const std::string &path, int version, - bool create) -{ - if (access(path.c_str(), R_OK | W_OK) == 0) { - return ExportDir::from_file_system(path); - } - if (!create) { - cerr << ERR_PREFIX << "ExportDir: directory '" - << path << "' does not exist. Use --create to create it." 
- << std::endl; - return NULL; - } - int ret = mkdir(path.c_str(), 0700); - if (ret < 0) { - int err = errno; - if (err != EEXIST) { - cerr << ERR_PREFIX << "ExportDir: mkdir error: " - << cpp_strerror(err) << std::endl; - return NULL; - } - } - char buf[32]; - snprintf(buf, sizeof(buf), "%d", version); - ret = ceph_os_setxattr(path.c_str(), XATTR_RADOS_SYNC_VER, buf, strlen(buf) + 1); - if (ret < 0) { - int err = errno; - cerr << ERR_PREFIX << "ExportDir: setxattr error :" - << cpp_strerror(err) << std::endl; - return NULL; - } - return new ExportDir(version, path); -} - -ExportDir* ExportDir::from_file_system(const std::string &path) -{ - if (access(path.c_str(), R_OK)) { - cerr << "ExportDir: source directory '" << path - << "' appears to be inaccessible." << std::endl; - return NULL; - } - int ret; - char buf[32]; - memset(buf, 0, sizeof(buf)); - ret = ceph_os_getxattr(path.c_str(), XATTR_RADOS_SYNC_VER, buf, sizeof(buf) - 1); - if (ret < 0) { - ret = errno; - if (ret == ENODATA) { - cerr << ERR_PREFIX << "ExportDir: directory '" << path - << "' does not appear to have been created by a rados " - << "export operation." << std::endl; - return NULL; - } - cerr << ERR_PREFIX << "ExportDir: getxattr error :" - << cpp_strerror(ret) << std::endl; - return NULL; - } - std::string err; - ret = strict_strtol(buf, 10, &err); - if (!err.empty()) { - cerr << ERR_PREFIX << "ExportDir: invalid value for " - << XATTR_RADOS_SYNC_VER << ": " << buf << ". parse error: " - << err << std::endl; - return NULL; - } - if (ret != 1) { - cerr << ERR_PREFIX << "ExportDir: can't handle any naming " - << "convention besides version 1. You must upgrade this program to " - << "handle the data in the new format." 
<< std::endl; - return NULL; - } - return new ExportDir(ret, path); -} - -std::string ExportDir::get_fs_path(const std::string &rados_name) const -{ - static int HASH_LENGTH = 17; - size_t i; - size_t strlen_rados_name = strlen(rados_name.c_str()); - size_t sz; - bool need_hash = false; - if (strlen_rados_name > 200) { - sz = 200; - need_hash = true; - } - else { - sz = strlen_rados_name; - } - char fs_path[sz + HASH_LENGTH + 1]; - for (i = 0; i < sz; ++i) { - // Just replace anything that looks funny with an 'at' sign. - // Unicode also gets turned into 'at' signs. - signed char c = rados_name[i]; - if (c < 0x20) { - // Since c is signed, this also eliminates bytes with the high bit set - c = '@'; - need_hash = true; - } - else if (c == 0x7f) { - c = '@'; - need_hash = true; - } - else if (c == '/') { - c = '@'; - need_hash = true; - } - else if (c == '\\') { - c = '@'; - need_hash = true; - } - else if (c == '$') { - c = '@'; - need_hash = true; - } - else if (c == ' ') { - c = '_'; - need_hash = true; - } - fs_path[i] = c; - } - - if (need_hash) { - uint64_t hash = 17; - for (i = 0; i < strlen_rados_name; ++i) { - hash += (rados_name[i] * 33); - } - // The extra byte of length is because snprintf always NULL-terminates. - snprintf(fs_path + i, HASH_LENGTH + 1, "_%016" PRIx64, hash); - } - else { - // NULL-terminate. 
- fs_path[i] = '\0'; - } - - ostringstream oss; - oss << path << "/" << fs_path; - return oss.str(); -} - -ExportDir::ExportDir(int version_, const std::string &path_) - : version(version_), - path(path_) -{ -} - -DirHolder::DirHolder() - : dp(NULL) -{ -} - -DirHolder::~DirHolder() { - if (!dp) - return; - if (closedir(dp)) { - int err = errno; - cerr << ERR_PREFIX << "closedir failed: " << cpp_strerror(err) << std::endl; - } - dp = NULL; -} - -int DirHolder::opendir(const char *dir_name) { - dp = ::opendir(dir_name); - if (!dp) { - int err = errno; - return err; - } - return 0; -} - -static __thread int t_iod_idx = -1; - -static pthread_mutex_t io_ctx_distributor_lock = PTHREAD_MUTEX_INITIALIZER; - -IoCtxDistributor* IoCtxDistributor::instance() { - IoCtxDistributor *ret; - pthread_mutex_lock(&io_ctx_distributor_lock); - if (s_instance == NULL) { - s_instance = new IoCtxDistributor(); - } - ret = s_instance; - pthread_mutex_unlock(&io_ctx_distributor_lock); - return ret; -} - -int IoCtxDistributor::init(Rados &cluster, const char *pool_name, - int num_ioctxes) { - m_io_ctxes.resize(num_ioctxes); - for (std::vector::iterator i = m_io_ctxes.begin(); - i != m_io_ctxes.end(); ++i) { - IoCtx &io_ctx(*i); - int ret = cluster.ioctx_create(pool_name, io_ctx); - if (ret) { - return ret; - } - } - m_highest_iod_idx.set(0); - return 0; -} - -void IoCtxDistributor::clear() { - for (std::vector::iterator i = m_io_ctxes.begin(); - i != m_io_ctxes.end(); ++i) { - IoCtx &io_ctx(*i); - io_ctx.close(); - } - m_io_ctxes.clear(); - m_highest_iod_idx.set(0); -} - -IoCtx& IoCtxDistributor::get_ioctx() { - if (t_iod_idx == -1) { - t_iod_idx = m_highest_iod_idx.inc() - 1; - } - if (m_io_ctxes.size() <= (unsigned int)t_iod_idx) { - cerr << ERR_PREFIX << "IoCtxDistributor: logic error on line " - << __LINE__ << std::endl; - _exit(1); - } - return m_io_ctxes[t_iod_idx]; -} - -IoCtxDistributor *IoCtxDistributor::s_instance = NULL; - -IoCtxDistributor::IoCtxDistributor() { - clear(); -} - 
-IoCtxDistributor::~IoCtxDistributor() { - clear(); -} - -RadosSyncWQ::RadosSyncWQ(IoCtxDistributor *io_ctx_dist, time_t timeout, time_t suicide_timeout, ThreadPool *tp) - : ThreadPool::WorkQueue("FileStore::OpWQ", timeout, suicide_timeout, tp), - m_io_ctx_dist(io_ctx_dist) -{ -} - -bool RadosSyncWQ::_enqueue(std::string *s) { - m_items.push_back(s); - return true; -} - -void RadosSyncWQ::_dequeue(std::string *o) { - assert(0); -} - -bool RadosSyncWQ::_empty() { - return m_items.empty(); -} - -std::string *RadosSyncWQ::_dequeue() { - if (m_items.empty()) - return NULL; - std::string *ret = m_items.front(); - m_items.pop_front(); - return ret; -} - -void RadosSyncWQ::_process_finish(std::string *s) { - delete s; -} - -void RadosSyncWQ::_clear() { - for (std::deque::iterator i = m_items.begin(); - i != m_items.end(); ++i) { - delete *i; - } - m_items.clear(); -} - -Xattr::Xattr(char *data_, ssize_t len_) - : data(data_), len(len_) -{ -} - -Xattr::~Xattr() { - free(data); -} - -bool Xattr::operator==(const class Xattr &rhs) const { - if (len != rhs.len) - return false; - return (memcmp(data, rhs.data, len) == 0); -} - -bool Xattr::operator!=(const class Xattr &rhs) const { - return !((*this) == rhs); -} - -int BackedUpObject::from_file(const char *file_name, const char *dir_name, - std::auto_ptr &obj) -{ - char obj_path[strlen(dir_name) + strlen(file_name) + 2]; - snprintf(obj_path, sizeof(obj_path), "%s/%s", dir_name, file_name); - return BackedUpObject::from_path(obj_path, obj); -} - -int BackedUpObject::from_path(const char *path, std::auto_ptr &obj) -{ - int ret; - FILE *fp = fopen(path, "r"); - if (!fp) { - ret = errno; - if (ret != ENOENT) { - cerr << ERR_PREFIX << "BackedUpObject::from_path: error while trying to " - << "open '" << path << "': " << cpp_strerror(ret) << std::endl; - } - return ret; - } - int fd = fileno(fp); - struct stat st_buf; - memset(&st_buf, 0, sizeof(st_buf)); - ret = fstat(fd, &st_buf); - if (ret) { - ret = errno; - fclose(fp); - cerr << 
ERR_PREFIX << "BackedUpObject::from_path: error while trying " - << "to stat '" << path << "': " << cpp_strerror(ret) << std::endl; - return ret; - } - - // get fullname - ssize_t res = ceph_os_fgetxattr(fd, XATTR_FULLNAME, NULL, 0); - if (res <= 0) { - fclose(fp); - ret = errno; - if (res == 0) { - cerr << ERR_PREFIX << "BackedUpObject::from_path: found empty " - << XATTR_FULLNAME << " attribute on '" << path - << "'" << std::endl; - ret = ENODATA; - } else if (ret == ENODATA) { - cerr << ERR_PREFIX << "BackedUpObject::from_path: there was no " - << XATTR_FULLNAME << " attribute found on '" << path - << "'" << std::endl; - } else { - cerr << ERR_PREFIX << "getxattr error: " << cpp_strerror(ret) << std::endl; - } - return ret; - } - char rados_name_[res + 1]; - memset(rados_name_, 0, sizeof(rados_name_)); - res = ceph_os_fgetxattr(fd, XATTR_FULLNAME, rados_name_, res); - if (res < 0) { - ret = errno; - fclose(fp); - cerr << ERR_PREFIX << "BackedUpObject::getxattr(" << XATTR_FULLNAME - << ") error: " << cpp_strerror(ret) << std::endl; - return ret; - } - - BackedUpObject *o = new BackedUpObject(rados_name_, - st_buf.st_size, st_buf.st_mtime); - if (!o) { - fclose(fp); - return ENOBUFS; - } - ret = o->read_xattrs_from_file(fileno(fp)); - if (ret) { - fclose(fp); - cerr << ERR_PREFIX << "BackedUpObject::from_path(path = '" - << path << "): read_xattrs_from_file returned " << ret << std::endl; - delete o; - return ret; - } - - fclose(fp); - obj.reset(o); - return 0; -} - -int BackedUpObject::from_rados(IoCtx& io_ctx, const char *rados_name_, - auto_ptr &obj) -{ - uint64_t rados_size_ = 0; - time_t rados_time_ = 0; - int ret = io_ctx.stat(rados_name_, &rados_size_, &rados_time_); - if (ret == -ENOENT) { - // don't complain here about ENOENT - return ret; - } else if (ret < 0) { - cerr << ERR_PREFIX << "BackedUpObject::from_rados(rados_name_ = '" - << rados_name_ << "'): stat failed with error " << ret << std::endl; - return ret; - } - BackedUpObject *o = new 
BackedUpObject(rados_name_, rados_size_, rados_time_); - ret = o->read_xattrs_from_rados(io_ctx); - if (ret) { - cerr << ERR_PREFIX << "BackedUpObject::from_rados(rados_name_ = '" - << rados_name_ << "'): read_xattrs_from_rados returned " - << ret << std::endl; - delete o; - return ret; - } - obj.reset(o); - return 0; -} - -BackedUpObject::~BackedUpObject() -{ - for (std::map < std::string, Xattr* >::iterator x = xattrs.begin(); - x != xattrs.end(); ++x) - { - delete x->second; - x->second = NULL; - } - free(rados_name); -} - -std::string BackedUpObject::get_fs_path(const ExportDir *export_dir) const -{ - return export_dir->get_fs_path(rados_name); -} - -std::string BackedUpObject::xattrs_to_str() const -{ - ostringstream oss; - std::string prefix; - for (std::map < std::string, Xattr* >::const_iterator x = xattrs.begin(); - x != xattrs.end(); ++x) - { - char buf[x->second->len + 1]; - memcpy(buf, x->second->data, x->second->len); - buf[x->second->len] = '\0'; - oss << prefix << "{" << x->first << ":" << buf << "}"; - prefix = ", "; - } - return oss.str(); -} - -void BackedUpObject::xattr_diff(const BackedUpObject *rhs, - std::list < std::string > &only_in_a, - std::list < std::string > &only_in_b, - std::list < std::string > &diff) const -{ - only_in_a.clear(); - only_in_b.clear(); - diff.clear(); - for (std::map < std::string, Xattr* >::const_iterator x = xattrs.begin(); - x != xattrs.end(); ++x) - { - std::map < std::string, Xattr* >::const_iterator r = rhs->xattrs.find(x->first); - if (r == rhs->xattrs.end()) { - only_in_a.push_back(x->first); - } - else { - const Xattr &r_obj(*r->second); - const Xattr &x_obj(*x->second); - if (r_obj != x_obj) - diff.push_back(x->first); - } - } - for (std::map < std::string, Xattr* >::const_iterator r = rhs->xattrs.begin(); - r != rhs->xattrs.end(); ++r) - { - std::map < std::string, Xattr* >::const_iterator x = rhs->xattrs.find(r->first); - if (x == xattrs.end()) { - only_in_b.push_back(r->first); - } - } -} - -void 
BackedUpObject::get_xattrs(std::list < std::string > &xattrs_) const -{ - for (std::map < std::string, Xattr* >::const_iterator r = xattrs.begin(); - r != xattrs.end(); ++r) - { - xattrs_.push_back(r->first); - } -} - -const Xattr* BackedUpObject::get_xattr(const std::string name) const -{ - std::map < std::string, Xattr* >::const_iterator x = xattrs.find(name); - if (x == xattrs.end()) - return NULL; - else - return x->second; -} - -const char *BackedUpObject::get_rados_name() const { - return rados_name; -} - -uint64_t BackedUpObject::get_rados_size() const { - return rados_size; -} - -time_t BackedUpObject::get_mtime() const { - return rados_time; -} - -int BackedUpObject::download(IoCtx &io_ctx, const char *path) -{ - char tmp_path[strlen(path) + RADOS_SYNC_TMP_SUFFIX_LEN + 1]; - snprintf(tmp_path, sizeof(tmp_path), "%s%s", path, RADOS_SYNC_TMP_SUFFIX); - FILE *fp = fopen(tmp_path, "w"); - if (!fp) { - int err = errno; - cerr << ERR_PREFIX << "download: error opening '" << tmp_path << "':" - << cpp_strerror(err) << std::endl; - return err; - } - int fd = fileno(fp); - uint64_t off = 0; - static const int CHUNK_SZ = 32765; - while (true) { - bufferlist bl; - int rlen = io_ctx.read(rados_name, bl, CHUNK_SZ, off); - if (rlen < 0) { - cerr << ERR_PREFIX << "download: io_ctx.read(" << rados_name << ") returned " - << rlen << std::endl; - return rlen; - } - if (rlen < CHUNK_SZ) - off = 0; - else - off += rlen; - size_t flen = fwrite(bl.c_str(), 1, rlen, fp); - if (flen != (size_t)rlen) { - int err = errno; - cerr << ERR_PREFIX << "download: fwrite(" << tmp_path << ") error: " - << cpp_strerror(err) << std::endl; - fclose(fp); - return err; - } - if (off == 0) - break; - } - size_t attr_sz = strlen(rados_name) + 1; - int res = ceph_os_fsetxattr(fd, XATTR_FULLNAME, rados_name, attr_sz); - if (res) { - int err = errno; - cerr << ERR_PREFIX << "download: fsetxattr(" << tmp_path << ") error: " - << cpp_strerror(err) << std::endl; - fclose(fp); - return err; - } - if 
(fclose(fp)) { - int err = errno; - cerr << ERR_PREFIX << "download: fclose(" << tmp_path << ") error: " - << cpp_strerror(err) << std::endl; - return err; - } - if (rename(tmp_path, path)) { - int err = errno; - cerr << ERR_PREFIX << "download: rename(" << tmp_path << ", " - << path << ") error: " << cpp_strerror(err) << std::endl; - return err; - } - return 0; -} - -int BackedUpObject::upload(IoCtx &io_ctx, const char *file_name, const char *dir_name) -{ - char path[strlen(file_name) + strlen(dir_name) + 2]; - snprintf(path, sizeof(path), "%s/%s", dir_name, file_name); - FILE *fp = fopen(path, "r"); - if (!fp) { - int err = errno; - cerr << ERR_PREFIX << "upload: error opening '" << path << "': " - << cpp_strerror(err) << std::endl; - return err; - } - // Need to truncate RADOS object to size 0, in case there is - // already something there. - int ret = io_ctx.trunc(rados_name, 0); - if (ret) { - cerr << ERR_PREFIX << "upload: trunc failed with error " << ret << std::endl; - fclose(fp); - return ret; - } - uint64_t off = 0; - static const int CHUNK_SZ = 32765; - while (true) { - char buf[CHUNK_SZ]; - int flen = fread(buf, 1, CHUNK_SZ, fp); - if (flen < 0) { - int err = errno; - cerr << ERR_PREFIX << "upload: fread(" << file_name << ") error: " - << cpp_strerror(err) << std::endl; - fclose(fp); - return err; - } - if ((flen == 0) && (off != 0)) { - fclose(fp); - break; - } - // There must be a zero-copy way to do this? 
- bufferlist bl; - bl.append(buf, flen); - int rlen = io_ctx.write(rados_name, bl, flen, off); - if (rlen < 0) { - fclose(fp); - cerr << ERR_PREFIX << "upload: rados_write error: " << rlen << std::endl; - return rlen; - } - if (rlen != flen) { - fclose(fp); - cerr << ERR_PREFIX << "upload: rados_write error: short write" << std::endl; - return -EIO; - } - off += rlen; - if (flen < CHUNK_SZ) { - fclose(fp); - return 0; - } - } - return 0; -} - -BackedUpObject::BackedUpObject(const char *rados_name_, - uint64_t rados_size_, time_t rados_time_) - : rados_name(strdup(rados_name_)), - rados_size(rados_size_), - rados_time(rados_time_) -{ -} - -int BackedUpObject::read_xattrs_from_file(int fd) -{ - ssize_t blen = ceph_os_flistxattr(fd, NULL, 0); - if (blen > 0x1000000) { - cerr << ERR_PREFIX << "BackedUpObject::read_xattrs_from_file: unwilling " - << "to allocate a buffer of size " << blen << " on the stack for " - << "flistxattr." << std::endl; - return ENOBUFS; - } - char buf[blen + 1]; - memset(buf, 0, sizeof(buf)); - ssize_t blen2 = ceph_os_flistxattr(fd, buf, blen); - if (blen != blen2) { - cerr << ERR_PREFIX << "BackedUpObject::read_xattrs_from_file: xattrs changed while " - << "we were trying to " - << "list them? 
First length was " << blen << ", but now it's " << blen2 - << std::endl; - return EDOM; - } - const char *b = buf; - while (*b) { - size_t bs = strlen(b); - std::string xattr_name = get_user_xattr_name(b); - if (!xattr_name.empty()) { - ssize_t attr_len = ceph_os_fgetxattr(fd, b, NULL, 0); - if (attr_len < 0) { - int err = errno; - cerr << ERR_PREFIX << "BackedUpObject::read_xattrs_from_file: " - << "fgetxattr(rados_name = '" << rados_name << "', xattr_name='" - << xattr_name << "') failed: " << cpp_strerror(err) << std::endl; - return EDOM; - } - char *attr = (char*)malloc(attr_len); - if (!attr) { - cerr << ERR_PREFIX << "BackedUpObject::read_xattrs_from_file: " - << "malloc(" << attr_len << ") failed for xattr_name='" - << xattr_name << "'" << std::endl; - return ENOBUFS; - } - ssize_t attr_len2 = ceph_os_fgetxattr(fd, b, attr, attr_len); - if (attr_len2 < 0) { - int err = errno; - cerr << ERR_PREFIX << "BackedUpObject::read_xattrs_from_file: " - << "fgetxattr(rados_name = '" << rados_name << "', " - << "xattr_name='" << xattr_name << "') failed: " - << cpp_strerror(err) << std::endl; - free(attr); - return EDOM; - } - if (attr_len2 != attr_len) { - cerr << ERR_PREFIX << "BackedUpObject::read_xattrs_from_file: xattr " - << "changed while we were trying to get it? " - << "fgetxattr(rados_name = '"<< rados_name - << "', xattr_name='" << xattr_name << "') returned a different length " - << "than when we first called it! 
old_len = " << attr_len - << "new_len = " << attr_len2 << std::endl; - free(attr); - return EDOM; - } - xattrs[xattr_name] = new Xattr(attr, attr_len); - } - b += (bs + 1); - } - return 0; -} - -int BackedUpObject::read_xattrs_from_rados(IoCtx &io_ctx) -{ - map attrset; - int ret = io_ctx.getxattrs(rados_name, attrset); - if (ret) { - cerr << ERR_PREFIX << "BackedUpObject::read_xattrs_from_rados: " - << "getxattrs failed with error code " << ret << std::endl; - return ret; - } - for (map::iterator i = attrset.begin(); - i != attrset.end(); ) - { - bufferlist& bl(i->second); - char *data = (char*)malloc(bl.length()); - if (!data) - return ENOBUFS; - memcpy(data, bl.c_str(), bl.length()); - Xattr *xattr = new Xattr(data, bl.length()); - if (!xattr) { - free(data); - return ENOBUFS; - } - xattrs[i->first] = xattr; - attrset.erase(i++); - } - return 0; -} - -int rados_tool_sync(const std::map < std::string, std::string > &opts, - std::vector &args) -{ - int ret; - bool force = opts.count("force"); - bool delete_after = opts.count("delete-after"); - bool create = opts.count("create"); - - std::map < std::string, std::string >::const_iterator n = opts.find("workers"); - int num_threads; - if (n == opts.end()) { - num_threads = DEFAULT_NUM_RADOS_WORKER_THREADS; - } - else { - std::string err; - num_threads = strict_strtol(n->second.c_str(), 10, &err); - if (!err.empty()) { - cerr << "rados: can't parse number of worker threads given: " - << err << std::endl; - return 1; - } - if ((num_threads < 1) || (num_threads > 9000)) { - cerr << "rados: unreasonable value given for num_threads: " - << num_threads << std::endl; - return 1; - } - } - - - std::string action, src, dst; - std::vector::iterator i = args.begin(); - if ((i != args.end()) && - ((strcmp(*i, "import") == 0) || (strcmp(*i, "export") == 0))) { - action = *i; - ++i; - } - else { - cerr << "rados" << ": You must specify either 'import' or 'export'.\n"; - cerr << "Use --help to show help.\n"; - exit(1); - } - if (i 
!= args.end()) { - src = *i; - ++i; - } - else { - cerr << "rados" << ": You must give a source.\n"; - cerr << "Use --help to show help.\n"; - exit(1); - } - if (i != args.end()) { - dst = *i; - ++i; - } - else { - cerr << "rados" << ": You must give a destination.\n"; - cerr << "Use --help to show help.\n"; - exit(1); - } - - // open rados - Rados rados; - if (rados.init_with_context(g_ceph_context) < 0) { - cerr << "rados" << ": failed to initialize Rados!" << std::endl; - exit(1); - } - if (rados.connect() < 0) { - cerr << "rados" << ": failed to connect to Rados cluster!" << std::endl; - exit(1); - } - IoCtx io_ctx; - std::string pool_name = (action == "import") ? dst : src; - ret = rados.ioctx_create(pool_name.c_str(), io_ctx); - if ((ret == -ENOENT) && (action == "import")) { - if (create) { - ret = rados.pool_create(pool_name.c_str()); - if (ret) { - cerr << "rados" << ": pool_create failed with error " << ret - << std::endl; - exit(ret); - } - ret = rados.ioctx_create(pool_name.c_str(), io_ctx); - } - else { - cerr << "rados" << ": pool '" << pool_name << "' does not exist. Use " - << "--create to try to create it." << std::endl; - exit(ENOENT); - } - } - if (ret < 0) { - cerr << "rados" << ": error opening pool " << pool_name << ": " - << cpp_strerror(ret) << std::endl; - exit(ret); - } - - IoCtxDistributor *io_ctx_dist = IoCtxDistributor::instance(); - ret = io_ctx_dist->init(rados, pool_name.c_str(), num_threads); - if (ret) { - cerr << ERR_PREFIX << "failed to initialize Rados io contexts." 
- << std::endl; - _exit(ret); - } - - ThreadPool thread_pool(g_ceph_context, "rados_sync_threadpool", num_threads); - thread_pool.start(); - - if (action == "import") { - ret = do_rados_import(&thread_pool, io_ctx, io_ctx_dist, src.c_str(), - force, delete_after); - thread_pool.stop(); - return ret; - } - else { - ret = do_rados_export(&thread_pool, io_ctx, io_ctx_dist, dst.c_str(), - create, force, delete_after); - thread_pool.stop(); - return ret; - } -} diff --git a/src/rados_sync.h b/src/rados_sync.h deleted file mode 100644 index 0f7226e0239..00000000000 --- a/src/rados_sync.h +++ /dev/null @@ -1,217 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2004-2006 Sage Weil - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. - * - */ - -#ifndef CEPH_RADOS_SYNC_H -#define CEPH_RADOS_SYNC_H - -#include -#include "include/atomic.h" -#include "common/WorkQueue.h" - -#include -#include - -namespace librados { - class IoCtx; - class Rados; -} - -extern const char USER_XATTR_PREFIX[]; -extern const char RADOS_SYNC_TMP_SUFFIX[]; -#define ERR_PREFIX "[ERROR] " -#define DEFAULT_NUM_RADOS_WORKER_THREADS 5 - -/* Linux seems to use ENODATA instead of ENOATTR when an extended attribute - * is missing */ -#ifndef ENOATTR -#define ENOATTR ENODATA -#endif - -enum { - CHANGED_XATTRS = 0x1, - CHANGED_CONTENTS = 0x2, -}; - -/** Given the name of an extended attribute from a file in the filesystem, - * returns an empty string if the extended attribute does not represent a rados - * user extended attribute. Otherwise, returns the name of the rados extended - * attribute. - * - * Rados user xattrs are prefixed with USER_XATTR_PREFIX. 
- */ -std::string get_user_xattr_name(const char *fs_xattr_name); - -/* Returns true if 'suffix' is a suffix of str */ -bool is_suffix(const char *str, const char *suffix); - -/** Represents a directory in the filesystem that we export rados objects to (or - * import them from.) - */ -class ExportDir -{ -public: - static ExportDir* create_for_writing(const std::string &path, int version, - bool create); - static ExportDir* from_file_system(const std::string &path); - - /* Given a rados object name, return something which looks kind of like the - * first part of the name. - * - * The actual file name that the backed-up object is stored in is irrelevant - * to rados_sync. The only reason to make it human-readable at all is to make - * things easier on sysadmins. The XATTR_FULLNAME extended attribute has the - * real, full object name. - * - * This function turns unicode into a bunch of 'at' signs. This could be - * fixed. If you try, be sure to handle all the multibyte characters - * correctly. - * I guess a better hash would be nice too. - */ - std::string get_fs_path(const std::string &rados_name) const; - -private: - ExportDir(int version_, const std::string &path_); - - int version; - std::string path; -}; - -/** Smart pointer wrapper for a DIR* - */ -class DirHolder { -public: - DirHolder(); - ~DirHolder(); - int opendir(const char *dir_name); - DIR *dp; -}; - -/** IoCtxDistributor is a singleton that distributes out IoCtx instances to - * different threads. 
- */ -class IoCtxDistributor -{ -public: - static IoCtxDistributor* instance(); - int init(librados::Rados &cluster, const char *pool_name, int num_ioctxes); - void clear(); - librados::IoCtx& get_ioctx(); -private: - static IoCtxDistributor *s_instance; - IoCtxDistributor(); - ~IoCtxDistributor(); - - ceph::atomic_t m_highest_iod_idx; - - /* NB: there might be some false sharing here that we could optimize - * away in the future */ - std::vector m_io_ctxes; -}; - -class RadosSyncWQ : public ThreadPool::WorkQueue { -public: - RadosSyncWQ(IoCtxDistributor *io_ctx_dist, time_t timeout, time_t suicide_timeout, ThreadPool *tp); -protected: - IoCtxDistributor *m_io_ctx_dist; -private: - bool _enqueue(std::string *s); - void _dequeue(std::string *o); - bool _empty(); - std::string *_dequeue(); - void _process_finish(std::string *s); - void _clear(); - std::deque m_items; -}; - -/* Stores a length and a chunk of malloc()ed data */ -class Xattr { -public: - Xattr(char *data_, ssize_t len_); - ~Xattr(); - bool operator==(const class Xattr &rhs) const; - bool operator!=(const class Xattr &rhs) const; - - char *data; - ssize_t len; -}; - -/* Represents an object that we are backing up */ -class BackedUpObject -{ -public: - static int from_file(const char *file_name, const char *dir_name, - std::auto_ptr &obj); - static int from_path(const char *path, std::auto_ptr &obj); - static int from_rados(librados::IoCtx& io_ctx, const char *rados_name_, - auto_ptr &obj); - ~BackedUpObject(); - - /* Get the mangled name for this rados object. */ - std::string get_fs_path(const ExportDir *export_dir) const; - - /* Convert the xattrs on this BackedUpObject to a kind of JSON-like string. - * This is only used for debugging. - * Note that we're assuming we can just treat the xattr data as a - * null-terminated string, which isn't true. Again, this is just for debugging, - * so it doesn't matter. 
- */ - std::string xattrs_to_str() const; - - /* Diff the extended attributes on this BackedUpObject with those found on a - * different BackedUpObject - */ - void xattr_diff(const BackedUpObject *rhs, - std::list < std::string > &only_in_a, - std::list < std::string > &only_in_b, - std::list < std::string > &diff) const; - - void get_xattrs(std::list < std::string > &xattrs_) const; - - const Xattr* get_xattr(const std::string name) const; - - const char *get_rados_name() const; - - uint64_t get_rados_size() const; - - time_t get_mtime() const; - - int download(librados::IoCtx &io_ctx, const char *path); - - int upload(librados::IoCtx &io_ctx, const char *file_name, const char *dir_name); - -private: - BackedUpObject(const char *rados_name_, uint64_t rados_size_, time_t rados_time_); - - int read_xattrs_from_file(int fd); - - int read_xattrs_from_rados(librados::IoCtx &io_ctx); - - // don't allow copying - BackedUpObject &operator=(const BackedUpObject &rhs); - BackedUpObject(const BackedUpObject &rhs); - - char *rados_name; - uint64_t rados_size; - uint64_t rados_time; - std::map < std::string, Xattr* > xattrs; -}; - -extern int do_rados_import(ThreadPool *tp, librados::IoCtx &io_ctx, - IoCtxDistributor* io_ctx_dist, const char *dir_name, - bool force, bool delete_after); -extern int do_rados_export(ThreadPool *tp, librados::IoCtx& io_ctx, - IoCtxDistributor *io_ctx_dist, const char *dir_name, - bool create, bool force, bool delete_after); - -#endif diff --git a/src/tools/rados/rados.cc b/src/tools/rados/rados.cc new file mode 100644 index 00000000000..0b7cc2b9cb7 --- /dev/null +++ b/src/tools/rados/rados.cc @@ -0,0 +1,2352 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as 
published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "include/types.h" + +#include "include/rados/librados.hpp" +#include "include/rados/rados_types.hpp" +#include "rados_sync.h" +using namespace librados; + +#include "common/config.h" +#include "common/ceph_argparse.h" +#include "global/global_init.h" +#include "common/Cond.h" +#include "common/debug.h" +#include "common/errno.h" +#include "common/Formatter.h" +#include "common/obj_bencher.h" +#include "mds/inode_backtrace.h" +#include "auth/Crypto.h" +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cls/lock/cls_lock_client.h" + +int rados_tool_sync(const std::map < std::string, std::string > &opts, + std::vector &args); + +// two steps seem to be necessary to do this right +#define STR(x) _STR(x) +#define _STR(x) #x + +void usage(ostream& out) +{ + out << \ +"usage: rados [options] [commands]\n" +"POOL COMMANDS\n" +" lspools list pools\n" +" mkpool [123[ 4]] create pool '\n" +" [with auid 123[and using crush rule 4]]\n" +" cppool copy content of a pool\n" +" rmpool [ --yes-i-really-really-mean-it]\n" +" remove pool '\n" +" df show per-pool and total usage\n" +" ls list objects in pool\n\n" +" chown 123 change the pool owner to auid 123\n" +"\n" +"OBJECT COMMANDS\n" +" get [outfile] fetch object\n" +" put [infile] write object\n" +" truncate length truncate object\n" +" create [category] create object\n" +" rm ... 
remove object(s)\n" +" cp [target-obj] copy object\n" +" clonedata clone object data\n" +" listxattr \n" +" getxattr attr\n" +" setxattr attr val\n" +" rmxattr attr\n" +" stat objname stat the named object\n" +" mapext \n" +" lssnap list snaps\n" +" mksnap create snap \n" +" rmsnap remove snap \n" +" rollback roll back object to snap \n" +"\n" +" listsnaps list the snapshots of this object\n" +" bench write|seq|rand [-t concurrent_operations] [--no-cleanup]\n" +" default is 16 concurrent IOs and 4 MB ops\n" +" default is to clean up after write benchmark\n" +" cleanup clean up a previous benchmark operation\n" +" load-gen [options] generate load on the cluster\n" +" listomapkeys list the keys in the object map\n" +" listomapvals list the keys and vals in the object map \n" +" getomapval show the value for the specified key\n" +" in the object's object map\n" +" setomapval \n" +" rmomapkey \n" +" getomapheader \n" +" setomapheader \n" +" listwatchers list the watchers of this object\n" +"\n" +"IMPORT AND EXPORT\n" +" import [options] \n" +" Upload to \n" +" export [options] rados-pool> \n" +" Download to \n" +" options:\n" +" -f / --force Copy everything, even if it hasn't changed.\n" +" -d / --delete-after After synchronizing, delete unreferenced\n" +" files or objects from the target bucket\n" +" or directory.\n" +" --workers Number of worker threads to spawn \n" +" (default " STR(DEFAULT_NUM_RADOS_WORKER_THREADS) ")\n" +"\n" +"ADVISORY LOCKS\n" +" lock list \n" +" List all advisory locks on an object\n" +" lock get \n" +" Try to acquire a lock\n" +" lock break \n" +" Try to break a lock acquired by another client\n" +" lock info \n" +" Show lock information\n" +" options:\n" +" --lock-tag Lock tag, all locks operation should use\n" +" the same tag\n" +" --lock-cookie Locker cookie\n" +" --lock-description Description of lock\n" +" --lock-duration Lock duration (in seconds)\n" +" --lock-type Lock type (shared, exclusive)\n" +"\n" +"GLOBAL OPTIONS:\n" +" 
--object_locator object_locator\n" +" set object_locator for operation\n" +" -p pool\n" +" --pool=pool\n" +" select given pool by name\n" +" --target-pool=pool\n" +" select target pool by name\n" +" -b op_size\n" +" set the size of write ops for put or benchmarking\n" +" -s name\n" +" --snap name\n" +" select given snap name for (read) IO\n" +" -i infile\n" +" -o outfile\n" +" specify input or output file (for certain commands)\n" +" --create\n" +" create the pool or directory that was specified\n" +" -N namespace\n" +" --namespace=namespace\n" +" specify the namespace to use for the object\n" +"\n" +"BENCH OPTIONS:\n" +" -t N\n" +" --concurrent-ios=N\n" +" Set number of concurrent I/O operations\n" +" --show-time\n" +" prefix output with date/time\n" +"\n" +"LOAD GEN OPTIONS:\n" +" --num-objects total number of objects\n" +" --min-object-size min object size\n" +" --max-object-size max object size\n" +" --min-ops min number of operations\n" +" --max-ops max number of operations\n" +" --max-backlog max backlog (in MB)\n" +" --percent percent of operations that are read\n" +" --target-throughput target throughput (in MB)\n" +" --run-length total time (in seconds)\n"; + +} + +static void usage_exit() +{ + usage(cerr); + exit(1); +} + +static int do_get(IoCtx& io_ctx, const char *objname, const char *outfile, unsigned op_size) +{ + string oid(objname); + + int fd; + if (strcmp(outfile, "-") == 0) { + fd = 1; + } else { + fd = TEMP_FAILURE_RETRY(::open(outfile, O_WRONLY|O_CREAT|O_TRUNC, 0644)); + if (fd < 0) { + int err = errno; + cerr << "failed to open file: " << cpp_strerror(err) << std::endl; + return -err; + } + } + + uint64_t offset = 0; + int ret; + while (true) { + bufferlist outdata; + ret = io_ctx.read(oid, outdata, op_size, offset); + if (ret <= 0) { + goto out; + } + ret = outdata.write_fd(fd); + if (ret < 0) { + cerr << "error writing to file: " << cpp_strerror(ret) << std::endl; + goto out; + } + if (outdata.length() < op_size) + break; + offset += 
outdata.length(); + } + ret = 0; + + out: + if (fd != 1) + TEMP_FAILURE_RETRY(::close(fd)); + return ret; +} + +static int do_copy(IoCtx& io_ctx, const char *objname, IoCtx& target_ctx, const char *target_obj) +{ + string oid(objname); + bufferlist outdata; + librados::ObjectReadOperation read_op; + string start_after; + +#define COPY_CHUNK_SIZE (4 * 1024 * 1024) + read_op.read(0, COPY_CHUNK_SIZE, &outdata, NULL); + + map attrset; + read_op.getxattrs(&attrset, NULL); + + bufferlist omap_header; + read_op.omap_get_header(&omap_header, NULL); + +#define OMAP_CHUNK 1000 + map omap; + read_op.omap_get_vals(start_after, OMAP_CHUNK, &omap, NULL); + + bufferlist opbl; + int ret = io_ctx.operate(oid, &read_op, &opbl); + if (ret < 0) { + return ret; + } + + librados::ObjectWriteOperation write_op; + string target_oid(target_obj); + + /* reset dest if exists */ + write_op.create(false); + write_op.remove(); + + write_op.write_full(outdata); + write_op.omap_set_header(omap_header); + + map::iterator iter; + for (iter = attrset.begin(); iter != attrset.end(); ++iter) { + write_op.setxattr(iter->first.c_str(), iter->second); + } + if (!omap.empty()) { + write_op.omap_set(omap); + } + ret = target_ctx.operate(target_oid, &write_op); + if (ret < 0) { + return ret; + } + + uint64_t off = 0; + + while (outdata.length() == COPY_CHUNK_SIZE) { + off += outdata.length(); + outdata.clear(); + ret = io_ctx.read(oid, outdata, COPY_CHUNK_SIZE, off); + if (ret < 0) + goto err; + + ret = target_ctx.write(target_oid, outdata, outdata.length(), off); + if (ret < 0) + goto err; + } + + /* iterate through source omap and update target. 
This is not atomic */ + while (omap.size() == OMAP_CHUNK) { + /* now start_after should point at the last entry */ + map::iterator iter = omap.end(); + --iter; + start_after = iter->first; + + omap.clear(); + ret = io_ctx.omap_get_vals(oid, start_after, OMAP_CHUNK, &omap); + if (ret < 0) + goto err; + + if (omap.empty()) + break; + + ret = target_ctx.omap_set(target_oid, omap); + if (ret < 0) + goto err; + } + + return 0; + +err: + target_ctx.remove(target_oid); + return ret; +} + +static int do_clone_data(IoCtx& io_ctx, const char *objname, IoCtx& target_ctx, const char *target_obj) +{ + string oid(objname); + + // get size + uint64_t size; + int r = target_ctx.stat(oid, &size, NULL); + if (r < 0) + return r; + + librados::ObjectWriteOperation write_op; + string target_oid(target_obj); + + /* reset data stream only */ + write_op.create(false); + write_op.truncate(0); + write_op.clone_range(0, oid, 0, size); + return target_ctx.operate(target_oid, &write_op); +} + +static int do_copy_pool(Rados& rados, const char *src_pool, const char *target_pool) +{ + IoCtx src_ctx, target_ctx; + int ret = rados.ioctx_create(src_pool, src_ctx); + if (ret < 0) { + cerr << "cannot open source pool: " << src_pool << std::endl; + return ret; + } + ret = rados.ioctx_create(target_pool, target_ctx); + if (ret < 0) { + cerr << "cannot open target pool: " << target_pool << std::endl; + return ret; + } + librados::ObjectIterator i = src_ctx.objects_begin(); + librados::ObjectIterator i_end = src_ctx.objects_end(); + for (; i != i_end; ++i) { + string oid = i->first; + string locator = i->second; + if (i->second.size()) + cout << src_pool << ":" << oid << "(@" << locator << ")" << " => " + << target_pool << ":" << oid << "(@" << locator << ")" << std::endl; + else + cout << src_pool << ":" << oid << " => " + << target_pool << ":" << oid << std::endl; + + + target_ctx.locator_set_key(locator); + ret = do_copy(src_ctx, oid.c_str(), target_ctx, oid.c_str()); + if (ret < 0) { + char buf[64]; + 
cerr << "error copying object: " << strerror_r(errno, buf, sizeof(buf)) << std::endl; + return ret; + } + } + + return 0; +} + +static int do_put(IoCtx& io_ctx, const char *objname, const char *infile, int op_size) +{ + string oid(objname); + bufferlist indata; + bool stdio = false; + if (strcmp(infile, "-") == 0) + stdio = true; + + int ret; + int fd = 0; + if (!stdio) + fd = open(infile, O_RDONLY); + if (fd < 0) { + char buf[80]; + cerr << "error reading input file " << infile << ": " << strerror_r(errno, buf, sizeof(buf)) << std::endl; + return 1; + } + char *buf = new char[op_size]; + int count = op_size; + uint64_t offset = 0; + while (count != 0) { + count = read(fd, buf, op_size); + if (count < 0) { + ret = -errno; + cerr << "error reading input file " << infile << ": " << cpp_strerror(ret) << std::endl; + goto out; + } + if (count == 0) { + if (!offset) { + ret = io_ctx.create(oid, true); + if (ret < 0) { + cerr << "WARNING: could not create object: " << oid << std::endl; + goto out; + } + } + continue; + } + indata.append(buf, count); + if (offset == 0) + ret = io_ctx.write_full(oid, indata); + else + ret = io_ctx.write(oid, indata, count, offset); + indata.clear(); + + if (ret < 0) { + goto out; + } + offset += count; + } + ret = 0; + out: + TEMP_FAILURE_RETRY(close(fd)); + delete[] buf; + return ret; +} + +class RadosWatchCtx : public librados::WatchCtx { + string name; +public: + RadosWatchCtx(const char *imgname) : name(imgname) {} + virtual ~RadosWatchCtx() {} + virtual void notify(uint8_t opcode, uint64_t ver, bufferlist& bl) { + string s; + try { + bufferlist::iterator iter = bl.begin(); + ::decode(s, iter); + } catch (buffer::error *err) { + cout << "could not decode bufferlist, buffer length=" << bl.length() << std::endl; + } + cout << name << " got notification opcode=" << (int)opcode << " ver=" << ver << " msg='" << s << "'" << std::endl; + } +}; + +static const char 
alphanum_table[]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; + +int gen_rand_alphanumeric(char *dest, int size) /* size should be the required string size + 1 */ +{ + int ret = get_random_bytes(dest, size); + if (ret < 0) { + cerr << "cannot get random bytes: " << cpp_strerror(-ret) << std::endl; + return -1; + } + + int i; + for (i=0; i objs; + + utime_t start_time; + + bool going_down; + +public: + int read_percent; + int num_objs; + size_t min_obj_len; + uint64_t max_obj_len; + size_t min_op_len; + size_t max_op_len; + size_t max_ops; + size_t max_backlog; + size_t target_throughput; + int run_length; + + enum { + OP_READ, + OP_WRITE, + }; + + struct LoadGenOp { + int id; + int type; + string oid; + size_t off; + size_t len; + bufferlist bl; + LoadGen *lg; + librados::AioCompletion *completion; + + LoadGenOp() {} + LoadGenOp(LoadGen *_lg) : lg(_lg), completion(NULL) {} + }; + + int max_op; + + map pending_ops; + + void gen_op(LoadGenOp *op); + uint64_t gen_next_op(); + void run_op(LoadGenOp *op); + + uint64_t cur_sent_rate() { + return total_sent / time_passed(); + } + + uint64_t cur_completed_rate() { + return total_completed / time_passed(); + } + + uint64_t total_expected() { + return target_throughput * time_passed(); + } + + float time_passed() { + utime_t now = ceph_clock_now(g_ceph_context); + now -= start_time; + uint64_t ns = now.nsec(); + float total = ns / 1000000000; + total += now.sec(); + return total; + } + + Mutex lock; + Cond cond; + + LoadGen(Rados *_rados) : rados(_rados), going_down(false), lock("LoadGen") { + read_percent = 80; + min_obj_len = 1024; + max_obj_len = 5ull * 1024ull * 1024ull * 1024ull; + min_op_len = 1024; + target_throughput = 5 * 1024 * 1024; // B/sec + max_op_len = 2 * 1024 * 1024; + max_backlog = target_throughput * 2; + run_length = 60; + + total_sent = 0; + total_completed = 0; + num_objs = 200; + max_op = 16; + } + int bootstrap(const char *pool); + int run(); + void cleanup(); + + void 
io_cb(completion_t c, LoadGenOp *op) { + total_completed += op->len; + + Mutex::Locker l(lock); + + double rate = (double)cur_completed_rate() / (1024 * 1024); + cout.precision(3); + cout << "op " << op->id << " completed, throughput=" << rate << "MB/sec" << std::endl; + + map::iterator iter = pending_ops.find(op->id); + if (iter != pending_ops.end()) + pending_ops.erase(iter); + + if (!going_down) + op->completion->release(); + + delete op; + + cond.Signal(); + } +}; + +static void _load_gen_cb(completion_t c, void *param) +{ + LoadGen::LoadGenOp *op = (LoadGen::LoadGenOp *)param; + op->lg->io_cb(c, op); +} + +int LoadGen::bootstrap(const char *pool) +{ + char buf[128]; + int i; + + if (!pool) { + cerr << "ERROR: pool name was not specified" << std::endl; + return -EINVAL; + } + + int ret = rados->ioctx_create(pool, io_ctx); + if (ret < 0) { + cerr << "error opening pool " << pool << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + return ret; + } + + int buf_len = 1; + bufferptr p = buffer::create(buf_len); + bufferlist bl; + memset(p.c_str(), 0, buf_len); + bl.push_back(p); + + list completions; + for (i = 0; i < num_objs; i++) { + obj_info info; + gen_rand_alphanumeric(buf, 16); + info.name = "obj-"; + info.name.append(buf); + info.len = get_random(min_obj_len, max_obj_len); + + // throttle... 
+ while (completions.size() > max_ops) { + AioCompletion *c = completions.front(); + c->wait_for_complete(); + ret = c->get_return_value(); + c->release(); + completions.pop_front(); + if (ret < 0) { + cerr << "aio_write failed" << std::endl; + return ret; + } + } + + librados::AioCompletion *c = rados->aio_create_completion(NULL, NULL, NULL); + completions.push_back(c); + // generate object + ret = io_ctx.aio_write(info.name, c, bl, buf_len, info.len - buf_len); + if (ret < 0) { + cerr << "couldn't write obj: " << info.name << " ret=" << ret << std::endl; + return ret; + } + objs[i] = info; + } + + list::iterator iter; + for (iter = completions.begin(); iter != completions.end(); ++iter) { + AioCompletion *c = *iter; + c->wait_for_complete(); + ret = c->get_return_value(); + c->release(); + if (ret < 0) { // yes, we leak. + cerr << "aio_write failed" << std::endl; + return ret; + } + } + return 0; +} + +void LoadGen::run_op(LoadGenOp *op) +{ + op->completion = rados->aio_create_completion(op, _load_gen_cb, NULL); + + switch (op->type) { + case OP_READ: + io_ctx.aio_read(op->oid, op->completion, &op->bl, op->len, op->off); + break; + case OP_WRITE: + bufferptr p = buffer::create(op->len); + memset(p.c_str(), 0, op->len); + op->bl.push_back(p); + + io_ctx.aio_write(op->oid, op->completion, op->bl, op->len, op->off); + break; + } + + total_sent += op->len; +} + +void LoadGen::gen_op(LoadGenOp *op) +{ + int i = get_random(0, objs.size() - 1); + obj_info& info = objs[i]; + op->oid = info.name; + + size_t len = get_random(min_op_len, max_op_len); + if (len > info.len) + len = info.len; + size_t off = get_random(0, info.len); + + if (off + len > info.len) + off = info.len - len; + + op->off = off; + op->len = len; + + i = get_random(1, 100); + if (i > read_percent) + op->type = OP_WRITE; + else + op->type = OP_READ; + + cout << (op->type == OP_READ ? 
"READ" : "WRITE") << " : oid=" << op->oid << " off=" << op->off << " len=" << op->len << std::endl; +} + +uint64_t LoadGen::gen_next_op() +{ + lock.Lock(); + + LoadGenOp *op = new LoadGenOp(this); + gen_op(op); + op->id = max_op++; + pending_ops[op->id] = op; + + lock.Unlock(); + + run_op(op); + + return op->len; +} + +int LoadGen::run() +{ + start_time = ceph_clock_now(g_ceph_context); + utime_t end_time = start_time; + end_time += run_length; + utime_t stamp_time = start_time; + uint32_t total_sec = 0; + + while (1) { + lock.Lock(); + utime_t one_second(1, 0); + cond.WaitInterval(g_ceph_context, lock, one_second); + lock.Unlock(); + utime_t now = ceph_clock_now(g_ceph_context); + + if (now > end_time) + break; + + uint64_t expected = total_expected(); + lock.Lock(); + uint64_t sent = total_sent; + uint64_t completed = total_completed; + lock.Unlock(); + + if (now - stamp_time >= utime_t(1, 0)) { + double rate = (double)cur_completed_rate() / (1024 * 1024); + ++total_sec; + cout.precision(3); + cout << setw(5) << total_sec << ": throughput=" << rate << "MB/sec" << " pending data=" << sent - completed << std::endl; + stamp_time = now; + } + + while (sent < expected && + sent - completed < max_backlog && + pending_ops.size() < max_ops) { + sent += gen_next_op(); + } + } + + // get a reference to all pending requests + vector completions; + lock.Lock(); + going_down = true; + map::iterator iter; + for (iter = pending_ops.begin(); iter != pending_ops.end(); ++iter) { + LoadGenOp *op = iter->second; + completions.push_back(op->completion); + } + lock.Unlock(); + + cout << "waiting for all operations to complete" << std::endl; + + // now wait on all the pending requests + for (vector::iterator citer = completions.begin(); citer != completions.end(); ++citer) { + librados::AioCompletion *c = *citer; + c->wait_for_complete(); + c->release(); + } + + return 0; +} + +void LoadGen::cleanup() +{ + cout << "cleaning up objects" << std::endl; + map::iterator iter; + for (iter = 
objs.begin(); iter != objs.end(); ++iter) { + obj_info& info = iter->second; + int ret = io_ctx.remove(info.name); + if (ret < 0) + cerr << "couldn't remove obj: " << info.name << " ret=" << ret << std::endl; + } +} + + +class RadosBencher : public ObjBencher { + librados::AioCompletion **completions; + librados::Rados& rados; + librados::IoCtx& io_ctx; + librados::ObjectIterator oi; + bool iterator_valid; +protected: + int completions_init(int concurrentios) { + completions = new librados::AioCompletion *[concurrentios]; + return 0; + } + void completions_done() { + delete[] completions; + completions = NULL; + } + int create_completion(int slot, void (*cb)(void *, void*), void *arg) { + completions[slot] = rados.aio_create_completion((void *) arg, 0, cb); + + if (!completions[slot]) + return -EINVAL; + + return 0; + } + void release_completion(int slot) { + completions[slot]->release(); + completions[slot] = 0; + } + + int aio_read(const std::string& oid, int slot, bufferlist *pbl, size_t len) { + return io_ctx.aio_read(oid, completions[slot], pbl, len, 0); + } + + int aio_write(const std::string& oid, int slot, bufferlist& bl, size_t len) { + return io_ctx.aio_write(oid, completions[slot], bl, len, 0); + } + + int aio_remove(const std::string& oid, int slot) { + return io_ctx.aio_remove(oid, completions[slot]); + } + + int sync_read(const std::string& oid, bufferlist& bl, size_t len) { + return io_ctx.read(oid, bl, len, 0); + } + int sync_write(const std::string& oid, bufferlist& bl, size_t len) { + return io_ctx.write(oid, bl, len, 0); + } + + int sync_remove(const std::string& oid) { + return io_ctx.remove(oid); + } + + bool completion_is_done(int slot) { + return completions[slot]->is_safe(); + } + + int completion_wait(int slot) { + return completions[slot]->wait_for_safe_and_cb(); + } + int completion_ret(int slot) { + return completions[slot]->get_return_value(); + } + + bool get_objects(std::list* objects, int num) { + int count = 0; + + if 
(!iterator_valid) {
+      oi = io_ctx.objects_begin();
+      iterator_valid = true;
+    }
+
+    librados::ObjectIterator ei = io_ctx.objects_end();
+
+    if (oi == ei) {
+      iterator_valid = false;
+      return false;
+    }
+
+    objects->clear();
+    for ( ; oi != ei && count < num; ++oi) {
+      objects->push_back(oi->first);
+      ++count;
+    }
+
+    return true;
+  }
+
+public:
+  RadosBencher(CephContext *cct_, librados::Rados& _r, librados::IoCtx& _i)
+    : ObjBencher(cct), completions(NULL), rados(_r), io_ctx(_i), iterator_valid(false) {}
+  ~RadosBencher() { }
+};
+
+/**
+ * Run one "lock" sub-command (list, info, get or break) against an object.
+ *
+ * @param nargs      positional arguments: nargs[1] is the sub-command,
+ *                   nargs[2] the object name, nargs[3] the lock name
+ *                   (info/get/break) and nargs[4] the locker entity name
+ *                   (break only)
+ * @param opts       parsed options; the keys read here are "lock-tag",
+ *                   "lock-cookie", "lock-description", "lock-duration"
+ *                   and "lock-type"
+ * @param ioctx      I/O context the lock calls are issued on
+ * @param formatter  receives the output of "list" and "info"; it is
+ *                   dereferenced unconditionally, so it must not be NULL
+ * @return a negative errno-style code on failure; on success 0 for
+ *         "list"/"break" and the non-negative result of the underlying
+ *         call for "info"/"get"
+ *
+ * NOTE(review): usage_exit() is defined outside this chunk; the checks
+ * below assume it does not return.
+ */
+static int do_lock_cmd(std::vector<const char*> &nargs,
+                       const std::map < std::string, std::string > &opts,
+                       IoCtx *ioctx,
+                       Formatter *formatter)
+{
+  if (nargs.size() < 3)
+    usage_exit();
+
+  string cmd(nargs[1]);
+  string oid(nargs[2]);
+
+  string lock_tag;
+  string lock_cookie;
+  string lock_description;
+  int lock_duration = 0;
+  ClsLockType lock_type = LOCK_EXCLUSIVE;
+
+  map<string, string>::const_iterator i;
+  i = opts.find("lock-tag");
+  if (i != opts.end()) {
+    lock_tag = i->second;
+  }
+  i = opts.find("lock-cookie");
+  if (i != opts.end()) {
+    lock_cookie = i->second;
+  }
+  i = opts.find("lock-description");
+  if (i != opts.end()) {
+    lock_description = i->second;
+  }
+  i = opts.find("lock-duration");
+  if (i != opts.end()) {
+    // A bare strtol() turns garbage such as "abc" into 0 without any
+    // diagnostic; reject anything that is not a whole non-negative int,
+    // the same way an unknown lock type is rejected below.
+    const char *text = i->second.c_str();
+    char *end = NULL;
+    errno = 0;
+    long duration = strtol(text, &end, 10);
+    if (errno != 0 || end == text || *end != '\0' ||
+        duration < 0 || duration > INT_MAX) {
+      cerr << "invalid lock duration was specified, aborting" << std::endl;
+      return -EINVAL;
+    }
+    lock_duration = (int)duration;
+  }
+  i = opts.find("lock-type");
+  if (i != opts.end()) {
+    const string& type_str = i->second;
+    if (type_str.compare("exclusive") == 0) {
+      lock_type = LOCK_EXCLUSIVE;
+    } else if (type_str.compare("shared") == 0) {
+      lock_type = LOCK_SHARED;
+    } else {
+      cerr << "unknown lock type was specified, aborting" << std::endl;
+      return -EINVAL;
+    }
+  }
+
+  // Error text comes from cpp_strerror(): streaming the return value of
+  // strerror_r() prints an int (not the message) with the XSI variant.
+
+  // "list" only needs the object name.
+  if (cmd.compare("list") == 0) {
+    list<string> locks;
+    int ret = rados::cls::lock::list_locks(ioctx, oid, &locks);
+    if (ret < 0) {
+      cerr << "ERROR: rados_list_locks(): " << cpp_strerror(ret) << std::endl;
+      return ret;
+    }
+
+    formatter->open_object_section("object");
+    formatter->dump_string("objname", oid);
+    formatter->open_array_section("locks");
+    list<string>::iterator iter;
+    for (iter = locks.begin(); iter != locks.end(); ++iter) {
+      formatter->open_object_section("lock");
+      formatter->dump_string("name", *iter);
+      formatter->close_section();
+    }
+    formatter->close_section();
+    formatter->close_section();
+    formatter->flush(cout);
+    return 0;
+  }
+
+  // Every remaining sub-command also needs a lock name.
+  if (nargs.size() < 4)
+    usage_exit();
+
+  string lock_name(nargs[3]);
+
+  if (cmd.compare("info") == 0) {
+    map<rados::cls::lock::locker_id_t, rados::cls::lock::locker_info_t> lockers;
+    ClsLockType type = LOCK_NONE;
+    string tag;
+    int ret = rados::cls::lock::get_lock_info(ioctx, oid, lock_name, &lockers, &type, &tag);
+    if (ret < 0) {
+      cerr << "ERROR: rados_lock_get_lock_info(): " << cpp_strerror(ret) << std::endl;
+      return ret;
+    }
+
+    formatter->open_object_section("lock");
+    formatter->dump_string("name", lock_name);
+    formatter->dump_string("type", cls_lock_type_str(type));
+    formatter->dump_string("tag", tag);
+    formatter->open_array_section("lockers");
+    map<rados::cls::lock::locker_id_t, rados::cls::lock::locker_info_t>::iterator iter;
+    for (iter = lockers.begin(); iter != lockers.end(); ++iter) {
+      const rados::cls::lock::locker_id_t& id = iter->first;
+      const rados::cls::lock::locker_info_t& info = iter->second;
+      formatter->open_object_section("locker");
+      formatter->dump_stream("name") << id.locker;
+      formatter->dump_string("cookie", id.cookie);
+      formatter->dump_string("description", info.description);
+      formatter->dump_stream("expiration") << info.expiration;
+      formatter->dump_stream("addr") << info.addr;
+      formatter->close_section();
+    }
+    formatter->close_section();
+    formatter->close_section();
+    formatter->flush(cout);
+
+    return ret;
+  } else if (cmd.compare("get") == 0) {
+    rados::cls::lock::Lock l(lock_name);
+    l.set_cookie(lock_cookie);
+    l.set_tag(lock_tag);
+    l.set_duration(utime_t(lock_duration, 0));
+    l.set_description(lock_description);
+    int ret;
+    switch (lock_type) {
+    case LOCK_SHARED:
+      ret = l.lock_shared(ioctx, oid);
+      break;
+    default:
+      ret = l.lock_exclusive(ioctx, oid);
+    }
+    if (ret < 0) {
+      cerr << "ERROR: failed locking: " << cpp_strerror(ret) << std::endl;
+      return ret;
+    }
+
+    return ret;
+  }
+
+  // "break" additionally needs the entity name of the locker.
+  if (nargs.size() < 5)
+    usage_exit();
+
+  if (cmd.compare("break") == 0) {
+    string locker(nargs[4]);
+    rados::cls::lock::Lock l(lock_name);
+    l.set_cookie(lock_cookie);
+    l.set_tag(lock_tag);
+    entity_name_t name;
+    if (!name.parse(locker)) {
+      cerr << "ERROR: failed to parse locker name (" << locker << ")" << std::endl;
+      return -EINVAL;
+    }
+    int ret = l.break_lock(ioctx, oid, name);
+    if (ret < 0) {
+      cerr << "ERROR: failed breaking lock: " << cpp_strerror(ret) << std::endl;
+      return ret;
+    }
+  } else {
+    usage_exit();
+  }
+
+  return 0;
+}
+
+/**********************************************
+
+**********************************************/
+static int rados_tool_common(const std::map < std::string, std::string > &opts,
+                             std::vector &nargs)
+{
+  int ret;
+  bool create_pool = false;
+  const char *pool_name = NULL;
+  const char *target_pool_name = NULL;
+  string oloc, target_oloc, nspace;
+  int concurrent_ios = 16;
+  int op_size = 1 << 22;
+  bool cleanup = true;
+  const char *snapname = NULL;
+  snap_t snapid = CEPH_NOSNAP;
+  std::map::const_iterator i;
+  std::string category;
+
+  uint64_t min_obj_len = 0;
+  uint64_t max_obj_len = 0;
+  uint64_t min_op_len = 0;
+  uint64_t max_op_len = 0;
+  uint64_t max_ops = 0;
+  uint64_t max_backlog = 0;
+  uint64_t target_throughput = 0;
+  int64_t read_percent = -1;
+  uint64_t num_objs = 0;
+  int run_length = 0;
+
+  bool show_time = false;
+
+  Formatter *formatter = NULL;
+  bool pretty_format = false;
+
+  Rados rados;
+  IoCtx io_ctx;
+
+  i = opts.find("create");
+  if (i != opts.end()) {
+    create_pool = true;
+  }
+  i = opts.find("pool");
+  if (i != opts.end()) {
+    pool_name = i->second.c_str();
+  }
+  i = opts.find("target_pool");
+  if (i != opts.end()) {
+    target_pool_name = i->second.c_str();
+  }
+  i = opts.find("object_locator");
+  if
(i != opts.end()) { + oloc = i->second; + } + i = opts.find("target_locator"); + if (i != opts.end()) { + target_oloc = i->second; + } + i = opts.find("category"); + if (i != opts.end()) { + category = i->second; + } + i = opts.find("concurrent-ios"); + if (i != opts.end()) { + concurrent_ios = strtol(i->second.c_str(), NULL, 10); + } + i = opts.find("block-size"); + if (i != opts.end()) { + op_size = strtol(i->second.c_str(), NULL, 10); + } + i = opts.find("snap"); + if (i != opts.end()) { + snapname = i->second.c_str(); + } + i = opts.find("snapid"); + if (i != opts.end()) { + snapid = strtoll(i->second.c_str(), NULL, 10); + } + i = opts.find("min-object-size"); + if (i != opts.end()) { + min_obj_len = strtoll(i->second.c_str(), NULL, 10); + } + i = opts.find("max-object-size"); + if (i != opts.end()) { + max_obj_len = strtoll(i->second.c_str(), NULL, 10); + } + i = opts.find("min-op-len"); + if (i != opts.end()) { + min_op_len = strtoll(i->second.c_str(), NULL, 10); + } + i = opts.find("max-op-len"); + if (i != opts.end()) { + max_op_len = strtoll(i->second.c_str(), NULL, 10); + } + i = opts.find("max-ops"); + if (i != opts.end()) { + max_ops = strtoll(i->second.c_str(), NULL, 10); + } + i = opts.find("max-backlog"); + if (i != opts.end()) { + max_backlog = strtoll(i->second.c_str(), NULL, 10); + } + i = opts.find("target-throughput"); + if (i != opts.end()) { + target_throughput = strtoll(i->second.c_str(), NULL, 10); + } + i = opts.find("read-percent"); + if (i != opts.end()) { + read_percent = strtoll(i->second.c_str(), NULL, 10); + } + i = opts.find("num-objects"); + if (i != opts.end()) { + num_objs = strtoll(i->second.c_str(), NULL, 10); + } + i = opts.find("run-length"); + if (i != opts.end()) { + run_length = strtol(i->second.c_str(), NULL, 10); + } + i = opts.find("show-time"); + if (i != opts.end()) { + show_time = true; + } + i = opts.find("no-cleanup"); + if (i != opts.end()) { + cleanup = false; + } + i = opts.find("pretty-format"); + if (i != 
opts.end()) { + pretty_format = true; + } + i = opts.find("format"); + if (i != opts.end()) { + const char *format = i->second.c_str(); + if (strcmp(format, "xml") == 0) + formatter = new XMLFormatter(pretty_format); + else if (strcmp(format, "json") == 0) + formatter = new JSONFormatter(pretty_format); + else { + cerr << "unrecognized format: " << format << std::endl; + return -EINVAL; + } + } + i = opts.find("namespace"); + if (i != opts.end()) { + nspace = i->second; + } + + + // open rados + ret = rados.init_with_context(g_ceph_context); + if (ret) { + cerr << "couldn't initialize rados! error " << ret << std::endl; + ret = -1; + goto out; + } + + ret = rados.connect(); + if (ret) { + cerr << "couldn't connect to cluster! error " << ret << std::endl; + ret = -1; + goto out; + } + char buf[80]; + + if (create_pool && !pool_name) { + cerr << "--create-pool requested but pool_name was not specified!" << std::endl; + usage_exit(); + } + + if (create_pool) { + ret = rados.pool_create(pool_name, 0, 0); + if (ret < 0) { + cerr << "error creating pool " << pool_name << ": " + << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } + } + + // open io context. + if (pool_name) { + ret = rados.ioctx_create(pool_name, io_ctx); + if (ret < 0) { + cerr << "error opening pool " << pool_name << ": " + << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } + } + + // snapname? 
+ if (snapname) { + ret = io_ctx.snap_lookup(snapname, &snapid); + if (ret < 0) { + cerr << "error looking up snap '" << snapname << "': " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } + } + if (oloc.size()) { + io_ctx.locator_set_key(oloc); + } + if (!nspace.empty()) { + io_ctx.set_namespace(nspace); + } + if (snapid != CEPH_NOSNAP) { + string name; + ret = io_ctx.snap_get_name(snapid, &name); + if (ret < 0) { + cerr << "snapid " << snapid << " doesn't exist in pool " + << io_ctx.get_pool_name() << std::endl; + goto out; + } + io_ctx.snap_set_read(snapid); + cout << "selected snap " << snapid << " '" << snapname << "'" << std::endl; + } + + assert(!nargs.empty()); + + // list pools? + if (strcmp(nargs[0], "lspools") == 0) { + list vec; + rados.pool_list(vec); + for (list::iterator i = vec.begin(); i != vec.end(); ++i) + cout << *i << std::endl; + } + else if (strcmp(nargs[0], "df") == 0) { + // pools + list vec; + + if (!pool_name) + rados.pool_list(vec); + else + vec.push_back(pool_name); + + map > stats; + rados.get_pool_stats(vec, category, stats); + + if (!formatter) { + printf("%-15s %-15s" + "%12s %12s %12s %12s " + "%12s %12s %12s %12s %12s\n", + "pool name", + "category", + "KB", "objects", "clones", "degraded", + "unfound", "rd", "rd KB", "wr", "wr KB"); + } else { + formatter->open_object_section("stats"); + formatter->open_array_section("pools"); + } + for (map::iterator c = stats.begin(); c != stats.end(); ++c) { + const char *pool_name = c->first.c_str(); + stats_map& m = c->second; + if (formatter) { + formatter->open_object_section("pool"); + int64_t pool_id = rados.pool_lookup(pool_name); + formatter->dump_string("name", pool_name); + if (pool_id >= 0) + formatter->dump_format("id", "%lld", pool_id); + else + cerr << "ERROR: lookup_pg_pool_name for name=" << pool_name << " returned " << pool_id << std::endl; + formatter->open_array_section("categories"); + } + for (stats_map::iterator i = m.begin(); i != m.end(); ++i) { + 
const char *category = (i->first.size() ? i->first.c_str() : ""); + pool_stat_t& s = i->second; + if (!formatter) { + if (!*category) + category = "-"; + printf("%-15s " + "%-15s " + "%12lld %12lld %12lld %12lld" + "%12lld %12lld %12lld %12lld %12lld\n", + pool_name, + category, + (long long)s.num_kb, + (long long)s.num_objects, + (long long)s.num_object_clones, + (long long)s.num_objects_degraded, + (long long)s.num_objects_unfound, + (long long)s.num_rd, (long long)s.num_rd_kb, + (long long)s.num_wr, (long long)s.num_wr_kb); + } else { + formatter->open_object_section("category"); + if (category) + formatter->dump_string("name", category); + formatter->dump_format("size_bytes", "%lld", s.num_bytes); + formatter->dump_format("size_kb", "%lld", s.num_kb); + formatter->dump_format("num_objects", "%lld", s.num_objects); + formatter->dump_format("num_object_clones", "%lld", s.num_object_clones); + formatter->dump_format("num_object_copies", "%lld", s.num_object_copies); + formatter->dump_format("num_objects_missing_on_primary", "%lld", s.num_objects_missing_on_primary); + formatter->dump_format("num_objects_unfound", "%lld", s.num_objects_unfound); + formatter->dump_format("num_objects_degraded", "%lld", s.num_objects_degraded); + formatter->dump_format("read_bytes", "%lld", s.num_rd); + formatter->dump_format("read_kb", "%lld", s.num_rd_kb); + formatter->dump_format("write_bytes", "%lld", s.num_wr); + formatter->dump_format("write_kb", "%lld", s.num_wr_kb); + formatter->flush(cout); + } + if (formatter) { + formatter->close_section(); + } + } + if (formatter) { + formatter->close_section(); + formatter->close_section(); + formatter->flush(cout); + } + } + + // total + cluster_stat_t tstats; + rados.cluster_stat(tstats); + if (!formatter) { + printf(" total used %12lld %12lld\n", (long long unsigned)tstats.kb_used, + (long long unsigned)tstats.num_objects); + printf(" total avail %12lld\n", (long long unsigned)tstats.kb_avail); + printf(" total space %12lld\n", (long 
long unsigned)tstats.kb); + } else { + formatter->close_section(); + formatter->dump_format("total_objects", "%lld", (long long unsigned)tstats.num_objects); + formatter->dump_format("total_used", "%lld", (long long unsigned)tstats.kb_used); + formatter->dump_format("total_avail", "%lld", (long long unsigned)tstats.kb_avail); + formatter->dump_format("total_space", "%lld", (long long unsigned)tstats.kb); + formatter->close_section(); + formatter->flush(cout); + } + } + + else if (strcmp(nargs[0], "ls") == 0) { + if (!pool_name) { + cerr << "pool name was not specified" << std::endl; + ret = -1; + goto out; + } + + bool stdout = (nargs.size() < 2) || (strcmp(nargs[1], "-") == 0); + ostream *outstream; + if(stdout) + outstream = &cout; + else + outstream = new ofstream(nargs[1]); + + { + try { + librados::ObjectIterator i = io_ctx.objects_begin(); + librados::ObjectIterator i_end = io_ctx.objects_end(); + for (; i != i_end; ++i) { + if (i->second.size()) + *outstream << i->first << "\t" << i->second << std::endl; + else + *outstream << i->first << std::endl; + } + } + catch (const std::runtime_error& e) { + cerr << e.what() << std::endl; + ret = -1; + goto out; + } + } + if (!stdout) + delete outstream; + } + else if (strcmp(nargs[0], "chown") == 0) { + if (!pool_name || nargs.size() < 2) + usage_exit(); + + uint64_t new_auid = strtol(nargs[1], 0, 10); + ret = io_ctx.set_auid(new_auid); + if (ret < 0) { + cerr << "error changing auid on pool " << io_ctx.get_pool_name() << ':' + << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + } else cerr << "changed auid on pool " << io_ctx.get_pool_name() + << " to " << new_auid << std::endl; + } + else if (strcmp(nargs[0], "mapext") == 0) { + if (!pool_name || nargs.size() < 2) + usage_exit(); + string oid(nargs[1]); + std::map m; + ret = io_ctx.mapext(oid, 0, -1, m); + if (ret < 0) { + cerr << "mapext error on " << pool_name << "/" << oid << ": " << cpp_strerror(ret) << std::endl; + goto out; + } + std::map::iterator iter; + 
for (iter = m.begin(); iter != m.end(); ++iter) { + cout << hex << iter->first << "\t" << iter->second << dec << std::endl; + } + } + else if (strcmp(nargs[0], "stat") == 0) { + if (!pool_name || nargs.size() < 2) + usage_exit(); + string oid(nargs[1]); + uint64_t size; + time_t mtime; + ret = io_ctx.stat(oid, &size, &mtime); + if (ret < 0) { + cerr << " error stat-ing " << pool_name << "/" << oid << ": " + << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } else { + cout << pool_name << "/" << oid + << " mtime " << mtime << ", size " << size << std::endl; + } + } + else if (strcmp(nargs[0], "get") == 0) { + if (!pool_name || nargs.size() < 3) + usage_exit(); + ret = do_get(io_ctx, nargs[1], nargs[2], op_size); + if (ret < 0) { + cerr << "error getting " << pool_name << "/" << nargs[1] << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } + } + else if (strcmp(nargs[0], "put") == 0) { + if (!pool_name || nargs.size() < 3) + usage_exit(); + ret = do_put(io_ctx, nargs[1], nargs[2], op_size); + if (ret < 0) { + cerr << "error putting " << pool_name << "/" << nargs[1] << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } + } + else if (strcmp(nargs[0], "truncate") == 0) { + if (!pool_name || nargs.size() < 3) + usage_exit(); + + string oid(nargs[1]); + long size = atol(nargs[2]); + if (size < 0) { + cerr << "error, cannot truncate to negative value" << std::endl; + usage_exit(); + } + ret = io_ctx.trunc(oid, size); + if (ret < 0) { + cerr << "error truncating oid " + << oid << " to " << size << ": " + << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + } else { + ret = 0; + } + } + else if (strcmp(nargs[0], "setxattr") == 0) { + if (!pool_name || nargs.size() < 4) + usage_exit(); + + string oid(nargs[1]); + string attr_name(nargs[2]); + string attr_val(nargs[3]); + + bufferlist bl; + bl.append(attr_val.c_str(), attr_val.length()); + + ret = io_ctx.setxattr(oid, attr_name.c_str(), bl); + if (ret < 0) { + cerr 
<< "error setting xattr " << pool_name << "/" << oid << "/" << attr_name << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } + else + ret = 0; + } + else if (strcmp(nargs[0], "getxattr") == 0) { + if (!pool_name || nargs.size() < 3) + usage_exit(); + + string oid(nargs[1]); + string attr_name(nargs[2]); + + bufferlist bl; + ret = io_ctx.getxattr(oid, attr_name.c_str(), bl); + if (ret < 0) { + cerr << "error getting xattr " << pool_name << "/" << oid << "/" << attr_name << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } + else + ret = 0; + string s(bl.c_str(), bl.length()); + cout << s << std::endl; + } else if (strcmp(nargs[0], "rmxattr") == 0) { + if (!pool_name || nargs.size() < 3) + usage_exit(); + + string oid(nargs[1]); + string attr_name(nargs[2]); + + ret = io_ctx.rmxattr(oid, attr_name.c_str()); + if (ret < 0) { + cerr << "error removing xattr " << pool_name << "/" << oid << "/" << attr_name << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } + } else if (strcmp(nargs[0], "listxattr") == 0) { + if (!pool_name || nargs.size() < 2) + usage_exit(); + + string oid(nargs[1]); + map attrset; + bufferlist bl; + ret = io_ctx.getxattrs(oid, attrset); + if (ret < 0) { + cerr << "error getting xattr set " << pool_name << "/" << oid << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } + + for (map::iterator iter = attrset.begin(); + iter != attrset.end(); ++iter) { + cout << iter->first << std::endl; + } + } else if (strcmp(nargs[0], "getomapheader") == 0) { + if (!pool_name || nargs.size() < 2) + usage_exit(); + + string oid(nargs[1]); + + bufferlist header; + ret = io_ctx.omap_get_header(oid, &header); + if (ret < 0) { + cerr << "error getting omap header " << pool_name << "/" << oid + << ": " << cpp_strerror(ret) << std::endl; + goto out; + } else { + cout << "header (" << header.length() << " bytes) :\n"; + header.hexdump(cout); + cout << std::endl; + ret = 0; + } + } 
else if (strcmp(nargs[0], "setomapheader") == 0) { + if (!pool_name || nargs.size() < 3) + usage_exit(); + + string oid(nargs[1]); + string val(nargs[2]); + + bufferlist bl; + bl.append(val); + + ret = io_ctx.omap_set_header(oid, bl); + if (ret < 0) { + cerr << "error setting omap value " << pool_name << "/" << oid + << ": " << cpp_strerror(ret) << std::endl; + goto out; + } else { + ret = 0; + } + } else if (strcmp(nargs[0], "setomapval") == 0) { + if (!pool_name || nargs.size() < 4) + usage_exit(); + + string oid(nargs[1]); + string key(nargs[2]); + string val(nargs[3]); + + map values; + bufferlist bl; + bl.append(val); + values[key] = bl; + + ret = io_ctx.omap_set(oid, values); + if (ret < 0) { + cerr << "error setting omap value " << pool_name << "/" << oid << "/" + << key << ": " << cpp_strerror(ret) << std::endl; + goto out; + } else { + ret = 0; + } + } else if (strcmp(nargs[0], "getomapval") == 0) { + if (!pool_name || nargs.size() < 3) + usage_exit(); + + string oid(nargs[1]); + string key(nargs[2]); + set keys; + keys.insert(key); + + map values; + ret = io_ctx.omap_get_vals_by_keys(oid, keys, &values); + if (ret < 0) { + cerr << "error getting omap value " << pool_name << "/" << oid << "/" + << key << ": " << cpp_strerror(ret) << std::endl; + goto out; + } else { + ret = 0; + } + + if (values.size() && values.begin()->first == key) { + cout << " (length " << values.begin()->second.length() << ") : "; + values.begin()->second.hexdump(cout); + cout << std::endl; + } else { + cout << "No such key: " << pool_name << "/" << oid << "/" << key + << std::endl; + ret = -1; + goto out; + } + } else if (strcmp(nargs[0], "rmomapkey") == 0) { + if (!pool_name || nargs.size() < 3) + usage_exit(); + + string oid(nargs[1]); + string key(nargs[2]); + set keys; + keys.insert(key); + + ret = io_ctx.omap_rm_keys(oid, keys); + if (ret < 0) { + cerr << "error removing omap key " << pool_name << "/" << oid << "/" + << key << ": " << cpp_strerror(ret) << std::endl; + goto out; 
+ } else { + ret = 0; + } + } else if (strcmp(nargs[0], "listomapvals") == 0) { + if (!pool_name || nargs.size() < 2) + usage_exit(); + + string oid(nargs[1]); + string last_read = ""; + int MAX_READ = 512; + do { + map values; + ret = io_ctx.omap_get_vals(oid, last_read, MAX_READ, &values); + if (ret < 0) { + cerr << "error getting omap keys " << pool_name << "/" << oid << ": " + << cpp_strerror(ret) << std::endl; + return 1; + } + for (map::const_iterator it = values.begin(); + it != values.end(); ++it) { + // dump key in hex if it contains nonprintable characters + if (std::count_if(it->first.begin(), it->first.end(), + (int (*)(int))isprint) < (int)it->first.length()) { + cout << "key: (" << it->first.length() << " bytes):\n"; + bufferlist keybl; + keybl.append(it->first); + keybl.hexdump(cout); + } else { + cout << it->first; + } + cout << std::endl; + cout << "value: (" << it->second.length() << " bytes) :\n"; + it->second.hexdump(cout); + cout << std::endl; + } + } while (ret == MAX_READ); + ret = 0; + } + else if (strcmp(nargs[0], "cp") == 0) { + if (!pool_name) + usage_exit(); + + if (nargs.size() < 2 || nargs.size() > 3) + usage_exit(); + + const char *target = target_pool_name; + if (!target) + target = pool_name; + + const char *target_obj; + if (nargs.size() < 3) { + if (strcmp(target, pool_name) == 0) { + cerr << "cannot copy object into itself" << std::endl; + ret = -1; + goto out; + } + target_obj = nargs[1]; + } else { + target_obj = nargs[2]; + } + + // open io context. 
+ IoCtx target_ctx; + ret = rados.ioctx_create(target, target_ctx); + if (ret < 0) { + cerr << "error opening target pool " << target << ": " + << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } + if (target_oloc.size()) { + target_ctx.locator_set_key(target_oloc); + } + + ret = do_copy(io_ctx, nargs[1], target_ctx, target_obj); + if (ret < 0) { + cerr << "error copying " << pool_name << "/" << nargs[1] << " => " << target << "/" << target_obj << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } + } + else if (strcmp(nargs[0], "clonedata") == 0) { + if (!pool_name) + usage_exit(); + + if (nargs.size() < 2 || nargs.size() > 3) + usage_exit(); + + const char *target = target_pool_name; + if (!target) + target = pool_name; + + const char *target_obj; + if (nargs.size() < 3) { + if (strcmp(target, pool_name) == 0) { + cerr << "cannot copy object into itself" << std::endl; + ret = -1; + goto out; + } + target_obj = nargs[1]; + } else { + target_obj = nargs[2]; + } + + // open io context. 
+ IoCtx target_ctx; + ret = rados.ioctx_create(target, target_ctx); + if (ret < 0) { + cerr << "error opening target pool " << target << ": " + << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } + if (oloc.size()) { + target_ctx.locator_set_key(oloc); + } else { + cerr << "must specify locator for clone" << std::endl; + ret = -1; + goto out; + } + + ret = do_clone_data(io_ctx, nargs[1], target_ctx, target_obj); + if (ret < 0) { + cerr << "error cloning " << pool_name << "/" << nargs[1] << " => " << target << "/" << target_obj << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } + } else if (strcmp(nargs[0], "rm") == 0) { + if (!pool_name || nargs.size() < 2) + usage_exit(); + vector::iterator iter = nargs.begin(); + ++iter; + for (; iter != nargs.end(); ++iter) { + const string & oid = *iter; + ret = io_ctx.remove(oid); + if (ret < 0) { + cerr << "error removing " << pool_name << "/" << oid << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } + } + } + else if (strcmp(nargs[0], "create") == 0) { + if (!pool_name || nargs.size() < 2) + usage_exit(); + string oid(nargs[1]); + if (nargs.size() > 2) { + string category(nargs[2]); + ret = io_ctx.create(oid, true, category); + } else { + ret = io_ctx.create(oid, true); + } + if (ret < 0) { + cerr << "error creating " << pool_name << "/" << oid << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } + } + + else if (strcmp(nargs[0], "tmap") == 0) { + if (nargs.size() < 3) + usage_exit(); + if (strcmp(nargs[1], "dump") == 0) { + bufferlist outdata; + string oid(nargs[2]); + ret = io_ctx.read(oid, outdata, 0, 0); + if (ret < 0) { + cerr << "error reading " << pool_name << "/" << oid << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } + bufferlist::iterator p = outdata.begin(); + bufferlist header; + map kv; + ::decode(header, p); + ::decode(kv, p); + cout << "header (" << header.length() << " bytes):\n"; + 
header.hexdump(cout); + cout << "\n"; + cout << kv.size() << " keys\n"; + for (map::iterator q = kv.begin(); q != kv.end(); ++q) { + cout << "key '" << q->first << "' (" << q->second.length() << " bytes):\n"; + q->second.hexdump(cout); + cout << "\n"; + } + } + else if (strcmp(nargs[1], "set") == 0 || + strcmp(nargs[1], "create") == 0) { + if (nargs.size() < 5) + usage_exit(); + string oid(nargs[2]); + string k(nargs[3]); + string v(nargs[4]); + bufferlist bl; + char c = (strcmp(nargs[1], "set") == 0) ? CEPH_OSD_TMAP_SET : CEPH_OSD_TMAP_CREATE; + ::encode(c, bl); + ::encode(k, bl); + ::encode(v, bl); + ret = io_ctx.tmap_update(oid, bl); + } + } + + else if (strcmp(nargs[0], "mkpool") == 0) { + int auid = 0; + __u8 crush_rule = 0; + if (nargs.size() < 2) + usage_exit(); + if (nargs.size() > 2) { + auid = strtol(nargs[2], 0, 10); + cerr << "setting auid:" << auid << std::endl; + if (nargs.size() > 3) { + crush_rule = (__u8)strtol(nargs[3], 0, 10); + cerr << "using crush rule " << (int)crush_rule << std::endl; + } + } + ret = rados.pool_create(nargs[1], auid, crush_rule); + if (ret < 0) { + cerr << "error creating pool " << nargs[1] << ": " + << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } + cout << "successfully created pool " << nargs[1] << std::endl; + } + else if (strcmp(nargs[0], "cppool") == 0) { + if (nargs.size() != 3) + usage_exit(); + const char *src_pool = nargs[1]; + const char *target_pool = nargs[2]; + + if (strcmp(src_pool, target_pool) == 0) { + cerr << "cannot copy pool into itself" << std::endl; + ret = -1; + goto out; + } + + ret = do_copy_pool(rados, src_pool, target_pool); + if (ret < 0) { + cerr << "error copying pool " << src_pool << " => " << target_pool << ": " + << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } + cout << "successfully copied pool " << nargs[1] << std::endl; + } + else if (strcmp(nargs[0], "rmpool") == 0) { + if (nargs.size() < 2) + usage_exit(); + if (nargs.size() < 4 || + 
strcmp(nargs[1], nargs[2]) != 0 || + strcmp(nargs[3], "--yes-i-really-really-mean-it") != 0) { + cerr << "WARNING:\n" + << " This will PERMANENTLY DESTROY an entire pool of objects with no way back.\n" + << " To confirm, pass the pool to remove twice, followed by\n" + << " --yes-i-really-really-mean-it" << std::endl; + ret = -1; + goto out; + } + ret = rados.pool_delete(nargs[1]); + if (ret >= 0) { + cout << "successfully deleted pool " << nargs[1] << std::endl; + } else { //error + cerr << "pool " << nargs[1] << " does not exist" << std::endl; + } + } + else if (strcmp(nargs[0], "lssnap") == 0) { + if (!pool_name || nargs.size() != 1) + usage_exit(); + + vector snaps; + io_ctx.snap_list(&snaps); + for (vector::iterator i = snaps.begin(); + i != snaps.end(); + ++i) { + string s; + time_t t; + if (io_ctx.snap_get_name(*i, &s) < 0) + continue; + if (io_ctx.snap_get_stamp(*i, &t) < 0) + continue; + struct tm bdt; + localtime_r(&t, &bdt); + cout << *i << "\t" << s << "\t"; + + cout.setf(std::ios::right); + cout.fill('0'); + cout << std::setw(4) << (bdt.tm_year+1900) + << '.' << std::setw(2) << (bdt.tm_mon+1) + << '.' 
<< std::setw(2) << bdt.tm_mday + << ' ' + << std::setw(2) << bdt.tm_hour + << ':' << std::setw(2) << bdt.tm_min + << ':' << std::setw(2) << bdt.tm_sec + << std::endl; + cout.unsetf(std::ios::right); + } + cout << snaps.size() << " snaps" << std::endl; + } + + else if (strcmp(nargs[0], "mksnap") == 0) { + if (!pool_name || nargs.size() < 2) + usage_exit(); + + ret = io_ctx.snap_create(nargs[1]); + if (ret < 0) { + cerr << "error creating pool " << pool_name << " snapshot " << nargs[1] + << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } + cout << "created pool " << pool_name << " snap " << nargs[1] << std::endl; + } + + else if (strcmp(nargs[0], "rmsnap") == 0) { + if (!pool_name || nargs.size() < 2) + usage_exit(); + + ret = io_ctx.snap_remove(nargs[1]); + if (ret < 0) { + cerr << "error removing pool " << pool_name << " snapshot " << nargs[1] + << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } + cout << "removed pool " << pool_name << " snap " << nargs[1] << std::endl; + } + + else if (strcmp(nargs[0], "rollback") == 0) { + if (!pool_name || nargs.size() < 3) + usage_exit(); + + ret = io_ctx.rollback(nargs[1], nargs[2]); + if (ret < 0) { + cerr << "error rolling back pool " << pool_name << " to snapshot " << nargs[1] + << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } + cout << "rolled back pool " << pool_name + << " to snapshot " << nargs[2] << std::endl; + } + else if (strcmp(nargs[0], "bench") == 0) { + if (!pool_name || nargs.size() < 3) + usage_exit(); + int seconds = atoi(nargs[1]); + int operation = 0; + if (strcmp(nargs[2], "write") == 0) + operation = OP_WRITE; + else if (strcmp(nargs[2], "seq") == 0) + operation = OP_SEQ_READ; + else if (strcmp(nargs[2], "rand") == 0) + operation = OP_RAND_READ; + else + usage_exit(); + RadosBencher bencher(g_ceph_context, rados, io_ctx); + bencher.set_show_time(show_time); + ret = bencher.aio_bench(operation, seconds, num_objs, + concurrent_ios, 
op_size, cleanup); + if (ret != 0) + cerr << "error during benchmark: " << ret << std::endl; + } + else if (strcmp(nargs[0], "cleanup") == 0) { + if (!pool_name || nargs.size() < 2) + usage_exit(); + const char *prefix = nargs[1]; + RadosBencher bencher(g_ceph_context, rados, io_ctx); + ret = bencher.clean_up(prefix, concurrent_ios); + if (ret != 0) + cerr << "error during cleanup: " << ret << std::endl; + } + else if (strcmp(nargs[0], "watch") == 0) { + if (!pool_name || nargs.size() < 2) + usage_exit(); + string oid(nargs[1]); + RadosWatchCtx ctx(oid.c_str()); + uint64_t cookie; + ret = io_ctx.watch(oid, 0, &cookie, &ctx); + if (ret != 0) + cerr << "error calling watch: " << ret << std::endl; + else { + cout << "press enter to exit..." << std::endl; + getchar(); + } + } + else if (strcmp(nargs[0], "notify") == 0) { + if (!pool_name || nargs.size() < 3) + usage_exit(); + string oid(nargs[1]); + string msg(nargs[2]); + bufferlist bl; + ::encode(msg, bl); + ret = io_ctx.notify(oid, 0, bl); + if (ret != 0) + cerr << "error calling notify: " << ret << std::endl; + } else if (strcmp(nargs[0], "load-gen") == 0) { + if (!pool_name) { + cerr << "error: must specify pool" << std::endl; + usage_exit(); + } + LoadGen lg(&rados); + if (min_obj_len) + lg.min_obj_len = min_obj_len; + if (max_obj_len) + lg.max_obj_len = max_obj_len; + if (min_op_len) + lg.min_op_len = min_op_len; + if (max_op_len) + lg.max_op_len = max_op_len; + if (max_ops) + lg.max_ops = max_ops; + if (max_backlog) + lg.max_backlog = max_backlog; + if (target_throughput) + lg.target_throughput = target_throughput << 20; + if (read_percent >= 0) + lg.read_percent = read_percent; + if (num_objs) + lg.num_objs = num_objs; + if (run_length) + lg.run_length = run_length; + + cout << "run length " << run_length << " seconds" << std::endl; + cout << "preparing " << lg.num_objs << " objects" << std::endl; + ret = lg.bootstrap(pool_name); + if (ret < 0) { + cerr << "load-gen bootstrap failed" << std::endl; + exit(1); + 
} + cout << "load-gen will run " << lg.run_length << " seconds" << std::endl; + lg.run(); + lg.cleanup(); + } else if (strcmp(nargs[0], "listomapkeys") == 0) { + if (!pool_name || nargs.size() < 2) + usage_exit(); + + librados::ObjectReadOperation read; + set out_keys; + read.omap_get_keys("", LONG_MAX, &out_keys, &ret); + io_ctx.operate(nargs[1], &read, NULL); + if (ret < 0) { + cerr << "error getting omap key set " << pool_name << "/" + << nargs[1] << ": " << cpp_strerror(ret) << std::endl; + goto out; + } + + for (set::iterator iter = out_keys.begin(); + iter != out_keys.end(); ++iter) { + cout << *iter << std::endl; + } + } else if (strcmp(nargs[0], "lock") == 0) { + if (!pool_name) + usage_exit(); + + if (!formatter) { + formatter = new JSONFormatter(pretty_format); + } + ret = do_lock_cmd(nargs, opts, &io_ctx, formatter); + } else if (strcmp(nargs[0], "listwatchers") == 0) { + if (!pool_name || nargs.size() < 2) + usage_exit(); + + string oid(nargs[1]); + std::list lw; + + ret = io_ctx.list_watchers(oid, &lw); + if (ret < 0) { + cerr << "error listing watchers " << pool_name << "/" << oid << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } + else + ret = 0; + + for (std::list::iterator i = lw.begin(); i != lw.end(); ++i) { + cout << "watcher=" << i->addr << " client." 
<< i->watcher_id << " cookie=" << i->cookie << std::endl; + } + } else if (strcmp(nargs[0], "listsnaps") == 0) { + if (!pool_name || nargs.size() < 2) + usage_exit(); + + string oid(nargs[1]); + snap_set_t ls; + + io_ctx.snap_set_read(LIBRADOS_SNAP_DIR); + ret = io_ctx.list_snaps(oid, &ls); + if (ret < 0) { + cerr << "error listing snap shots " << pool_name << "/" << oid << ": " << strerror_r(-ret, buf, sizeof(buf)) << std::endl; + goto out; + } + else + ret = 0; + + map snamemap; + if (formatter || pretty_format) { + vector snaps; + io_ctx.snap_list(&snaps); + for (vector::iterator i = snaps.begin(); + i != snaps.end(); ++i) { + string s; + if (io_ctx.snap_get_name(*i, &s) < 0) + continue; + snamemap.insert(pair(*i, s)); + } + } + + if (formatter) { + formatter->open_object_section("object"); + formatter->dump_string("name", oid); + formatter->open_array_section("clones"); + } else { + cout << oid << ":" << std::endl; + cout << "cloneid snaps size overlap" << std::endl; + } + + for (std::vector::iterator ci = ls.clones.begin(); + ci != ls.clones.end(); ++ci) { + + if (formatter) formatter->open_object_section("clone"); + + if (ci->cloneid == librados::SNAP_HEAD) { + if (formatter) + formatter->dump_string("id", "head"); + else + cout << "head"; + } else { + if (formatter) + formatter->dump_unsigned("id", ci->cloneid); + else + cout << ci->cloneid; + } + + if (formatter) + formatter->open_array_section("snapshots"); + else + cout << "\t"; + + if (!formatter && ci->snaps.empty()) { + cout << "-"; + } + for (std::vector::const_iterator snapindex = ci->snaps.begin(); + snapindex != ci->snaps.end(); ++snapindex) { + + map::iterator si; + + if (formatter || pretty_format) si = snamemap.find(*snapindex); + + if (formatter) { + formatter->open_object_section("snapshot"); + formatter->dump_unsigned("id", *snapindex); + if (si != snamemap.end()) + formatter->dump_string("name", si->second); + formatter->close_section(); //snapshot + } else { + if (snapindex != 
ci->snaps.begin()) cout << ","; + if (!pretty_format || (si == snamemap.end())) + cout << *snapindex; + else + cout << si->second << "(" << *snapindex << ")"; + } + } + + if (formatter) { + formatter->close_section(); //Snapshots + formatter->dump_unsigned("size", ci->size); + } else { + cout << "\t" << ci->size; + } + + if (ci->cloneid != librados::SNAP_HEAD) { + if (formatter) + formatter->open_array_section("overlaps"); + else + cout << "\t["; + + for (std::vector< std::pair >::iterator ovi = ci->overlap.begin(); + ovi != ci->overlap.end(); ++ovi) { + if (formatter) { + formatter->open_object_section("section"); + formatter->dump_unsigned("start", ovi->first); + formatter->dump_unsigned("length", ovi->second); + formatter->close_section(); //section + } else { + if (ovi != ci->overlap.begin()) cout << ","; + cout << ovi->first << "~" << ovi->second; + } + } + if (formatter) + formatter->close_section(); //overlaps + else + cout << "]" << std::endl; + } + if (formatter) formatter->close_section(); //clone + } + if (formatter) { + formatter->close_section(); //clones + formatter->close_section(); //object + formatter->flush(cout); + } else { + cout << std::endl; + } + + } else { + cerr << "unrecognized command " << nargs[0] << std::endl; + usage_exit(); + } + + if (ret < 0) + cerr << "error " << (-ret) << ": " << cpp_strerror(ret) << std::endl; + +out: + delete formatter; + return (ret < 0) ? 
/**
 * Entry point of the rados command-line tool.
 *
 * Converts argv (plus arguments taken from the environment) into a vector,
 * initialises the global Ceph context, folds every recognised option into
 * the string->string map `opts`, and finally dispatches on the first
 * remaining argument: "import"/"export" go to rados_tool_sync(), anything
 * else goes to rados_tool_common().
 *
 * Returns 1 when no action is left after option parsing; otherwise returns
 * whatever the selected tool function returns.
 */
int main(int argc, const char **argv)
{
  // NOTE(review): the element type of this vector is not visible in this
  // copy of the file; strcmp(args[0], ...) below implies C strings -- confirm.
  vector args;
  argv_to_vec(argc, argv, args);
  env_to_vec(args);

  global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0);
  common_init_finish(g_ceph_context);

  // Parsed options, keyed by option name. Boolean flags are stored as "true".
  std::map < std::string, std::string > opts;
  std::vector::iterator i;
  // Receives the value of the most recently matched "--opt value" option.
  std::string val;
  // No increment in the loop header: the iterator is only advanced
  // explicitly in the final else branch below.
  for (i = args.begin(); i != args.end(); ) {
    if (ceph_argparse_double_dash(args, i)) {
      break;
    } else if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) {
      usage(cout);
      exit(0);
    } else if (ceph_argparse_flag(args, i, "-f", "--force", (char*)NULL)) {
      opts["force"] = "true";
    } else if (ceph_argparse_flag(args, i, "-d", "--delete-after", (char*)NULL)) {
      opts["delete-after"] = "true";
    } else if (ceph_argparse_flag(args, i, "-C", "--create", "--create-pool",
                                  (char*)NULL)) {
      opts["create"] = "true";
    } else if (ceph_argparse_flag(args, i, "--pretty-format", (char*)NULL)) {
      opts["pretty-format"] = "true";
    } else if (ceph_argparse_flag(args, i, "--show-time", (char*)NULL)) {
      opts["show-time"] = "true";
    } else if (ceph_argparse_flag(args, i, "--no-cleanup", (char*)NULL)) {
      opts["no-cleanup"] = "true";
    } else if (ceph_argparse_witharg(args, i, &val, "-p", "--pool", (char*)NULL)) {
      opts["pool"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "--target-pool", (char*)NULL)) {
      // Note the underscore: this key (and the two locator keys below) does
      // not follow the dash-separated spelling used by the other options.
      opts["target_pool"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "--object-locator" , (char *)NULL)) {
      opts["object_locator"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "--target-locator" , (char *)NULL)) {
      opts["target_locator"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "--category", (char*)NULL)) {
      opts["category"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "-t", "--concurrent-ios", (char*)NULL)) {
      opts["concurrent-ios"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "--block-size", (char*)NULL)) {
      opts["block-size"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "-b", (char*)NULL)) {
      // "-b" is a second spelling of --block-size; both store the same key.
      opts["block-size"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "-s", "--snap", (char*)NULL)) {
      opts["snap"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "-S", "--snapid", (char*)NULL)) {
      opts["snapid"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "--min-object-size", (char*)NULL)) {
      opts["min-object-size"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "--max-object-size", (char*)NULL)) {
      opts["max-object-size"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "--min-op-len", (char*)NULL)) {
      opts["min-op-len"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "--max-op-len", (char*)NULL)) {
      opts["max-op-len"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "--max-ops", (char*)NULL)) {
      opts["max-ops"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "--max-backlog", (char*)NULL)) {
      opts["max-backlog"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "--target-throughput", (char*)NULL)) {
      opts["target-throughput"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "--read-percent", (char*)NULL)) {
      opts["read-percent"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "--num-objects", (char*)NULL)) {
      opts["num-objects"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "--run-length", (char*)NULL)) {
      opts["run-length"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "--workers", (char*)NULL)) {
      opts["workers"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "--format", (char*)NULL)) {
      opts["format"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "--lock-tag", (char*)NULL)) {
      opts["lock-tag"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "--lock-cookie", (char*)NULL)) {
      opts["lock-cookie"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "--lock-description", (char*)NULL)) {
      opts["lock-description"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "--lock-duration", (char*)NULL)) {
      opts["lock-duration"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "--lock-type", (char*)NULL)) {
      opts["lock-type"] = val;
    } else if (ceph_argparse_witharg(args, i, &val, "-N", "--namespace", (char*)NULL)) {
      opts["namespace"] = val;
    } else {
      // NOTE(review): this inspects `val` (the value of the last option that
      // took an argument), not the argument currently under the iterator.
      // Looks unintended, but the per-command code may rely on dash-prefixed
      // positionals reaching it -- confirm before changing.
      if (val[0] == '-')
        usage_exit();
      // Not an option handled here: leave it in args and move on.
      ++i;
    }
  }

  if (args.empty()) {
    cerr << "rados: you must give an action. Try --help" << std::endl;
    return 1;
  }
  // The first remaining argument selects the tool implementation.
  if ((strcmp(args[0], "import") == 0) || (strcmp(args[0], "export") == 0))
    return rados_tool_sync(opts, args);
  else
    return rados_tool_common(opts, args);
}
+ * + */ +#include "include/int_types.h" + +#include "rados_sync.h" +#include "common/errno.h" +#include "common/strtol.h" +#include "include/rados/librados.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "include/compat.h" +#include "common/xattr.h" + +using namespace librados; + +class ExportLocalFileWQ : public RadosSyncWQ { +public: + ExportLocalFileWQ(IoCtxDistributor *io_ctx_dist, time_t ti, + ThreadPool *tp, ExportDir *export_dir, bool force) + : RadosSyncWQ(io_ctx_dist, ti, 0, tp), + m_export_dir(export_dir), + m_force(force) + { + } +private: + void _process(std::string *s) { + IoCtx &io_ctx(m_io_ctx_dist->get_ioctx()); + int flags = 0; + auto_ptr sobj; + auto_ptr dobj; + const std::string &rados_name(*s); + std::list < std::string > only_in_a; + std::list < std::string > only_in_b; + std::list < std::string > diff; + int ret = BackedUpObject::from_rados(io_ctx, rados_name.c_str(), sobj); + if (ret) { + cerr << ERR_PREFIX << "couldn't get '" << rados_name << "' from rados: error " + << ret << std::endl; + _exit(ret); + } + std::string obj_path(sobj->get_fs_path(m_export_dir)); + if (m_force) { + flags |= (CHANGED_CONTENTS | CHANGED_XATTRS); + } + else { + ret = BackedUpObject::from_path(obj_path.c_str(), dobj); + if (ret == ENOENT) { + sobj->get_xattrs(only_in_a); + flags |= CHANGED_CONTENTS; + } + else if (ret) { + cerr << ERR_PREFIX << "BackedUpObject::from_path returned " + << ret << std::endl; + _exit(ret); + } + else { + sobj->xattr_diff(dobj.get(), only_in_a, only_in_b, diff); + if ((sobj->get_rados_size() == dobj->get_rados_size()) && + (sobj->get_mtime() == dobj->get_mtime())) { + flags |= CHANGED_CONTENTS; + } + } + } + if (flags & CHANGED_CONTENTS) { + ret = sobj->download(io_ctx, obj_path.c_str()); + if (ret) { + cerr << ERR_PREFIX << "download error: " << ret << std::endl; + _exit(ret); + } + } + diff.splice(diff.begin(), only_in_a); + for (std::list < std::string 
>::const_iterator x = diff.begin(); + x != diff.end(); ++x) { + flags |= CHANGED_XATTRS; + const Xattr *xattr = sobj->get_xattr(*x); + if (xattr == NULL) { + cerr << ERR_PREFIX << "internal error on line: " << __LINE__ << std::endl; + _exit(ret); + } + std::string xattr_fs_name(USER_XATTR_PREFIX); + xattr_fs_name += x->c_str(); + ret = ceph_os_setxattr(obj_path.c_str(), xattr_fs_name.c_str(), + xattr->data, xattr->len); + if (ret) { + ret = errno; + cerr << ERR_PREFIX << "setxattr error: " << cpp_strerror(ret) << std::endl; + _exit(ret); + } + } + for (std::list < std::string >::const_iterator x = only_in_b.begin(); + x != only_in_b.end(); ++x) { + flags |= CHANGED_XATTRS; + ret = ceph_os_removexattr(obj_path.c_str(), x->c_str()); + if (ret) { + ret = errno; + cerr << ERR_PREFIX << "removexattr error: " << cpp_strerror(ret) << std::endl; + _exit(ret); + } + } + if (m_force) { + cout << "[force] " << rados_name << std::endl; + } + else if (flags & CHANGED_CONTENTS) { + cout << "[exported] " << rados_name << std::endl; + } + else if (flags & CHANGED_XATTRS) { + cout << "[xattr] " << rados_name << std::endl; + } + } + ExportDir *m_export_dir; + bool m_force; +}; + +class ExportValidateExistingWQ : public RadosSyncWQ { +public: + ExportValidateExistingWQ(IoCtxDistributor *io_ctx_dist, time_t ti, + ThreadPool *tp, const char *dir_name) + : RadosSyncWQ(io_ctx_dist, ti, 0, tp), + m_dir_name(dir_name) + { + } +private: + void _process(std::string *s) { + IoCtx &io_ctx(m_io_ctx_dist->get_ioctx()); + auto_ptr lobj; + const std::string &local_name(*s); + int ret = BackedUpObject::from_file(local_name.c_str(), m_dir_name, lobj); + if (ret) { + cout << ERR_PREFIX << "BackedUpObject::from_file: delete loop: " + << "got error " << ret << std::endl; + _exit(ret); + } + auto_ptr robj; + ret = BackedUpObject::from_rados(io_ctx, lobj->get_rados_name(), robj); + if (ret == -ENOENT) { + // The entry doesn't exist on the remote server; delete it locally + char path[strlen(m_dir_name) + 
local_name.size() + 2]; + snprintf(path, sizeof(path), "%s/%s", m_dir_name, local_name.c_str()); + if (unlink(path)) { + ret = errno; + cerr << ERR_PREFIX << "error unlinking '" << path << "': " + << cpp_strerror(ret) << std::endl; + _exit(ret); + } + cout << "[deleted] " << "removed '" << local_name << "'" << std::endl; + } + else if (ret) { + cerr << ERR_PREFIX << "BackedUpObject::from_rados: delete loop: " + << "got error " << ret << std::endl; + _exit(ret); + } + } + const char *m_dir_name; +}; + +int do_rados_export(ThreadPool *tp, IoCtx& io_ctx, + IoCtxDistributor *io_ctx_dist, const char *dir_name, + bool create, bool force, bool delete_after) +{ + librados::ObjectIterator oi = io_ctx.objects_begin(); + librados::ObjectIterator oi_end = io_ctx.objects_end(); + auto_ptr export_dir; + export_dir.reset(ExportDir::create_for_writing(dir_name, 1, create)); + if (!export_dir.get()) + return -EIO; + ExportLocalFileWQ export_object_wq(io_ctx_dist, time(NULL), + tp, export_dir.get(), force); + for (; oi != oi_end; ++oi) { + export_object_wq.queue(new std::string((*oi).first)); + } + export_object_wq.drain(); + + if (delete_after) { + ExportValidateExistingWQ export_val_wq(io_ctx_dist, time(NULL), + tp, dir_name); + DirHolder dh; + int err = dh.opendir(dir_name); + if (err) { + cerr << ERR_PREFIX << "opendir(" << dir_name << ") error: " + << cpp_strerror(err) << std::endl; + return err; + } + while (true) { + struct dirent *de = readdir(dh.dp); + if (!de) + break; + if ((strcmp(de->d_name, ".") == 0) || (strcmp(de->d_name, "..") == 0)) + continue; + if (is_suffix(de->d_name, RADOS_SYNC_TMP_SUFFIX)) { + char path[strlen(dir_name) + strlen(de->d_name) + 2]; + snprintf(path, sizeof(path), "%s/%s", dir_name, de->d_name); + if (unlink(path)) { + int ret = errno; + cerr << ERR_PREFIX << "error unlinking temporary file '" << path << "': " + << cpp_strerror(ret) << std::endl; + return ret; + } + cout << "[deleted] " << "removed temporary file '" << de->d_name << "'" << 
std::endl; + continue; + } + export_val_wq.queue(new std::string(de->d_name)); + } + export_val_wq.drain(); + } + cout << "[done]" << std::endl; + return 0; +} diff --git a/src/tools/rados/rados_import.cc b/src/tools/rados/rados_import.cc new file mode 100644 index 00000000000..a6a398d767b --- /dev/null +++ b/src/tools/rados/rados_import.cc @@ -0,0 +1,239 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2011 New Dream Network + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ +#include "include/int_types.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rados_sync.h" +#include "common/errno.h" +#include "common/strtol.h" +#include "include/rados/librados.hpp" + +using namespace librados; +using std::auto_ptr; + +class ImportLocalFileWQ : public RadosSyncWQ { +public: + ImportLocalFileWQ(const char *dir_name, bool force, + IoCtxDistributor *io_ctx_dist, time_t ti, ThreadPool *tp) + : RadosSyncWQ(io_ctx_dist, ti, 0, tp), + m_dir_name(dir_name), + m_force(force) + { + } +private: + void _process(std::string *s) { + IoCtx &io_ctx(m_io_ctx_dist->get_ioctx()); + const std::string &local_name(*s); + auto_ptr sobj; + auto_ptr dobj; + std::list < std::string > only_in_a; + std::list < std::string > only_in_b; + std::list < std::string > diff; + int flags = 0; + + int ret = BackedUpObject::from_file(local_name.c_str(), + m_dir_name.c_str(), sobj); + if (ret) { + cerr << ERR_PREFIX << "BackedUpObject::from_file: got error " + << ret << std::endl; + _exit(ret); + } + const char *rados_name(sobj->get_rados_name()); + if (m_force) { + flags |= (CHANGED_CONTENTS | CHANGED_XATTRS); + } + else { + ret = 
BackedUpObject::from_rados(io_ctx, rados_name, dobj); + if (ret == -ENOENT) { + flags |= CHANGED_CONTENTS; + sobj->get_xattrs(only_in_a); + } + else if (ret) { + cerr << ERR_PREFIX << "BackedUpObject::from_rados returned " + << ret << std::endl; + _exit(ret); + } + else { + sobj->xattr_diff(dobj.get(), only_in_a, only_in_b, diff); + if ((sobj->get_rados_size() == dobj->get_rados_size()) && + (sobj->get_mtime() == dobj->get_mtime())) { + flags |= CHANGED_CONTENTS; + } + } + } + if (flags & CHANGED_CONTENTS) { + ret = sobj->upload(io_ctx, local_name.c_str(), m_dir_name.c_str()); + if (ret) { + cerr << ERR_PREFIX << "upload error: " << ret << std::endl; + _exit(ret); + } + } + for (std::list < std::string >::const_iterator x = only_in_a.begin(); + x != only_in_a.end(); ++x) { + flags |= CHANGED_XATTRS; + const Xattr *xattr = sobj->get_xattr(*x); + if (xattr == NULL) { + cerr << ERR_PREFIX << "internal error on line: " << __LINE__ << std::endl; + _exit(ret); + } + bufferlist bl; + bl.append(xattr->data, xattr->len); + ret = io_ctx.setxattr(rados_name, x->c_str(), bl); + if (ret < 0) { + ret = errno; + cerr << ERR_PREFIX << "io_ctx.setxattr(rados_name='" << rados_name + << "', xattr_name='" << x->c_str() << "'): " << cpp_strerror(ret) + << std::endl; + _exit(ret); + } + } + for (std::list < std::string >::const_iterator x = diff.begin(); + x != diff.end(); ++x) { + flags |= CHANGED_XATTRS; + const Xattr *xattr = sobj->get_xattr(*x); + if (xattr == NULL) { + cerr << ERR_PREFIX << "internal error on line: " << __LINE__ << std::endl; + _exit(ret); + } + bufferlist bl; + bl.append(xattr->data, xattr->len); + ret = io_ctx.rmxattr(rados_name, x->c_str()); + if (ret < 0) { + cerr << ERR_PREFIX << "io_ctx.rmxattr error2: " << cpp_strerror(ret) + << std::endl; + _exit(ret); + } + ret = io_ctx.setxattr(rados_name, x->c_str(), bl); + if (ret < 0) { + ret = errno; + cerr << ERR_PREFIX << "io_ctx.setxattr(rados_name='" << rados_name + << "', xattr='" << x->c_str() << "'): " << 
cpp_strerror(ret) << std::endl; + _exit(ret); + } + } + for (std::list < std::string >::const_iterator x = only_in_b.begin(); + x != only_in_b.end(); ++x) { + flags |= CHANGED_XATTRS; + ret = io_ctx.rmxattr(rados_name, x->c_str()); + if (ret < 0) { + ret = errno; + cerr << ERR_PREFIX << "rmxattr error3: " << cpp_strerror(ret) << std::endl; + _exit(ret); + } + } + if (m_force) { + cout << "[force] " << rados_name << std::endl; + } + else if (flags & CHANGED_CONTENTS) { + cout << "[imported] " << rados_name << std::endl; + } + else if (flags & CHANGED_XATTRS) { + cout << "[xattr] " << rados_name << std::endl; + } + } + std::string m_dir_name; + bool m_force; +}; + +class ImportValidateExistingWQ : public RadosSyncWQ { +public: + ImportValidateExistingWQ(ExportDir *export_dir, + IoCtxDistributor *io_ctx_dist, time_t ti, ThreadPool *tp) + : RadosSyncWQ(io_ctx_dist, ti, 0, tp), + m_export_dir(export_dir) + { + } +private: + void _process(std::string *s) { + IoCtx &io_ctx(m_io_ctx_dist->get_ioctx()); + const std::string &rados_name(*s); + auto_ptr robj; + int ret = BackedUpObject::from_rados(io_ctx, rados_name.c_str(), robj); + if (ret) { + cerr << ERR_PREFIX << "BackedUpObject::from_rados in delete loop " + << "returned " << ret << std::endl; + _exit(ret); + } + std::string obj_path(robj->get_fs_path(m_export_dir)); + auto_ptr lobj; + ret = BackedUpObject::from_path(obj_path.c_str(), lobj); + if (ret == ENOENT) { + ret = io_ctx.remove(rados_name); + if (ret && ret != -ENOENT) { + cerr << ERR_PREFIX << "io_ctx.remove(" << obj_path << ") failed " + << "with error " << ret << std::endl; + _exit(ret); + } + cout << "[deleted] " << "removed '" << rados_name << "'" << std::endl; + } + else if (ret) { + cerr << ERR_PREFIX << "BackedUpObject::from_path in delete loop " + << "returned " << ret << std::endl; + _exit(ret); + } + } + ExportDir *m_export_dir; +}; + +int do_rados_import(ThreadPool *tp, IoCtx &io_ctx, IoCtxDistributor* io_ctx_dist, + const char *dir_name, bool force, 
bool delete_after) +{ + auto_ptr export_dir; + export_dir.reset(ExportDir::from_file_system(dir_name)); + if (!export_dir.get()) + return -EIO; + DirHolder dh; + int ret = dh.opendir(dir_name); + if (ret) { + cerr << ERR_PREFIX << "opendir(" << dir_name << ") error: " + << cpp_strerror(ret) << std::endl; + return ret; + } + ImportLocalFileWQ import_file_wq(dir_name, force, + io_ctx_dist, time(NULL), tp); + while (true) { + struct dirent *de = readdir(dh.dp); + if (!de) + break; + if ((strcmp(de->d_name, ".") == 0) || (strcmp(de->d_name, "..") == 0)) + continue; + if (is_suffix(de->d_name, RADOS_SYNC_TMP_SUFFIX)) + continue; + import_file_wq.queue(new std::string(de->d_name)); + } + import_file_wq.drain(); + + if (delete_after) { + ImportValidateExistingWQ import_val_wq(export_dir.get(), io_ctx_dist, + time(NULL), tp); + librados::ObjectIterator oi = io_ctx.objects_begin(); + librados::ObjectIterator oi_end = io_ctx.objects_end(); + for (; oi != oi_end; ++oi) { + import_val_wq.queue(new std::string((*oi).first)); + } + import_val_wq.drain(); + } + cout << "[done]" << std::endl; + return 0; +} diff --git a/src/tools/rados/rados_sync.cc b/src/tools/rados/rados_sync.cc new file mode 100644 index 00000000000..03293d3402a --- /dev/null +++ b/src/tools/rados/rados_sync.cc @@ -0,0 +1,901 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2011 New Dream Network + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
// Names of the bookkeeping attributes rados sync stores on exported files.
static const char * const XATTR_RADOS_SYNC_VER = "user.rados_sync_ver";
static const char * const XATTR_FULLNAME = "user.rados_full_name";

// Prefix under which an object's own xattrs are stored on the file system.
const char USER_XATTR_PREFIX[] = "user.rados.";
static const size_t USER_XATTR_PREFIX_LEN =
  sizeof(USER_XATTR_PREFIX) / sizeof(USER_XATTR_PREFIX[0]) - 1;

/* It's important that RADOS_SYNC_TMP_SUFFIX contain at least one character
 * that we wouldn't normally allow in a file name -- in this case, $ */
const char RADOS_SYNC_TMP_SUFFIX[] = "$tmp";
static const size_t RADOS_SYNC_TMP_SUFFIX_LEN =
  sizeof(RADOS_SYNC_TMP_SUFFIX) / sizeof(RADOS_SYNC_TMP_SUFFIX[0]) - 1;

/**
 * Strip USER_XATTR_PREFIX from a file-system xattr name.
 *
 * @param fs_xattr_name  NUL-terminated attribute name as stored on disk
 * @return the remainder after the prefix, or an empty string when the name
 *         does not start with the prefix
 */
std::string get_user_xattr_name(const char *fs_xattr_name)
{
  const bool has_prefix =
    (strncmp(fs_xattr_name, USER_XATTR_PREFIX, USER_XATTR_PREFIX_LEN) == 0);
  if (has_prefix)
    return std::string(fs_xattr_name + USER_XATTR_PREFIX_LEN);
  return std::string();
}

/**
 * Tell whether `str` ends with `suffix`.
 *
 * @param str     NUL-terminated string to inspect
 * @param suffix  NUL-terminated candidate suffix
 * @return true when the last strlen(suffix) bytes of str equal suffix
 */
bool is_suffix(const char *str, const char *suffix)
{
  const size_t str_len = strlen(str);
  const size_t suffix_len = strlen(suffix);
  if (suffix_len > str_len)
    return false;
  // Both ranges are exactly suffix_len bytes long, so a byte compare of the
  // tail is sufficient.
  const char *tail = str + (str_len - suffix_len);
  return memcmp(tail, suffix, suffix_len) == 0;
}
+ << std::endl; + return NULL; + } + int ret = mkdir(path.c_str(), 0700); + if (ret < 0) { + int err = errno; + if (err != EEXIST) { + cerr << ERR_PREFIX << "ExportDir: mkdir error: " + << cpp_strerror(err) << std::endl; + return NULL; + } + } + char buf[32]; + snprintf(buf, sizeof(buf), "%d", version); + ret = ceph_os_setxattr(path.c_str(), XATTR_RADOS_SYNC_VER, buf, strlen(buf) + 1); + if (ret < 0) { + int err = errno; + cerr << ERR_PREFIX << "ExportDir: setxattr error :" + << cpp_strerror(err) << std::endl; + return NULL; + } + return new ExportDir(version, path); +} + +ExportDir* ExportDir::from_file_system(const std::string &path) +{ + if (access(path.c_str(), R_OK)) { + cerr << "ExportDir: source directory '" << path + << "' appears to be inaccessible." << std::endl; + return NULL; + } + int ret; + char buf[32]; + memset(buf, 0, sizeof(buf)); + ret = ceph_os_getxattr(path.c_str(), XATTR_RADOS_SYNC_VER, buf, sizeof(buf) - 1); + if (ret < 0) { + ret = errno; + if (ret == ENODATA) { + cerr << ERR_PREFIX << "ExportDir: directory '" << path + << "' does not appear to have been created by a rados " + << "export operation." << std::endl; + return NULL; + } + cerr << ERR_PREFIX << "ExportDir: getxattr error :" + << cpp_strerror(ret) << std::endl; + return NULL; + } + std::string err; + ret = strict_strtol(buf, 10, &err); + if (!err.empty()) { + cerr << ERR_PREFIX << "ExportDir: invalid value for " + << XATTR_RADOS_SYNC_VER << ": " << buf << ". parse error: " + << err << std::endl; + return NULL; + } + if (ret != 1) { + cerr << ERR_PREFIX << "ExportDir: can't handle any naming " + << "convention besides version 1. You must upgrade this program to " + << "handle the data in the new format." 
<< std::endl; + return NULL; + } + return new ExportDir(ret, path); +} + +std::string ExportDir::get_fs_path(const std::string &rados_name) const +{ + static int HASH_LENGTH = 17; + size_t i; + size_t strlen_rados_name = strlen(rados_name.c_str()); + size_t sz; + bool need_hash = false; + if (strlen_rados_name > 200) { + sz = 200; + need_hash = true; + } + else { + sz = strlen_rados_name; + } + char fs_path[sz + HASH_LENGTH + 1]; + for (i = 0; i < sz; ++i) { + // Just replace anything that looks funny with an 'at' sign. + // Unicode also gets turned into 'at' signs. + signed char c = rados_name[i]; + if (c < 0x20) { + // Since c is signed, this also eliminates bytes with the high bit set + c = '@'; + need_hash = true; + } + else if (c == 0x7f) { + c = '@'; + need_hash = true; + } + else if (c == '/') { + c = '@'; + need_hash = true; + } + else if (c == '\\') { + c = '@'; + need_hash = true; + } + else if (c == '$') { + c = '@'; + need_hash = true; + } + else if (c == ' ') { + c = '_'; + need_hash = true; + } + fs_path[i] = c; + } + + if (need_hash) { + uint64_t hash = 17; + for (i = 0; i < strlen_rados_name; ++i) { + hash += (rados_name[i] * 33); + } + // The extra byte of length is because snprintf always NULL-terminates. + snprintf(fs_path + i, HASH_LENGTH + 1, "_%016" PRIx64, hash); + } + else { + // NULL-terminate. 
+ fs_path[i] = '\0'; + } + + ostringstream oss; + oss << path << "/" << fs_path; + return oss.str(); +} + +ExportDir::ExportDir(int version_, const std::string &path_) + : version(version_), + path(path_) +{ +} + +DirHolder::DirHolder() + : dp(NULL) +{ +} + +DirHolder::~DirHolder() { + if (!dp) + return; + if (closedir(dp)) { + int err = errno; + cerr << ERR_PREFIX << "closedir failed: " << cpp_strerror(err) << std::endl; + } + dp = NULL; +} + +int DirHolder::opendir(const char *dir_name) { + dp = ::opendir(dir_name); + if (!dp) { + int err = errno; + return err; + } + return 0; +} + +static __thread int t_iod_idx = -1; + +static pthread_mutex_t io_ctx_distributor_lock = PTHREAD_MUTEX_INITIALIZER; + +IoCtxDistributor* IoCtxDistributor::instance() { + IoCtxDistributor *ret; + pthread_mutex_lock(&io_ctx_distributor_lock); + if (s_instance == NULL) { + s_instance = new IoCtxDistributor(); + } + ret = s_instance; + pthread_mutex_unlock(&io_ctx_distributor_lock); + return ret; +} + +int IoCtxDistributor::init(Rados &cluster, const char *pool_name, + int num_ioctxes) { + m_io_ctxes.resize(num_ioctxes); + for (std::vector::iterator i = m_io_ctxes.begin(); + i != m_io_ctxes.end(); ++i) { + IoCtx &io_ctx(*i); + int ret = cluster.ioctx_create(pool_name, io_ctx); + if (ret) { + return ret; + } + } + m_highest_iod_idx.set(0); + return 0; +} + +void IoCtxDistributor::clear() { + for (std::vector::iterator i = m_io_ctxes.begin(); + i != m_io_ctxes.end(); ++i) { + IoCtx &io_ctx(*i); + io_ctx.close(); + } + m_io_ctxes.clear(); + m_highest_iod_idx.set(0); +} + +IoCtx& IoCtxDistributor::get_ioctx() { + if (t_iod_idx == -1) { + t_iod_idx = m_highest_iod_idx.inc() - 1; + } + if (m_io_ctxes.size() <= (unsigned int)t_iod_idx) { + cerr << ERR_PREFIX << "IoCtxDistributor: logic error on line " + << __LINE__ << std::endl; + _exit(1); + } + return m_io_ctxes[t_iod_idx]; +} + +IoCtxDistributor *IoCtxDistributor::s_instance = NULL; + +IoCtxDistributor::IoCtxDistributor() { + clear(); +} + 
+IoCtxDistributor::~IoCtxDistributor() { + clear(); +} + +RadosSyncWQ::RadosSyncWQ(IoCtxDistributor *io_ctx_dist, time_t timeout, time_t suicide_timeout, ThreadPool *tp) + : ThreadPool::WorkQueue("FileStore::OpWQ", timeout, suicide_timeout, tp), + m_io_ctx_dist(io_ctx_dist) +{ +} + +bool RadosSyncWQ::_enqueue(std::string *s) { + m_items.push_back(s); + return true; +} + +void RadosSyncWQ::_dequeue(std::string *o) { + assert(0); +} + +bool RadosSyncWQ::_empty() { + return m_items.empty(); +} + +std::string *RadosSyncWQ::_dequeue() { + if (m_items.empty()) + return NULL; + std::string *ret = m_items.front(); + m_items.pop_front(); + return ret; +} + +void RadosSyncWQ::_process_finish(std::string *s) { + delete s; +} + +void RadosSyncWQ::_clear() { + for (std::deque::iterator i = m_items.begin(); + i != m_items.end(); ++i) { + delete *i; + } + m_items.clear(); +} + +Xattr::Xattr(char *data_, ssize_t len_) + : data(data_), len(len_) +{ +} + +Xattr::~Xattr() { + free(data); +} + +bool Xattr::operator==(const class Xattr &rhs) const { + if (len != rhs.len) + return false; + return (memcmp(data, rhs.data, len) == 0); +} + +bool Xattr::operator!=(const class Xattr &rhs) const { + return !((*this) == rhs); +} + +int BackedUpObject::from_file(const char *file_name, const char *dir_name, + std::auto_ptr &obj) +{ + char obj_path[strlen(dir_name) + strlen(file_name) + 2]; + snprintf(obj_path, sizeof(obj_path), "%s/%s", dir_name, file_name); + return BackedUpObject::from_path(obj_path, obj); +} + +int BackedUpObject::from_path(const char *path, std::auto_ptr &obj) +{ + int ret; + FILE *fp = fopen(path, "r"); + if (!fp) { + ret = errno; + if (ret != ENOENT) { + cerr << ERR_PREFIX << "BackedUpObject::from_path: error while trying to " + << "open '" << path << "': " << cpp_strerror(ret) << std::endl; + } + return ret; + } + int fd = fileno(fp); + struct stat st_buf; + memset(&st_buf, 0, sizeof(st_buf)); + ret = fstat(fd, &st_buf); + if (ret) { + ret = errno; + fclose(fp); + cerr << 
ERR_PREFIX << "BackedUpObject::from_path: error while trying " + << "to stat '" << path << "': " << cpp_strerror(ret) << std::endl; + return ret; + } + + // get fullname + ssize_t res = ceph_os_fgetxattr(fd, XATTR_FULLNAME, NULL, 0); + if (res <= 0) { + fclose(fp); + ret = errno; + if (res == 0) { + cerr << ERR_PREFIX << "BackedUpObject::from_path: found empty " + << XATTR_FULLNAME << " attribute on '" << path + << "'" << std::endl; + ret = ENODATA; + } else if (ret == ENODATA) { + cerr << ERR_PREFIX << "BackedUpObject::from_path: there was no " + << XATTR_FULLNAME << " attribute found on '" << path + << "'" << std::endl; + } else { + cerr << ERR_PREFIX << "getxattr error: " << cpp_strerror(ret) << std::endl; + } + return ret; + } + char rados_name_[res + 1]; + memset(rados_name_, 0, sizeof(rados_name_)); + res = ceph_os_fgetxattr(fd, XATTR_FULLNAME, rados_name_, res); + if (res < 0) { + ret = errno; + fclose(fp); + cerr << ERR_PREFIX << "BackedUpObject::getxattr(" << XATTR_FULLNAME + << ") error: " << cpp_strerror(ret) << std::endl; + return ret; + } + + BackedUpObject *o = new BackedUpObject(rados_name_, + st_buf.st_size, st_buf.st_mtime); + if (!o) { + fclose(fp); + return ENOBUFS; + } + ret = o->read_xattrs_from_file(fileno(fp)); + if (ret) { + fclose(fp); + cerr << ERR_PREFIX << "BackedUpObject::from_path(path = '" + << path << "): read_xattrs_from_file returned " << ret << std::endl; + delete o; + return ret; + } + + fclose(fp); + obj.reset(o); + return 0; +} + +int BackedUpObject::from_rados(IoCtx& io_ctx, const char *rados_name_, + auto_ptr &obj) +{ + uint64_t rados_size_ = 0; + time_t rados_time_ = 0; + int ret = io_ctx.stat(rados_name_, &rados_size_, &rados_time_); + if (ret == -ENOENT) { + // don't complain here about ENOENT + return ret; + } else if (ret < 0) { + cerr << ERR_PREFIX << "BackedUpObject::from_rados(rados_name_ = '" + << rados_name_ << "'): stat failed with error " << ret << std::endl; + return ret; + } + BackedUpObject *o = new 
BackedUpObject(rados_name_, rados_size_, rados_time_); + ret = o->read_xattrs_from_rados(io_ctx); + if (ret) { + cerr << ERR_PREFIX << "BackedUpObject::from_rados(rados_name_ = '" + << rados_name_ << "'): read_xattrs_from_rados returned " + << ret << std::endl; + delete o; + return ret; + } + obj.reset(o); + return 0; +} + +BackedUpObject::~BackedUpObject() +{ + for (std::map < std::string, Xattr* >::iterator x = xattrs.begin(); + x != xattrs.end(); ++x) + { + delete x->second; + x->second = NULL; + } + free(rados_name); +} + +std::string BackedUpObject::get_fs_path(const ExportDir *export_dir) const +{ + return export_dir->get_fs_path(rados_name); +} + +std::string BackedUpObject::xattrs_to_str() const +{ + ostringstream oss; + std::string prefix; + for (std::map < std::string, Xattr* >::const_iterator x = xattrs.begin(); + x != xattrs.end(); ++x) + { + char buf[x->second->len + 1]; + memcpy(buf, x->second->data, x->second->len); + buf[x->second->len] = '\0'; + oss << prefix << "{" << x->first << ":" << buf << "}"; + prefix = ", "; + } + return oss.str(); +} + +void BackedUpObject::xattr_diff(const BackedUpObject *rhs, + std::list < std::string > &only_in_a, + std::list < std::string > &only_in_b, + std::list < std::string > &diff) const +{ + only_in_a.clear(); + only_in_b.clear(); + diff.clear(); + for (std::map < std::string, Xattr* >::const_iterator x = xattrs.begin(); + x != xattrs.end(); ++x) + { + std::map < std::string, Xattr* >::const_iterator r = rhs->xattrs.find(x->first); + if (r == rhs->xattrs.end()) { + only_in_a.push_back(x->first); + } + else { + const Xattr &r_obj(*r->second); + const Xattr &x_obj(*x->second); + if (r_obj != x_obj) + diff.push_back(x->first); + } + } + for (std::map < std::string, Xattr* >::const_iterator r = rhs->xattrs.begin(); + r != rhs->xattrs.end(); ++r) + { + std::map < std::string, Xattr* >::const_iterator x = rhs->xattrs.find(r->first); + if (x == xattrs.end()) { + only_in_b.push_back(r->first); + } + } +} + +void 
BackedUpObject::get_xattrs(std::list < std::string > &xattrs_) const +{ + for (std::map < std::string, Xattr* >::const_iterator r = xattrs.begin(); + r != xattrs.end(); ++r) + { + xattrs_.push_back(r->first); + } +} + +const Xattr* BackedUpObject::get_xattr(const std::string name) const +{ + std::map < std::string, Xattr* >::const_iterator x = xattrs.find(name); + if (x == xattrs.end()) + return NULL; + else + return x->second; +} + +const char *BackedUpObject::get_rados_name() const { + return rados_name; +} + +uint64_t BackedUpObject::get_rados_size() const { + return rados_size; +} + +time_t BackedUpObject::get_mtime() const { + return rados_time; +} + +int BackedUpObject::download(IoCtx &io_ctx, const char *path) +{ + char tmp_path[strlen(path) + RADOS_SYNC_TMP_SUFFIX_LEN + 1]; + snprintf(tmp_path, sizeof(tmp_path), "%s%s", path, RADOS_SYNC_TMP_SUFFIX); + FILE *fp = fopen(tmp_path, "w"); + if (!fp) { + int err = errno; + cerr << ERR_PREFIX << "download: error opening '" << tmp_path << "':" + << cpp_strerror(err) << std::endl; + return err; + } + int fd = fileno(fp); + uint64_t off = 0; + static const int CHUNK_SZ = 32765; + while (true) { + bufferlist bl; + int rlen = io_ctx.read(rados_name, bl, CHUNK_SZ, off); + if (rlen < 0) { + cerr << ERR_PREFIX << "download: io_ctx.read(" << rados_name << ") returned " + << rlen << std::endl; + return rlen; + } + if (rlen < CHUNK_SZ) + off = 0; + else + off += rlen; + size_t flen = fwrite(bl.c_str(), 1, rlen, fp); + if (flen != (size_t)rlen) { + int err = errno; + cerr << ERR_PREFIX << "download: fwrite(" << tmp_path << ") error: " + << cpp_strerror(err) << std::endl; + fclose(fp); + return err; + } + if (off == 0) + break; + } + size_t attr_sz = strlen(rados_name) + 1; + int res = ceph_os_fsetxattr(fd, XATTR_FULLNAME, rados_name, attr_sz); + if (res) { + int err = errno; + cerr << ERR_PREFIX << "download: fsetxattr(" << tmp_path << ") error: " + << cpp_strerror(err) << std::endl; + fclose(fp); + return err; + } + if 
(fclose(fp)) { + int err = errno; + cerr << ERR_PREFIX << "download: fclose(" << tmp_path << ") error: " + << cpp_strerror(err) << std::endl; + return err; + } + if (rename(tmp_path, path)) { + int err = errno; + cerr << ERR_PREFIX << "download: rename(" << tmp_path << ", " + << path << ") error: " << cpp_strerror(err) << std::endl; + return err; + } + return 0; +} + +int BackedUpObject::upload(IoCtx &io_ctx, const char *file_name, const char *dir_name) +{ + char path[strlen(file_name) + strlen(dir_name) + 2]; + snprintf(path, sizeof(path), "%s/%s", dir_name, file_name); + FILE *fp = fopen(path, "r"); + if (!fp) { + int err = errno; + cerr << ERR_PREFIX << "upload: error opening '" << path << "': " + << cpp_strerror(err) << std::endl; + return err; + } + // Need to truncate RADOS object to size 0, in case there is + // already something there. + int ret = io_ctx.trunc(rados_name, 0); + if (ret) { + cerr << ERR_PREFIX << "upload: trunc failed with error " << ret << std::endl; + fclose(fp); + return ret; + } + uint64_t off = 0; + static const int CHUNK_SZ = 32765; + while (true) { + char buf[CHUNK_SZ]; + int flen = fread(buf, 1, CHUNK_SZ, fp); + if (flen < 0) { + int err = errno; + cerr << ERR_PREFIX << "upload: fread(" << file_name << ") error: " + << cpp_strerror(err) << std::endl; + fclose(fp); + return err; + } + if ((flen == 0) && (off != 0)) { + fclose(fp); + break; + } + // There must be a zero-copy way to do this? 
+ bufferlist bl; + bl.append(buf, flen); + int rlen = io_ctx.write(rados_name, bl, flen, off); + if (rlen < 0) { + fclose(fp); + cerr << ERR_PREFIX << "upload: rados_write error: " << rlen << std::endl; + return rlen; + } + if (rlen != flen) { + fclose(fp); + cerr << ERR_PREFIX << "upload: rados_write error: short write" << std::endl; + return -EIO; + } + off += rlen; + if (flen < CHUNK_SZ) { + fclose(fp); + return 0; + } + } + return 0; +} + +BackedUpObject::BackedUpObject(const char *rados_name_, + uint64_t rados_size_, time_t rados_time_) + : rados_name(strdup(rados_name_)), + rados_size(rados_size_), + rados_time(rados_time_) +{ +} + +int BackedUpObject::read_xattrs_from_file(int fd) +{ + ssize_t blen = ceph_os_flistxattr(fd, NULL, 0); + if (blen > 0x1000000) { + cerr << ERR_PREFIX << "BackedUpObject::read_xattrs_from_file: unwilling " + << "to allocate a buffer of size " << blen << " on the stack for " + << "flistxattr." << std::endl; + return ENOBUFS; + } + char buf[blen + 1]; + memset(buf, 0, sizeof(buf)); + ssize_t blen2 = ceph_os_flistxattr(fd, buf, blen); + if (blen != blen2) { + cerr << ERR_PREFIX << "BackedUpObject::read_xattrs_from_file: xattrs changed while " + << "we were trying to " + << "list them? 
First length was " << blen << ", but now it's " << blen2 + << std::endl; + return EDOM; + } + const char *b = buf; + while (*b) { + size_t bs = strlen(b); + std::string xattr_name = get_user_xattr_name(b); + if (!xattr_name.empty()) { + ssize_t attr_len = ceph_os_fgetxattr(fd, b, NULL, 0); + if (attr_len < 0) { + int err = errno; + cerr << ERR_PREFIX << "BackedUpObject::read_xattrs_from_file: " + << "fgetxattr(rados_name = '" << rados_name << "', xattr_name='" + << xattr_name << "') failed: " << cpp_strerror(err) << std::endl; + return EDOM; + } + char *attr = (char*)malloc(attr_len); + if (!attr) { + cerr << ERR_PREFIX << "BackedUpObject::read_xattrs_from_file: " + << "malloc(" << attr_len << ") failed for xattr_name='" + << xattr_name << "'" << std::endl; + return ENOBUFS; + } + ssize_t attr_len2 = ceph_os_fgetxattr(fd, b, attr, attr_len); + if (attr_len2 < 0) { + int err = errno; + cerr << ERR_PREFIX << "BackedUpObject::read_xattrs_from_file: " + << "fgetxattr(rados_name = '" << rados_name << "', " + << "xattr_name='" << xattr_name << "') failed: " + << cpp_strerror(err) << std::endl; + free(attr); + return EDOM; + } + if (attr_len2 != attr_len) { + cerr << ERR_PREFIX << "BackedUpObject::read_xattrs_from_file: xattr " + << "changed while we were trying to get it? " + << "fgetxattr(rados_name = '"<< rados_name + << "', xattr_name='" << xattr_name << "') returned a different length " + << "than when we first called it! 
old_len = " << attr_len + << "new_len = " << attr_len2 << std::endl; + free(attr); + return EDOM; + } + xattrs[xattr_name] = new Xattr(attr, attr_len); + } + b += (bs + 1); + } + return 0; +} + +int BackedUpObject::read_xattrs_from_rados(IoCtx &io_ctx) +{ + map attrset; + int ret = io_ctx.getxattrs(rados_name, attrset); + if (ret) { + cerr << ERR_PREFIX << "BackedUpObject::read_xattrs_from_rados: " + << "getxattrs failed with error code " << ret << std::endl; + return ret; + } + for (map::iterator i = attrset.begin(); + i != attrset.end(); ) + { + bufferlist& bl(i->second); + char *data = (char*)malloc(bl.length()); + if (!data) + return ENOBUFS; + memcpy(data, bl.c_str(), bl.length()); + Xattr *xattr = new Xattr(data, bl.length()); + if (!xattr) { + free(data); + return ENOBUFS; + } + xattrs[i->first] = xattr; + attrset.erase(i++); + } + return 0; +} + +int rados_tool_sync(const std::map < std::string, std::string > &opts, + std::vector &args) +{ + int ret; + bool force = opts.count("force"); + bool delete_after = opts.count("delete-after"); + bool create = opts.count("create"); + + std::map < std::string, std::string >::const_iterator n = opts.find("workers"); + int num_threads; + if (n == opts.end()) { + num_threads = DEFAULT_NUM_RADOS_WORKER_THREADS; + } + else { + std::string err; + num_threads = strict_strtol(n->second.c_str(), 10, &err); + if (!err.empty()) { + cerr << "rados: can't parse number of worker threads given: " + << err << std::endl; + return 1; + } + if ((num_threads < 1) || (num_threads > 9000)) { + cerr << "rados: unreasonable value given for num_threads: " + << num_threads << std::endl; + return 1; + } + } + + + std::string action, src, dst; + std::vector::iterator i = args.begin(); + if ((i != args.end()) && + ((strcmp(*i, "import") == 0) || (strcmp(*i, "export") == 0))) { + action = *i; + ++i; + } + else { + cerr << "rados" << ": You must specify either 'import' or 'export'.\n"; + cerr << "Use --help to show help.\n"; + exit(1); + } + if (i 
!= args.end()) { + src = *i; + ++i; + } + else { + cerr << "rados" << ": You must give a source.\n"; + cerr << "Use --help to show help.\n"; + exit(1); + } + if (i != args.end()) { + dst = *i; + ++i; + } + else { + cerr << "rados" << ": You must give a destination.\n"; + cerr << "Use --help to show help.\n"; + exit(1); + } + + // open rados + Rados rados; + if (rados.init_with_context(g_ceph_context) < 0) { + cerr << "rados" << ": failed to initialize Rados!" << std::endl; + exit(1); + } + if (rados.connect() < 0) { + cerr << "rados" << ": failed to connect to Rados cluster!" << std::endl; + exit(1); + } + IoCtx io_ctx; + std::string pool_name = (action == "import") ? dst : src; + ret = rados.ioctx_create(pool_name.c_str(), io_ctx); + if ((ret == -ENOENT) && (action == "import")) { + if (create) { + ret = rados.pool_create(pool_name.c_str()); + if (ret) { + cerr << "rados" << ": pool_create failed with error " << ret + << std::endl; + exit(ret); + } + ret = rados.ioctx_create(pool_name.c_str(), io_ctx); + } + else { + cerr << "rados" << ": pool '" << pool_name << "' does not exist. Use " + << "--create to try to create it." << std::endl; + exit(ENOENT); + } + } + if (ret < 0) { + cerr << "rados" << ": error opening pool " << pool_name << ": " + << cpp_strerror(ret) << std::endl; + exit(ret); + } + + IoCtxDistributor *io_ctx_dist = IoCtxDistributor::instance(); + ret = io_ctx_dist->init(rados, pool_name.c_str(), num_threads); + if (ret) { + cerr << ERR_PREFIX << "failed to initialize Rados io contexts." 
+ << std::endl; + _exit(ret); + } + + ThreadPool thread_pool(g_ceph_context, "rados_sync_threadpool", num_threads); + thread_pool.start(); + + if (action == "import") { + ret = do_rados_import(&thread_pool, io_ctx, io_ctx_dist, src.c_str(), + force, delete_after); + thread_pool.stop(); + return ret; + } + else { + ret = do_rados_export(&thread_pool, io_ctx, io_ctx_dist, dst.c_str(), + create, force, delete_after); + thread_pool.stop(); + return ret; + } +} diff --git a/src/tools/rados/rados_sync.h b/src/tools/rados/rados_sync.h new file mode 100644 index 00000000000..0f7226e0239 --- /dev/null +++ b/src/tools/rados/rados_sync.h @@ -0,0 +1,217 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef CEPH_RADOS_SYNC_H +#define CEPH_RADOS_SYNC_H + +#include +#include "include/atomic.h" +#include "common/WorkQueue.h" + +#include +#include + +namespace librados { + class IoCtx; + class Rados; +} + +extern const char USER_XATTR_PREFIX[]; +extern const char RADOS_SYNC_TMP_SUFFIX[]; +#define ERR_PREFIX "[ERROR] " +#define DEFAULT_NUM_RADOS_WORKER_THREADS 5 + +/* Linux seems to use ENODATA instead of ENOATTR when an extended attribute + * is missing */ +#ifndef ENOATTR +#define ENOATTR ENODATA +#endif + +enum { + CHANGED_XATTRS = 0x1, + CHANGED_CONTENTS = 0x2, +}; + +/** Given the name of an extended attribute from a file in the filesystem, + * returns an empty string if the extended attribute does not represent a rados + * user extended attribute. Otherwise, returns the name of the rados extended + * attribute. + * + * Rados user xattrs are prefixed with USER_XATTR_PREFIX. 
/** Smart pointer wrapper for a DIR*
 *
 * Holds the directory stream in the public member dp; opendir() takes the
 * name of the directory to open.
 *
 * NOTE(review): the destructor is defined outside this header; presumably it
 * closes dp.  If so, a copied DirHolder would close the same stream twice,
 * which is why copying is disabled below.
 */
class DirHolder {
public:
  DirHolder();
  ~DirHolder();
  int opendir(const char *dir_name);
  DIR *dp;

private:
  // don't allow copying (declared, intentionally never defined)
  DirHolder(const DirHolder &rhs);
  DirHolder &operator=(const DirHolder &rhs);
};
+ */ +class IoCtxDistributor +{ +public: + static IoCtxDistributor* instance(); + int init(librados::Rados &cluster, const char *pool_name, int num_ioctxes); + void clear(); + librados::IoCtx& get_ioctx(); +private: + static IoCtxDistributor *s_instance; + IoCtxDistributor(); + ~IoCtxDistributor(); + + ceph::atomic_t m_highest_iod_idx; + + /* NB: there might be some false sharing here that we could optimize + * away in the future */ + std::vector m_io_ctxes; +}; + +class RadosSyncWQ : public ThreadPool::WorkQueue { +public: + RadosSyncWQ(IoCtxDistributor *io_ctx_dist, time_t timeout, time_t suicide_timeout, ThreadPool *tp); +protected: + IoCtxDistributor *m_io_ctx_dist; +private: + bool _enqueue(std::string *s); + void _dequeue(std::string *o); + bool _empty(); + std::string *_dequeue(); + void _process_finish(std::string *s); + void _clear(); + std::deque m_items; +}; + +/* Stores a length and a chunk of malloc()ed data */ +class Xattr { +public: + Xattr(char *data_, ssize_t len_); + ~Xattr(); + bool operator==(const class Xattr &rhs) const; + bool operator!=(const class Xattr &rhs) const; + + char *data; + ssize_t len; +}; + +/* Represents an object that we are backing up */ +class BackedUpObject +{ +public: + static int from_file(const char *file_name, const char *dir_name, + std::auto_ptr &obj); + static int from_path(const char *path, std::auto_ptr &obj); + static int from_rados(librados::IoCtx& io_ctx, const char *rados_name_, + auto_ptr &obj); + ~BackedUpObject(); + + /* Get the mangled name for this rados object. */ + std::string get_fs_path(const ExportDir *export_dir) const; + + /* Convert the xattrs on this BackedUpObject to a kind of JSON-like string. + * This is only used for debugging. + * Note that we're assuming we can just treat the xattr data as a + * null-terminated string, which isn't true. Again, this is just for debugging, + * so it doesn't matter. 
+ */ + std::string xattrs_to_str() const; + + /* Diff the extended attributes on this BackedUpObject with those found on a + * different BackedUpObject + */ + void xattr_diff(const BackedUpObject *rhs, + std::list < std::string > &only_in_a, + std::list < std::string > &only_in_b, + std::list < std::string > &diff) const; + + void get_xattrs(std::list < std::string > &xattrs_) const; + + const Xattr* get_xattr(const std::string name) const; + + const char *get_rados_name() const; + + uint64_t get_rados_size() const; + + time_t get_mtime() const; + + int download(librados::IoCtx &io_ctx, const char *path); + + int upload(librados::IoCtx &io_ctx, const char *file_name, const char *dir_name); + +private: + BackedUpObject(const char *rados_name_, uint64_t rados_size_, time_t rados_time_); + + int read_xattrs_from_file(int fd); + + int read_xattrs_from_rados(librados::IoCtx &io_ctx); + + // don't allow copying + BackedUpObject &operator=(const BackedUpObject &rhs); + BackedUpObject(const BackedUpObject &rhs); + + char *rados_name; + uint64_t rados_size; + uint64_t rados_time; + std::map < std::string, Xattr* > xattrs; +}; + +extern int do_rados_import(ThreadPool *tp, librados::IoCtx &io_ctx, + IoCtxDistributor* io_ctx_dist, const char *dir_name, + bool force, bool delete_after); +extern int do_rados_export(ThreadPool *tp, librados::IoCtx& io_ctx, + IoCtxDistributor *io_ctx_dist, const char *dir_name, + bool create, bool force, bool delete_after); + +#endif