From: Yunchuan Wen Date: Mon, 16 Nov 2015 02:40:30 +0000 (+0800) Subject: add rbd-nbd tool X-Git-Tag: v10.0.2~132^2~11 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=37f1e84e578c6317e5eb2fc072096a76b0a92418;p=ceph.git add rbd-nbd tool Signed-off-by: Yunchuan Wen Signed-off-by: Li Wang --- diff --git a/src/tools/Makefile-client.am b/src/tools/Makefile-client.am index 1764eac05583..9717345fa122 100644 --- a/src/tools/Makefile-client.am +++ b/src/tools/Makefile-client.am @@ -67,6 +67,12 @@ rbd_LDADD = \ $(BOOST_REGEX_LIBS) $(BOOST_PROGRAM_OPTIONS_LIBS) if LINUX bin_PROGRAMS += rbd + +rbd_nbd_SOURCES = tools/rbd_nbd/rbd-nbd.cc +rbd_nbd_CXXFLAGS = $(AM_CXXFLAGS) +rbd_nbd_LDADD = $(LIBRBD) $(LIBRADOS) $(CEPH_GLOBAL) $(BOOST_REGEX_LIBS) +bin_PROGRAMS += rbd-nbd + endif # LINUX endif # WITH_RBD diff --git a/src/tools/rbd_nbd/rbd-nbd.cc b/src/tools/rbd_nbd/rbd-nbd.cc new file mode 100644 index 000000000000..b86bcebdeee4 --- /dev/null +++ b/src/tools/rbd_nbd/rbd-nbd.cc @@ -0,0 +1,739 @@ +#include "include/int_types.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include "mon/MonClient.h" +#include "common/config.h" + +#include "common/errno.h" +#include "common/module.h" +#include "common/safe_io.h" +#include "common/ceph_argparse.h" +#include "common/Preforker.h" +#include "global/global_init.h" + +#include "include/rados/librados.hpp" +#include "include/rbd/librbd.hpp" + +static void usage() +{ + std::cout << "Usage: rbd-nbd [options] map Map a image to nbd device\n" + << " unmap Unmap nbd device\n" + << " list-mapped List mapped nbd devices\n" + << "Options: --device Specify nbd device path\n" + << " --read-only Map readonly\n" + << " --nbds_max Override for module param\n" + << std::endl; +} + +static Preforker forker; +static std::string devpath, poolname("rbd"), imgname, snapname; +static bool readonly = false; +static int nbds_max = 0; + +#ifdef CEPH_BIG_ENDIAN +#define ntohll(a) (a) +#elif defined(CEPH_LITTLE_ENDIAN) +#define ntohll(a) swab64(a) +#else +#error "Could not determine endianess" +#endif +#define htonll(a) ntohll(a) + +class NBDServer +{ +private: + int fd; + librbd::Image ℑ + +public: + NBDServer(int _fd, librbd::Image& _image) + : fd(_fd) + , image(_image) + , terminated(false) + , lock("NBDServer::Locker") + , reader_thread(*this, &NBDServer::reader_entry) + , writer_thread(*this, &NBDServer::writer_entry) + , started(false) + {} + +private: + atomic_t terminated; + + void shutdown() + { + if (terminated.compare_and_swap(false, true)) { + ::shutdown(fd, SHUT_RDWR); + + Mutex::Locker l(lock); + cond.Signal(); + } + } + + struct IOContext + { + xlist::item item; + NBDServer *server; + struct nbd_request request; + struct nbd_reply reply; + bufferlist data; + int command; + + IOContext() + : item(this) + {} + }; + + Mutex lock; + Cond cond; + xlist io_pending; + xlist io_finished; + + void io_start(IOContext *ctx) + { + Mutex::Locker l(lock); + io_pending.push_back(&ctx->item); + } + + void io_finish(IOContext *ctx) + { + Mutex::Locker l(lock); + assert(ctx->item.is_on_list()); + ctx->item.remove_myself(); + io_finished.push_back(&ctx->item); + cond.Signal(); + } + + IOContext *wait_io_finish() + { + Mutex::Locker l(lock); + while(io_finished.empty() && !terminated.read()) + cond.Wait(lock); + + if (io_finished.empty()) + return NULL; + + IOContext *ret = io_finished.front(); + io_finished.pop_front(); + + return ret; + } + + void wait_clean() + { + assert(!reader_thread.is_started()); + Mutex::Locker l(lock); + while(!io_pending.empty()) + cond.Wait(lock); + + while(!io_finished.empty()) { + ceph::unique_ptr free_ctx(io_finished.front()); + io_finished.pop_front(); + } + } + + static void aio_callback(librbd::completion_t cb, void *arg) + { + librbd::RBD::AioCompletion *aio_completion = + reinterpret_cast(cb); + + IOContext *ctx = reinterpret_cast(arg); + int ret = aio_completion->get_return_value(); + if (ret > 0) + ret = 0; + ctx->reply.error = htonl(ret); + ctx->server->io_finish(ctx); + + aio_completion->release(); + } + + void reader_entry() + { + while (!terminated.read()) { + ceph::unique_ptr ctx(new IOContext()); + ctx->server = this; + if (safe_read_exact(fd, &ctx->request, sizeof(struct nbd_request)) < 0) + return; + + if (ctx->request.magic != htonl(NBD_REQUEST_MAGIC)) + return; + + ctx->request.from = ntohll(ctx->request.from); + ctx->request.type = ntohl(ctx->request.type); + ctx->request.len = ntohl(ctx->request.len); + + ctx->reply.magic = htonl(NBD_REPLY_MAGIC); + memcpy(ctx->reply.handle, ctx->request.handle, sizeof(ctx->reply.handle)); + + ctx->command = ctx->request.type & 0x0000ffff; + + switch (ctx->command) + { + case NBD_CMD_DISC: + return; + case NBD_CMD_WRITE: + bufferptr ptr(ctx->request.len); + if (safe_read_exact(fd, ptr.c_str(), ctx->request.len) < 0) + return; + ctx->data.push_back(ptr); + break; + } + + IOContext *pctx = ctx.release(); + io_start(pctx); + librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(pctx, aio_callback); + switch (pctx->command) + { + case NBD_CMD_WRITE: + image.aio_write(pctx->request.from, pctx->request.len, pctx->data, c); + break; + case NBD_CMD_READ: + image.aio_read(pctx->request.from, pctx->request.len, pctx->data, c); + break; + case NBD_CMD_FLUSH: + image.aio_flush(c); + break; + case NBD_CMD_TRIM: + image.aio_discard(pctx->request.from, pctx->request.len, c); + break; + default: + return; + } + } + } + + void writer_entry() + { + while (!terminated.read()) { + ceph::unique_ptr ctx(wait_io_finish()); + if (!ctx) + return; + + if (safe_write(fd, &ctx->reply, sizeof(struct nbd_reply)) < 0) + return; + if (ctx->command == NBD_CMD_READ && ctx->reply.error == htonl(0)) { + if (ctx->data.write_fd(fd) < 0) + return; + } + } + } + + class ThreadHelper : public Thread + { + public: + typedef void (NBDServer::*entry_func)(); + private: + NBDServer &server; + entry_func func; + public: + ThreadHelper(NBDServer &_server, entry_func _func) + :server(_server) + ,func(_func) + {} + protected: + virtual void* entry() + { + (server.*func)(); + server.shutdown(); + return NULL; + } + } reader_thread, writer_thread; + + bool started; +public: + void start() + { + if (!started) { + started = true; + + reader_thread.create(); + writer_thread.create(); + } + } + + void stop() + { + if (started) { + shutdown(); + + reader_thread.join(); + writer_thread.join(); + + wait_clean(); + + started = false; + } + } + + ~NBDServer() + { + stop(); + } +}; + + +class NBDWatchCtx : public librados::WatchCtx2 +{ +private: + int fd; + librados::IoCtx &io_ctx; + librbd::Image ℑ + std::string header_oid; + unsigned long size; +public: + NBDWatchCtx(int _fd, + librados::IoCtx &_io_ctx, + librbd::Image &_image, + std::string &_header_oid, + unsigned long _size) + : fd(_fd) + , io_ctx(_io_ctx) + , image(_image) + , header_oid(_header_oid) + , size(_size) + { } + + virtual ~NBDWatchCtx() {} + + virtual void handle_notify(uint64_t notify_id, + uint64_t cookie, + uint64_t notifier_id, + bufferlist& bl) + { + librbd::image_info_t info; + if (image.stat(info, sizeof(info)) == 0) { + unsigned long new_size = info.size; + + if (new_size != size) { + if (ioctl(fd, BLKFLSBUF, NULL) < 0) + std::cerr << "rbd-nbd: invalidate page cache failed status: " << cpp_strerror(errno) << std::endl; + if (ioctl(fd, NBD_SET_SIZE, new_size) < 0) + std::cerr << "rbd-nbd: resize failed status: " << cpp_strerror(errno) << std::endl; + if (image.invalidate_cache() < 0) + std::cerr << "rbd-nbd: invalidate rbd cache failed" << std::endl; + size = new_size; + } + } + + bufferlist reply; + io_ctx.notify_ack(header_oid, notify_id, cookie, reply); + } + + virtual void handle_error(uint64_t cookie, int err) + { + //ignore + } +}; + +static int open_device(const char* path, bool try_load_moudle = false) +{ + int nbd = open(path, O_RDWR); + if (nbd < 0 && try_load_moudle && access("/sys/module/nbd", F_OK) != 0) { + int r; + if (nbds_max) { + ostringstream param; + param << "nbds_max=" << nbds_max; + r = module_load("nbd", param.str().c_str()); + } else { + r = module_load("nbd", NULL); + } + if (r < 0) { + cerr << "rbd-nbd: failed to load nbd kernel module: " << cpp_strerror(-r) << std::endl; + return r; + } + nbd = open(path, O_RDWR); + } + return nbd; +} + +static int do_map() +{ + int r; + + librados::Rados rados; + librbd::RBD rbd; + librados::IoCtx io_ctx; + librbd::Image image; + + int read_only; + unsigned long flags; + unsigned long size; + + int fd[2]; + int nbd; + int null_fd = -1; + + uint8_t old_format; + librbd::image_info_t info; + + if (socketpair(AF_UNIX, SOCK_STREAM, 0, fd) == -1) { + r = -errno; + goto close_ret; + } + + if (devpath.empty()) { + char dev[64]; + int index = 0; + while (true) { + snprintf(dev, sizeof(dev), "/dev/nbd%d", index); + + nbd = open_device(dev, true); + if (nbd < 0) { + r = nbd; + cerr << "rbd-nbd: failed to find unused device" << std::endl; + goto close_fd; + } + + r = ioctl(nbd, NBD_SET_SOCK, fd[0]); + if (r < 0) { + close(nbd); + ++index; + continue; + } + + devpath = dev; + break; + } + } else { + nbd = open_device(devpath.c_str(), true); + if (nbd < 0) { + r = nbd; + cerr << "rbd-nbd: failed to open device: " << devpath << std::endl; + goto close_fd; + } + + r = ioctl(nbd, NBD_SET_SOCK, fd[0]); + if (r < 0) { + r = -errno; + cerr << "rbd-nbd: the device " << devpath << " is busy" << std::endl; + close(nbd); + goto close_fd; + } + } + + flags = NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_TRIM | NBD_FLAG_HAS_FLAGS; + if (!snapname.empty() || readonly) + flags |= NBD_FLAG_READ_ONLY; + + r = rados.init_with_context(g_ceph_context); + if (r < 0) + goto close_nbd; + + r = rados.connect(); + if (r < 0) + goto close_nbd; + + r = rados.ioctx_create(poolname.c_str(), io_ctx); + if (r < 0) + goto close_nbd; + + r = rbd.open(io_ctx, image, imgname.c_str()); + if (r < 0) + goto close_nbd; + + if (!snapname.empty()) { + r = image.snap_set(snapname.c_str()); + if (r < 0) + goto close_nbd; + } + + r = image.stat(info, sizeof(info)); + if (r < 0) + goto close_nbd; + + r = ioctl(nbd, NBD_SET_BLKSIZE, 512UL); + if (r < 0) { + r = -errno; + goto close_nbd; + } + + size = info.size; + r = ioctl(nbd, NBD_SET_SIZE, size); + if (r < 0) { + r = -errno; + goto close_nbd; + } + + ioctl(nbd, NBD_SET_FLAGS, flags); + + read_only = snapname.empty() ? 0 : 1; + r = ioctl(nbd, BLKROSET, (unsigned long) &read_only); + if (r < 0) { + r = -errno; + goto close_nbd; + } + + r = image.old_format(&old_format); + if (r < 0) + goto close_nbd; + + { + string header_oid; + uint64_t watcher; + + if (old_format != 0) { + header_oid = imgname + RBD_SUFFIX; + } else { + char prefix[RBD_MAX_BLOCK_NAME_SIZE + 1]; + strncpy(prefix, info.block_name_prefix, RBD_MAX_BLOCK_NAME_SIZE); + prefix[RBD_MAX_BLOCK_NAME_SIZE] = '\0'; + + std::string image_id(prefix + strlen(RBD_DATA_PREFIX)); + header_oid = RBD_HEADER_PREFIX + image_id; + } + + NBDWatchCtx watch_ctx(nbd, io_ctx, image, header_oid, info.size); + r = io_ctx.watch2(header_oid, &watcher, &watch_ctx); + if (r < 0) + goto close_nbd; + + if (g_conf->daemonize) { + r = open("/dev/null", O_RDWR); + if (r < 0) + goto close_watcher; + null_fd = r; + } + + cout << devpath << std::endl; + + if (g_conf->daemonize) { + forker.daemonize(); + + ::dup2(null_fd, STDIN_FILENO); + ::dup2(null_fd, STDOUT_FILENO); + ::dup2(null_fd, STDERR_FILENO); + close(null_fd); + } + + { + NBDServer server(fd[1], image); + + server.start(); + ioctl(nbd, NBD_DO_IT); + server.stop(); + } + +close_watcher: + io_ctx.unwatch2(watcher); + } + +close_nbd: + if (r < 0) { + ioctl(nbd, NBD_CLEAR_SOCK); + cerr << "rbd-nbd: failed to map, status: " << cpp_strerror(-r) << std::endl; + } + close(nbd); +close_fd: + close(fd[0]); + close(fd[1]); +close_ret: + image.close(); + io_ctx.close(); + rados.shutdown(); + return r; +} + +static int do_unmap() +{ + int nbd = open_device(devpath.c_str()); + if (nbd < 0) { + cerr << "rbd-nbd: failed to open device: " << devpath << std::endl; + return nbd; + } + + if (ioctl(nbd, NBD_DISCONNECT) < 0) + cerr << "rbd-nbd: the device is not used" << std::endl; + ioctl(nbd, NBD_CLEAR_SOCK); + close(nbd); + + return 0; +} + +static int parse_imgpath(const std::string &imgpath) +{ + boost::regex pattern("^(?:([^/@]+)/)?([^/@]+)(?:@([^/@]+))?$"); + boost::smatch match; + if (!boost::regex_match(imgpath, match, pattern)) { + std::cerr << "rbd-nbd: invalid spec '" << imgpath << "'" << std::endl; + return -EINVAL; + } + + if (match[1].matched) + poolname = match[1]; + + imgname = match[2]; + + if (match[3].matched) + snapname = match[3]; + + return 0; +} + +static void list_mapped_devices() +{ + char path[64]; + int m = 0; + int fd[2]; + + if (socketpair(AF_UNIX, SOCK_STREAM, 0, fd) == -1) + return; + + while (true) { + snprintf(path, sizeof(path), "/dev/nbd%d", m); + int nbd = open_device(path); + if (nbd < 0) + break; + if (ioctl(nbd, NBD_SET_SOCK, fd[0]) != 0) + cout << path << std::endl; + else + ioctl(nbd, NBD_CLEAR_SOCK); + close(nbd); + m++; + } + + close(fd[0]); + close(fd[1]); +} + +static int rbd_nbd(int argc, const char *argv[]) +{ + int r; + enum { + None, + Connect, + Disconnect, + List + } cmd = None; + + vector args; + + argv_to_vec(argc, argv, args); + env_to_vec(args); + global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_DAEMON, + CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); + + std::vector::iterator i; + + for (i = args.begin(); i != args.end(); ) { + if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) { + usage(); + return 0; + } else if (ceph_argparse_witharg(args, i, &devpath, "--device", (char *)NULL)) { + } else if (ceph_argparse_witharg(args, i, &nbds_max, cerr, "--nbds_max", (char *)NULL)) { + } else if (ceph_argparse_flag(args, i, "--read-only", (char *)NULL)) { + readonly = true; + } else { + ++i; + } + } + + if (args.begin() != args.end()) { + if (strcmp(*args.begin(), "map") == 0) { + cmd = Connect; + } else if (strcmp(*args.begin(), "unmap") == 0) { + cmd = Disconnect; + } else if (strcmp(*args.begin(), "list-mapped") == 0) { + cmd = List; + } else { + cerr << "rbd-nbd: unknown command: " << *args.begin() << std::endl; + return EXIT_FAILURE; + } + args.erase(args.begin()); + } + + if (cmd == None) { + cerr << "rbd-nbd: must specify command" << std::endl; + return EXIT_FAILURE; + } + + switch (cmd) { + case Connect: + if (args.begin() == args.end()) { + cerr << "rbd-nbd: must specify image-or-snap-spec" << std::endl; + return EXIT_FAILURE; + } + if (parse_imgpath(string(*args.begin())) < 0) + return EXIT_FAILURE; + args.erase(args.begin()); + break; + case Disconnect: + if (args.begin() == args.end()) { + cerr << "rbd-nbd: must specify nbd device path" << std::endl; + return EXIT_FAILURE; + } + devpath = *args.begin(); + args.erase(args.begin()); + break; + default: + //shut up gcc; + break; + } + + if (args.begin() != args.end()) { + cerr << "rbd-nbd: unknown args: " << *args.begin() << std::endl; + return EXIT_FAILURE; + } + + switch (cmd) { + case Connect: + common_init_finish(g_ceph_context); + + if (imgname.empty()) { + cerr << "rbd-nbd: image name was not specified" << std::endl; + return EXIT_FAILURE; + } + + r = do_map(); + if (r < 0) + return EXIT_FAILURE; + break; + case Disconnect: + r = do_unmap(); + if (r < 0) + return EXIT_FAILURE; + break; + case List: + list_mapped_devices(); + break; + default: + usage(); + return EXIT_FAILURE; + } + + return 0; +} + +int main(int argc, const char *argv[]) +{ + std::string err; + + if (forker.prefork(err) < 0) { + cerr << err << std::endl; + return EXIT_FAILURE; + } + + if (forker.is_child()) { + forker.exit(rbd_nbd(argc, argv)); + } else if (forker.parent_wait(err) < 0) { + cerr << err << std::endl; + return EXIT_FAILURE; + } else { + return 0; + } +}