From: Mykola Golub Date: Sat, 19 Sep 2020 09:06:27 +0000 (+0100) Subject: rbd-nbd: introduce new attach and detach commands X-Git-Tag: v16.1.0~754^2~5 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=7d75c1d6b8f281575dca20f0cfd7fe33a375596e;p=ceph.git rbd-nbd: introduce new attach and detach commands The commands allow to restart a daemon without destroying the nbd device. Now, if the netlink is used, a dead connection timeout is set on the nbd device setup, so the device is not immediately released if the rbd-nbd process terminates without disconnect (unmap). The attach command just sends terminate signal to the rbd-nbd process. The detach command starts a new process and connects to the existing device. Signed-off-by: Mykola Golub --- diff --git a/qa/workunits/rbd/rbd-nbd.sh b/qa/workunits/rbd/rbd-nbd.sh index db425bbee458..fce369212141 100755 --- a/qa/workunits/rbd/rbd-nbd.sh +++ b/qa/workunits/rbd/rbd-nbd.sh @@ -64,7 +64,7 @@ function cleanup() set +e - mount | fgrep ${TEMPDIR}/mnt && umount ${TEMPDIR}/mnt + mount | fgrep ${TEMPDIR}/mnt && _sudo umount -f ${TEMPDIR}/mnt rm -Rf ${TEMPDIR} if [ -n "${DEV}" ] @@ -311,7 +311,31 @@ rbd snap create ${POOL}/${IMAGE}@quiesce3 _sudo dd if=${DATA} of=${TEMPDIR}/mnt/test bs=1M count=1 oflag=direct _sudo umount ${TEMPDIR}/mnt unmap_device ${DEV} ${PID} +DEV= cat ${LOG_FILE} expect_false grep 'quiesce failed' ${LOG_FILE} +# test detach/attach +DEV=`_sudo rbd-nbd map --try-netlink ${POOL}/${IMAGE}` +get_pid +_sudo mount ${DEV} ${TEMPDIR}/mnt +_sudo rbd-nbd detach ${POOL}/${IMAGE} +attached= +for i in `seq 10`; do + if _sudo rbd-nbd attach --device ${DEV} ${POOL}/${IMAGE}; then + attached=1 + break + fi + rbd-nbd list-mapped + ps auxww | grep rbd-nbd + sleep 1 +done +test "${attached}" = 1 +get_pid +ls ${TEMPDIR}/mnt/ +dd if=${TEMPDIR}/mnt/test of=/dev/null bs=1M count=1 +_sudo dd if=${DATA} of=${TEMPDIR}/mnt/test1 bs=1M count=1 oflag=direct +_sudo umount ${TEMPDIR}/mnt +unmap_device ${DEV} ${PID} + echo OK diff --git a/src/tools/rbd_nbd/rbd-nbd.cc b/src/tools/rbd_nbd/rbd-nbd.cc index 5f09fd1b3915..6ca68a49c475 100644 --- a/src/tools/rbd_nbd/rbd-nbd.cc +++ b/src/tools/rbd_nbd/rbd-nbd.cc @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -39,11 +40,19 @@ #include #include +#if __has_include() +#include +namespace fs = std::filesystem; +#else +#include +namespace fs = std::experimental::filesystem; +#endif #include #include #include #include #include +#include #include "common/Formatter.h" #include "common/Preforker.h" @@ -72,6 +81,15 @@ #undef dout_prefix #define dout_prefix *_dout << "rbd-nbd: " +enum Command { + None, + Map, + Unmap, + Attach, + Detach, + List +}; + struct Config { int nbds_max = 0; int max_part = 255; @@ -92,14 +110,34 @@ struct Config { std::string format; bool pretty_format = false; + + Command command = None; + int pid = 0; + + std::string image_spec() const { + std::string spec = poolname + "/"; + + if (!nsname.empty()) { + spec += "/" + nsname; + } + spec += imgname; + + if (!snapname.empty()) { + spec += "@" + snapname; + } + + return spec; + } }; static void usage() { - std::cout << "Usage: rbd-nbd [options] map Map an image to nbd device\n" - << " unmap Unmap nbd device\n" - << " [options] list-mapped List mapped nbd devices\n" - << "Map options:\n" + std::cout << "Usage: rbd-nbd [options] map Map image to nbd device\n" + << " detach Detach image from nbd device\n" + << " [options] attach Attach image to nbd device\n" + << " unmap Unmap nbd device\n" + << " [options] list-mapped List mapped nbd devices\n" + << "Map and attach options:\n" << " --device Specify nbd device path (/dev/nbd{num})\n" << " --exclusive Forbid writes by other clients\n" << " --max_part Override for module param max_part\n" @@ -120,15 +158,7 @@ static void usage() static int nbd = -1; static int nbd_index = -1; - -enum Command { - None, - Connect, - Disconnect, - List -}; - -static Command cmd = None; +static bool nbd_terminate = false; #define RBD_NBD_BLKSIZE 512UL @@ -145,7 +175,7 @@ static Command cmd = None; #define htonll(a) ntohll(a) static int parse_args(vector& args, std::ostream *err_msg, - Command *command, Config *cfg); + Config *cfg); static int netlink_resize(int nbd_index, uint64_t size); static int run_quiesce_hook(const std::string &quiesce_hook, @@ -183,6 +213,10 @@ public: } } + Config *get_cfg() const { + return cfg; + } + private: ceph::mutex disconnect_lock = ceph::make_mutex("NBDServer::DisconnectLocker"); @@ -300,13 +334,43 @@ private: void reader_entry() { + struct pollfd poll_fds[1]; + memset(poll_fds, 0, sizeof(struct pollfd)); + poll_fds[0].fd = fd; + poll_fds[0].events = POLLIN; + while (!terminated) { std::unique_ptr ctx(new IOContext()); ctx->server = this; dout(20) << __func__ << ": waiting for nbd request" << dendl; - int r = safe_read_exact(fd, &ctx->request, sizeof(struct nbd_request)); + // We can not block on read because we have to check periodically + // nbd_terminate flag set by the signal handler. + // So to prevent blocking use poll with timeout to ensure we + // have some data comes before calling read. + + int r = poll(poll_fds, 1, 1000); + if (r == -1) { + if (errno == EINTR) { + continue; + } + r = -errno; + derr << "failed to poll nbd: " << cpp_strerror(r) << dendl; + goto signal; + } + + if (nbd_terminate) { + dout(0) << "terminate received" << dendl; + goto signal; + } + + if ((poll_fds[0].revents & POLLIN) == 0) { + dout(20) << __func__ << ": poll timed out" << dendl; + continue; + } + + r = safe_read_exact(fd, &ctx->request, sizeof(struct nbd_request)); if (r < 0) { derr << "failed to read nbd request header: " << cpp_strerror(r) << dendl; @@ -591,6 +655,9 @@ std::ostream &operator<<(std::ostream &os, const NBDServer::IOContext &ctx) { case NBD_CMD_TRIM: os << " TRIM "; break; + case NBD_CMD_DISC: + os << " DISC "; + break; default: os << " UNKNOWN(" << ctx.command << ") "; break; @@ -682,7 +749,7 @@ public: class NBDListIterator { public: - bool get(int *pid, Config *cfg) { + bool get(Config *cfg) { while (true) { std::string nbd_path = "/sys/block/nbd" + stringify(m_index); if(access(nbd_path.c_str(), F_OK) != 0) { @@ -692,26 +759,52 @@ public: *cfg = Config(); cfg->devpath = "/dev/nbd" + stringify(m_index++); + int pid; std::ifstream ifs; ifs.open(nbd_path + "/pid", std::ifstream::in); if (!ifs.is_open()) { continue; } - ifs >> *pid; - - int r = get_mapped_info(*pid, cfg); - if (r < 0) { - continue; - } + ifs >> pid; + + // If the rbd-nbd is re-attached the pid may store garbage + // here. We are sure this is the case when it is negative or + // zero. Then we just try to find the attached process scanning + // /proc fs. If it is positive we check the process with this + // pid first and if it is not rbd-nbd fallback to searching the + // attached process. + do { + if (pid <= 0) { + pid = find_attached(cfg->devpath); + if (pid <= 0) { + break; + } + } - return true; + if (get_mapped_info(pid, cfg) >= 0) { + return true; + } + pid = -1; + } while (true); } } private: int m_index = 0; + std::map m_mapped_info_cache; int get_mapped_info(int pid, Config *cfg) { + auto it = m_mapped_info_cache.find(pid); + if (it != m_mapped_info_cache.end()) { + if (it->second.devpath.empty()) { + return -EINVAL; + } + *cfg = it->second; + return 0; + } + + m_mapped_info_cache[pid] = {}; + int r; std::string path = "/proc/" + stringify(pid) + "/cmdline"; std::ifstream ifs; @@ -723,6 +816,10 @@ private: return -1; ifs >> cmdline; + if (cmdline.empty()) { + return -EINVAL; + } + for (unsigned i = 0; i < cmdline.size(); i++) { char *arg = &cmdline[i]; if (i == 0) { @@ -739,18 +836,47 @@ private: } std::ostringstream err_msg; - Command command; - r = parse_args(args, &err_msg, &command, cfg); + r = parse_args(args, &err_msg, cfg); if (r < 0) { return r; } - if (command != Connect) { + if (cfg->command != Map && cfg->command != Attach) { return -ENOENT; } + cfg->pid = pid; + + m_mapped_info_cache.erase(pid); + if (!cfg->devpath.empty()) { + m_mapped_info_cache[pid] = *cfg; + } + return 0; } + + int find_attached(const std::string &devpath) { + for (auto &entry : fs::directory_iterator("/proc")) { + if (!fs::is_directory(entry.status())) { + continue; + } + + int pid; + try { + pid = boost::lexical_cast(entry.path().filename().c_str()); + } catch (boost::bad_lexical_cast&) { + continue; + } + + Config cfg; + if (get_mapped_info(pid, &cfg) >=0 && cfg.devpath == devpath && + cfg.command == Attach) { + return cfg.pid; + } + } + + return -1; + } }; static int load_module(Config *cfg) @@ -894,11 +1020,13 @@ static int try_ioctl_setup(Config *cfg, int fd, uint64_t size, uint64_t flags) r = ioctl(nbd, NBD_SET_BLKSIZE, RBD_NBD_BLKSIZE); if (r < 0) { r = -errno; + cerr << "rbd-nbd: NBD_SET_BLKSIZE failed" << std::endl; goto close_nbd; } r = ioctl(nbd, NBD_SET_SIZE, size); if (r < 0) { + cerr << "rbd-nbd: NBD_SET_SIZE failed" << std::endl; r = -errno; goto close_nbd; } @@ -1100,14 +1228,21 @@ static int netlink_connect_cb(struct nl_msg *msg, void *arg) } static int netlink_connect(Config *cfg, struct nl_sock *sock, int nl_id, int fd, - uint64_t size, uint64_t flags) + uint64_t size, uint64_t flags, bool reconnect) { struct nlattr *sock_attr; struct nlattr *sock_opt; struct nl_msg *msg; int ret; - nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, netlink_connect_cb, cfg); + if (reconnect) { + dout(10) << "netlink try reconnect for " << cfg->devpath << dendl; + + nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, genl_handle_msg, NULL); + } else { + nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, netlink_connect_cb, + cfg); + } msg = nlmsg_alloc(); if (!msg) { @@ -1115,8 +1250,8 @@ static int netlink_connect(Config *cfg, struct nl_sock *sock, int nl_id, int fd, return -ENOMEM; } - if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, nl_id, 0, 0, NBD_CMD_CONNECT, - 0)) { + if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, nl_id, 0, 0, + reconnect ? NBD_CMD_RECONFIGURE : NBD_CMD_CONNECT, 0)) { cerr << "rbd-nbd: Could not setup message." << std::endl; goto free_msg; } @@ -1127,6 +1262,9 @@ static int netlink_connect(Config *cfg, struct nl_sock *sock, int nl_id, int fd, goto free_msg; NLA_PUT_U32(msg, NBD_ATTR_INDEX, ret); + if (reconnect) { + nbd_index = ret; + } } if (cfg->timeout >= 0) @@ -1135,6 +1273,8 @@ static int netlink_connect(Config *cfg, struct nl_sock *sock, int nl_id, int fd, NLA_PUT_U64(msg, NBD_ATTR_SIZE_BYTES, size); NLA_PUT_U64(msg, NBD_ATTR_BLOCK_SIZE_BYTES, RBD_NBD_BLKSIZE); NLA_PUT_U64(msg, NBD_ATTR_SERVER_FLAGS, flags); + NLA_PUT_U64(msg, NBD_ATTR_DEAD_CONN_TIMEOUT, + cfg->timeout >= 0 ? cfg->timeout : 30); sock_attr = nla_nest_start(msg, NBD_ATTR_SOCKETS); if (!sock_attr) { @@ -1168,7 +1308,8 @@ free_msg: return -EIO; } -static int try_netlink_setup(Config *cfg, int fd, uint64_t size, uint64_t flags) +static int try_netlink_setup(Config *cfg, int fd, uint64_t size, uint64_t flags, + bool reconnect) { struct nl_sock *sock; int nl_id, ret; @@ -1182,7 +1323,7 @@ static int try_netlink_setup(Config *cfg, int fd, uint64_t size, uint64_t flags) dout(10) << "netlink interface supported." << dendl; - ret = netlink_connect(cfg, sock, nl_id, fd, size, flags); + ret = netlink_connect(cfg, sock, nl_id, fd, size, flags, reconnect); netlink_cleanup(sock); if (ret != 0) @@ -1229,26 +1370,11 @@ static int run_quiesce_hook(const std::string &quiesce_hook, static void handle_signal(int signum) { - int ret; - ceph_assert(signum == SIGINT || signum == SIGTERM); derr << "*** Got signal " << sig_str(signum) << " ***" << dendl; - if (nbd < 0 || nbd_index < 0) { - dout(20) << __func__ << ": " << "disconnect not needed." << dendl; - return; - } - - dout(20) << __func__ << ": " << "sending NBD_DISCONNECT" << dendl; - ret = netlink_disconnect(nbd_index); - if (ret == 1) - ret = ioctl(nbd, NBD_DISCONNECT); - - if (ret != 0) { - derr << "rbd-nbd: disconnect failed. Error: " << ret << dendl; - } else { - dout(20) << __func__ << ": " << "disconnected" << dendl; - } + dout(20) << __func__ << ": " << "setting terminate flag" << dendl; + nbd_terminate = true; } static NBDServer *start_server(int fd, librbd::Image& image, Config *cfg) @@ -1284,7 +1410,7 @@ static void run_server(Preforker& forker, NBDServer *server, bool netlink_used) shutdown_async_signal_handler(); } -static int do_map(int argc, const char *argv[], Config *cfg) +static int do_map(int argc, const char *argv[], Config *cfg, bool reconnect) { int r; @@ -1403,9 +1529,9 @@ static int do_map(int argc, const char *argv[], Config *cfg) server = start_server(fd[1], image, cfg); - use_netlink = cfg->try_netlink; + use_netlink = cfg->try_netlink || reconnect; if (use_netlink) { - r = try_netlink_setup(cfg, fd[0], size, flags); + r = try_netlink_setup(cfg, fd[0], size, flags, reconnect); if (r < 0) { goto free_server; } else if (r == 1) { @@ -1486,6 +1612,19 @@ close_ret: return r; } +static int do_detach(Config *cfg) +{ + int r = kill(cfg->pid, SIGTERM); + if (r != 0) { + r = -errno; + cerr << "rbd-nbd: failed to terminate " << cfg->pid << ": " + << cpp_strerror(r) << std::endl; + return r; + } + + return 0; +} + static int do_unmap(Config *cfg) { int r, nbd; @@ -1564,13 +1703,12 @@ static int do_list_mapped_devices(const std::string &format, bool pretty_format) tbl.define_column("device", TextTable::LEFT, TextTable::LEFT); } - int pid; Config cfg; NBDListIterator it; - while (it.get(&pid, &cfg)) { + while (it.get(&cfg)) { if (f) { f->open_object_section("device"); - f->dump_int("id", pid); + f->dump_int("id", cfg.pid); f->dump_string("pool", cfg.poolname); f->dump_string("namespace", cfg.nsname); f->dump_string("image", cfg.imgname); @@ -1582,8 +1720,8 @@ static int do_list_mapped_devices(const std::string &format, bool pretty_format) if (cfg.snapname.empty()) { cfg.snapname = "-"; } - tbl << pid << cfg.poolname << cfg.nsname << cfg.imgname << cfg.snapname - << cfg.devpath << TextTable::endrow; + tbl << cfg.pid << cfg.poolname << cfg.nsname << cfg.imgname + << cfg.snapname << cfg.devpath << TextTable::endrow; } } @@ -1597,12 +1735,12 @@ static int do_list_mapped_devices(const std::string &format, bool pretty_format) return 0; } -static bool find_mapped_dev_by_spec(Config *cfg) { - int pid; +static bool find_mapped_dev_by_spec(Config *cfg, int skip_pid=-1) { Config c; NBDListIterator it; - while (it.get(&pid, &c)) { - if (c.poolname == cfg->poolname && c.nsname == cfg->nsname && + while (it.get(&c)) { + if (c.pid != skip_pid && + c.poolname == cfg->poolname && c.nsname == cfg->nsname && c.imgname == cfg->imgname && c.snapname == cfg->snapname) { *cfg = c; return true; @@ -1613,7 +1751,7 @@ static bool find_mapped_dev_by_spec(Config *cfg) { static int parse_args(vector& args, std::ostream *err_msg, - Command *command, Config *cfg) { + Config *cfg) { std::string conf_file_list; std::string cluster; CephInitParameters iparams = ceph_argparse_early_args( @@ -1692,9 +1830,13 @@ static int parse_args(vector& args, std::ostream *err_msg, Command cmd = None; if (args.begin() != args.end()) { if (strcmp(*args.begin(), "map") == 0) { - cmd = Connect; + cmd = Map; } else if (strcmp(*args.begin(), "unmap") == 0) { - cmd = Disconnect; + cmd = Unmap; + } else if (strcmp(*args.begin(), "attach") == 0) { + cmd = Attach; + } else if (strcmp(*args.begin(), "detach") == 0) { + cmd = Detach; } else if (strcmp(*args.begin(), "list-mapped") == 0) { cmd = List; } else { @@ -1710,7 +1852,13 @@ static int parse_args(vector& args, std::ostream *err_msg, } switch (cmd) { - case Connect: + case Attach: + if (cfg->devpath.empty()) { + *err_msg << "rbd-nbd: must specify device to attach"; + return -EINVAL; + } + [[fallthrough]]; + case Map: if (args.begin() == args.end()) { *err_msg << "rbd-nbd: must specify image-or-snap-spec"; return -EINVAL; @@ -1720,7 +1868,8 @@ static int parse_args(vector& args, std::ostream *err_msg, } args.erase(args.begin()); break; - case Disconnect: + case Detach: + case Unmap: if (args.begin() == args.end()) { *err_msg << "rbd-nbd: must specify nbd device or image-or-snap-spec"; return -EINVAL; @@ -1731,10 +1880,6 @@ static int parse_args(vector& args, std::ostream *err_msg, if (parse_imgpath(*args.begin(), cfg, err_msg) < 0) { return -EINVAL; } - if (!find_mapped_dev_by_spec(cfg)) { - *err_msg << "rbd-nbd: " << *args.begin() << " is not mapped"; - return -ENOENT; - } } args.erase(args.begin()); break; @@ -1748,7 +1893,7 @@ static int parse_args(vector& args, std::ostream *err_msg, return -EINVAL; } - *command = cmd; + cfg->command = cmd; return 0; } @@ -1760,7 +1905,7 @@ static int rbd_nbd(int argc, const char *argv[]) argv_to_vec(argc, argv, args); std::ostringstream err_msg; - r = parse_args(args, &err_msg, &cmd, &cfg); + r = parse_args(args, &err_msg, &cfg); if (r == HELP_INFO) { usage(); return 0; @@ -1772,18 +1917,41 @@ static int rbd_nbd(int argc, const char *argv[]) return r; } - switch (cmd) { - case Connect: + switch (cfg.command) { + case Attach: + ceph_assert(!cfg.devpath.empty()); + if (find_mapped_dev_by_spec(&cfg, getpid())) { + cerr << "rbd-nbd: " << cfg.devpath << " has process " << cfg.pid + << " connected" << std::endl; + return -EBUSY; + } + [[fallthrough]]; + case Map: if (cfg.imgname.empty()) { cerr << "rbd-nbd: image name was not specified" << std::endl; return -EINVAL; } - r = do_map(argc, argv, &cfg); + r = do_map(argc, argv, &cfg, cfg.command == Attach); if (r < 0) return -EINVAL; break; - case Disconnect: + case Detach: + if (cfg.devpath.empty() && !find_mapped_dev_by_spec(&cfg)) { + cerr << "rbd-nbd: " << cfg.image_spec() << " is not mapped" + << std::endl; + return -ENOENT; + } + r = do_detach(&cfg); + if (r < 0) + return -EINVAL; + break; + case Unmap: + if (cfg.devpath.empty() && !find_mapped_dev_by_spec(&cfg)) { + cerr << "rbd-nbd: " << cfg.image_spec() << " is not mapped" + << std::endl; + return -ENOENT; + } r = do_unmap(&cfg); if (r < 0) return -EINVAL;