From: Dan Mick Date: Fri, 7 Jun 2013 02:12:32 +0000 (-0700) Subject: Merge branch 'wip-ceph-cli' into master X-Git-Tag: v0.65~136^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3ac6ffe802eeeae6614c6989b643eff2a3a11f2e;p=ceph.git Merge branch 'wip-ceph-cli' into master Conflicts: src/include/rados/librados.h src/librados/librados.cc src/osdc/Objecter.cc src/pybind/rados.py Required modifications to: src/osd/OSD.cc Signed-off-by: Dan Mick --- 3ac6ffe802eeeae6614c6989b643eff2a3a11f2e diff --cc debian/ceph-test.install index fe5af5759fb9,45dca23e681d..a5712603bf0e --- a/debian/ceph-test.install +++ b/debian/ceph-test.install @@@ -43,9 -43,9 +43,10 @@@ usr/bin/ceph_test_objectcacher_stres usr/bin/ceph_test_rados usr/bin/ceph_test_rados_api_aio usr/bin/ceph_test_rados_api_cls + usr/bin/ceph_test_rados_api_cmd usr/bin/ceph_test_rados_api_io usr/bin/ceph_test_rados_api_list +usr/bin/ceph_test_rados_api_lock usr/bin/ceph_test_rados_api_misc usr/bin/ceph_test_rados_api_pool usr/bin/ceph_test_rados_api_snapshots diff --cc src/include/rados/librados.h index cf38e011e17a,739c29eeaf49..98f4900927c3 --- a/src/include/rados/librados.h +++ b/src/include/rados/librados.h @@@ -1576,101 -1596,88 +1601,183 @@@ int rados_notify(rados_ioctx_t io, cons /** @} Watch/Notify */ +/** + * Take an exclusive lock on an object. + * + * @param io the context to operate in + * @param oid the name of the object + * @param name the name of the lock + * @param cookie user-defined identifier for this instance of the lock + * @param desc user-defined lock description + * @param duration the duration of the lock. Set to NULL for infinite duration. + * @param flags lock flags + * @returns 0 on success, negative error code on failure + * @returns -EBUSY if the lock is already held by another (client, cookie) pair + * @returns -EEXIST if the lock is already held by the same (client, cookie) pair + */ +int rados_lock_exclusive(rados_ioctx_t io, const char * o, const char * name, + const char * cookie, const char * desc, struct timeval * duration, + uint8_t flags); + +/** + * Take a shared lock on an object. + * + * @param io the context to operate in + * @param o the name of the object + * @param name the name of the lock + * @param cookie user-defined identifier for this instance of the lock + * @param tag The tag of the lock + * @param desc user-defined lock description + * @param duration the duration of the lock. Set to NULL for infinite duration. + * @param flags lock flags + * @returns 0 on success, negative error code on failure + * @returns -EBUSY if the lock is already held by another (client, cookie) pair + * @returns -EEXIST if the lock is already held by the same (client, cookie) pair + */ +int rados_lock_shared(rados_ioctx_t io, const char * o, const char * name, + const char * cookie, const char * tag, const char * desc, + struct timeval * duration, uint8_t flags); + +/** + * Release a shared or exclusive lock on an object. + * + * @param io the context to operate in + * @param o the name of the object + * @param name the name of the lock + * @param cookie user-defined identifier for the instance of the lock + * @returns 0 on success, negative error code on failure + * @returns -ENOENT if the lock is not held by the specified (client, cookie) pair + */ +int rados_unlock(rados_ioctx_t io, const char *o, const char *name, + const char *cookie); + +/** + * List clients that have locked the named object lock and information about + * the lock. + * + * The number of bytes required in each buffer is put in the + * corresponding size out parameter. If any of the provided buffers + * are too short, -ERANGE is returned after these sizes are filled in. + * + * @param io the context to operate in + * @param o the name of the object + * @param name the name of the lock + * @param exclusive where to store whether the lock is exclusive (1) or shared (0) + * @param tag where to store the tag associated with the object lock + * @param tag_len number of bytes in tag buffer + * @param clients buffer in which locker clients are stored, separated by '\0' + * @param clients_len number of bytes in the clients buffer + * @param cookies buffer in which locker cookies are stored, separated by '\0' + * @param cookies_len number of bytes in the cookies buffer + * @param addrs buffer in which locker addresses are stored, separated by '\0' + * @param addrs_len number of bytes in the clients buffer + * @returns number of lockers on success, negative error code on failure + * @returns -ERANGE if any of the buffers are too short + */ +ssize_t rados_list_lockers(rados_ioctx_t io, const char *o, + const char *name, int *exclusive, + char *tag, size_t *tag_len, + char *clients, size_t *clients_len, + char *cookies, size_t *cookies_len, + char *addrs, size_t *addrs_len); + +/** + * Releases a shared or exclusive lock on an object, which was taken by the + * specified client. + * + * @param io the context to operate in + * @param o the name of the object + * @param name the name of the lock + * @param client the client currently holding the lock + * @param cookie user-defined identifier for the instance of the lock + * @returns 0 on success, negative error code on failure + * @returns -ENOENT if the lock is not held by the specified (client, cookie) pair + * @returns -EINVAL if the client cannot be parsed + */ +int rados_break_lock(rados_ioctx_t io, const char *o, const char *name, + const char *client, const char *cookie); + /** + * @defgroup librados_h_commands Mon/OSD/PG Commands + * + * These interfaces send commands relating to the monitor, OSD, or PGs. + * + * @{ + */ + + /** + * Send monitor command. + * + * @note Takes command string in carefully-formatted JSON; must match + * defined commands, types, etc. + * + * The result buffers are allocated on the heapt; the caller is + * expected to release that memory with rados_buffer_free(). The + * buffer and length pointers can all be NULL, in which case they are + * not filled in. + * + * @param cluster cluster handle + * @param cmd an array of char *'s representing the command + * @param cmdlen count of valid entries in cmd + * @param inbuf any bulk input data (crush map, etc.) + * @param outbuf double pointer to output buffer + * @param outbuflen pointer to output buffer length + * @param outs double pointer to status string + * @param outslen pointer to status string length + * @returns 0 on success, negative error code on failure + */ + int rados_mon_command(rados_t cluster, const char **cmd, size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen); + + /** + * free a rados-allocated buffer + * + * Release memory allocated by librados calls like rados_mon_command(). + * + * @param buf buffer pointer + */ + void rados_buffer_free(char *buf); + + int rados_osd_command(rados_t cluster, int osdid, const char **cmd, + size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen); + + int rados_pg_command(rados_t cluster, const char *pgstr, const char **cmd, + size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen); + + /** + * monitor cluster log + * + * Monitor events logged to the cluster log. The callback get each + * log entry both as a single formatted line and with each field in a + * separate arg. + * + * Calling with a cb argument of NULL will deregister any previously + * registered callback. + * + * @param cluster cluster handle + * @param level minimum log level (debug, info, warn|warning, err|error) + * @param cb callback to run for each log message + * @param arg void argument to pass to cb + * @returns 0 on success, negative code on error + */ + typedef void (*rados_log_callback_t)(void *arg, + const char *line, + const char *who, + uint64_t sec, uint64_t nsec, + uint64_t seq, const char *level, + const char *msg); + + int rados_monitor_log(rados_t cluster, const char *level, rados_log_callback_t cb, void *arg); + + /** @} Mon/OSD/PG commands */ + #ifdef __cplusplus } #endif diff --cc src/librados/RadosClient.h index f3d1d0f81b5a,812c0db1a01e..9374553b0ad2 --- a/src/librados/RadosClient.h +++ b/src/librados/RadosClient.h @@@ -100,9 -106,18 +106,19 @@@ public void register_watcher(librados::WatchContext *wc, uint64_t *cookie); void unregister_watcher(uint64_t cookie); void watch_notify(MWatchNotify *m); + int mon_command(const vector& cmd, bufferlist &inbl, + bufferlist *outbl, string *outs); + int osd_command(int osd, vector& cmd, bufferlist& inbl, + bufferlist *poutbl, string *prs); + int pg_command(pg_t pgid, vector& cmd, bufferlist& inbl, + bufferlist *poutbl, string *prs); + + void handle_log(MLog *m); + int monitor_log(const string& level, rados_log_callback_t cb, void *arg); + void get(); bool put(); + void blacklist_self(bool set); }; #endif diff --cc src/osd/OSD.cc index 569e89ef63c1,205d5a5f64f1..192b8f51adf5 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@@ -1208,45 -1151,83 +1211,93 @@@ int OSD::init( AdminSocket *admin_socket = cct->get_admin_socket(); asok_hook = new OSDSocketHook(this); - r = admin_socket->register_command("dump_ops_in_flight", asok_hook, + r = admin_socket->register_command("dump_ops_in_flight", + "dump_ops_in_flight", asok_hook, "show the ops currently in flight"); assert(r == 0); - r = admin_socket->register_command("dump_historic_ops", asok_hook, + r = admin_socket->register_command("dump_historic_ops", "dump_historic_ops", + asok_hook, "show slowest recent ops"); assert(r == 0); - r = admin_socket->register_command("dump_op_pq_state", asok_hook, + r = admin_socket->register_command("dump_op_pq_state", "dump_op_pq_state", + asok_hook, "dump op priority queue state"); assert(r == 0); - r = admin_socket->register_command("dump_blacklist", asok_hook, - "dump_blacklist"); ++ r = admin_socket->register_command("dump_blacklist", "dump_blacklist", ++ asok_hook, ++ "dump blacklisted clients and times"); + assert(r == 0); - r = admin_socket->register_command("dump_watchers", asok_hook, - "dump_watchers"); ++ r = admin_socket->register_command("dump_watchers", "dump_watchers", ++ asok_hook, ++ "show clients which have active watches," ++ " and on which objects"); + assert(r == 0); + test_ops_hook = new TestOpsSocketHook(&(this->service), this->store); - r = admin_socket->register_command("setomapval", test_ops_hook, - "setomapval "); + r = admin_socket->register_command( + "setomapval", + "setomapval " \ + "name=pool,type=CephPoolname " \ + "name=objname,type=CephObjectname " \ + "name=key,type=CephString "\ + "name=val,type=CephString", + test_ops_hook, + "set omap key"); assert(r == 0); - r = admin_socket->register_command("rmomapkey", test_ops_hook, - "rmomapkey "); + r = admin_socket->register_command( + "rmomapkey", + "rmomapkey " \ + "name=pool,type=CephPoolname " \ + "name=objname,type=CephObjectname " \ + "name=key,type=CephString", + test_ops_hook, + "remove omap key"); assert(r == 0); - r = admin_socket->register_command("setomapheader", test_ops_hook, - "setomapheader
"); + r = admin_socket->register_command( + "setomapheader", + "setomapheader " \ + "name=pool,type=CephPoolname " \ + "name=objname,type=CephObjectname " \ + "name=header,type=CephString", + test_ops_hook, + "set omap header"); assert(r == 0); - r = admin_socket->register_command("getomap", test_ops_hook, - "getomap "); + + r = admin_socket->register_command( + "getomap", + "getomap " \ + "name=pool,type=CephPoolname " \ + "name=objname,type=CephObjectname", + test_ops_hook, + "output entire object map"); assert(r == 0); - r = admin_socket->register_command("truncobj", test_ops_hook, - "truncobj "); + + r = admin_socket->register_command( + "truncobj", + "truncobj " \ + "name=pool,type=CephPoolname " \ + "name=objname,type=CephObjectname " \ + "name=len,type=CephInt", + test_ops_hook, + "truncate object to length"); assert(r == 0); - r = admin_socket->register_command("injectdataerr", test_ops_hook, - "injectdataerr "); + r = admin_socket->register_command( + "injectdataerr", + "injectdataerr " \ + "name=pool,type=CephPoolname " \ + "name=objname,type=CephObjectname", + test_ops_hook, + "inject data error into omap"); assert(r == 0); - r = admin_socket->register_command("injectmdataerr", test_ops_hook, - "injectmdataerr "); + r = admin_socket->register_command( + "injectmdataerr", + "injectmdataerr " \ + "name=pool,type=CephPoolname " \ + "name=objname,type=CephObjectname", + test_ops_hook, + "inject metadata error"); assert(r == 0); service.init(); diff --cc src/osdc/Objecter.cc index e5f4a3cc9450,53fd4414c5b2..1bd5d90c8f2e --- a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@@ -35,8 -35,10 +35,11 @@@ #include "messages/MStatfsReply.h" #include "messages/MOSDFailure.h" +#include "messages/MMonCommand.h" + #include "messages/MCommand.h" + #include "messages/MCommandReply.h" + #include #include "common/config.h" @@@ -2180,23 -2291,104 +2292,124 @@@ bool Objecter::RequestStateHook::call(s return true; } +void Objecter::blacklist_self(bool set) +{ + ldout(cct, 10) << "blacklist_self " << (set ? "add" : "rm") << dendl; + + vector cmd; - cmd.push_back("osd"); - cmd.push_back("blacklist"); ++ cmd.push_back("{\"prefix\":\"osd blacklist\", "); + if (set) - cmd.push_back("add"); ++ cmd.push_back("\"blacklistop\":\"add\","); + else - cmd.push_back("rm"); ++ cmd.push_back("\"blacklistop\":\"rm\","); + stringstream ss; + ss << messenger->get_myaddr(); - cmd.push_back(ss.str()); ++ cmd.push_back("\"addr\":\"" + ss.str() + "\""); + + MMonCommand *m = new MMonCommand(monc->get_fsid(), last_seen_osdmap_version); + m->cmd = cmd; + + monc->send_mon_message(m); +} ++ + // commands + + void Objecter::handle_command_reply(MCommandReply *m) + { + map::iterator p = command_ops.find(m->get_tid()); + if (p == command_ops.end()) { + ldout(cct, 10) << "handle_command_reply tid " << m->get_tid() << " not found" << dendl; + m->put(); + return; + } + + CommandOp *c = p->second; + if (!c->session || + m->get_connection() != c->session->con) { + ldout(cct, 10) << "handle_command_reply tid " << m->get_tid() << " got reply from wrong connection " + << m->get_connection() << " " << m->get_source_inst() << dendl; + m->put(); + return; + } + if (c->poutbl) + c->poutbl->claim(m->get_data()); + _finish_command(c, m->r, m->rs); + m->put(); + } + + int Objecter::_submit_command(CommandOp *c, tid_t *ptid) + { + tid_t tid = ++last_tid; + ldout(cct, 10) << "_submit_command " << tid << " " << c->cmd << dendl; + c->tid = tid; + command_ops[tid] = c; + num_homeless_ops++; + int r = recalc_command_target(c); + if (r == RECALC_OP_TARGET_OSD_DNE) { + // XXX take back tid incr? + return -ENXIO; + } + if (c->session) + _send_command(c); + else + maybe_request_map(); + if (r == RECALC_OP_TARGET_POOL_DNE) + _send_command_map_check(c); + *ptid = tid; + return 0; + } + + int Objecter::recalc_command_target(CommandOp *c) + { + OSDSession *s = NULL; + if (c->target_osd >= 0) { + if (!osdmap->exists(c->target_osd)) + return RECALC_OP_TARGET_OSD_DNE; + s = get_session(c->target_osd); + } else { + if (!osdmap->have_pg_pool(c->target_pg.pool())) + return RECALC_OP_TARGET_POOL_DNE; + vector acting; + osdmap->pg_to_acting_osds(c->target_pg, acting); + if (!acting.empty()) + s = get_session(acting[0]); + } + if (c->session != s) { + ldout(cct, 10) << "recalc_command_target " << c->tid << " now " << c->session << dendl; + if (s) { + if (!c->session) + num_homeless_ops--; + c->session = s; + s->command_ops.push_back(&c->session_item); + } else { + num_homeless_ops++; + } + return RECALC_OP_TARGET_NEED_RESEND; + } + ldout(cct, 20) << "recalc_command_target " << c->tid << " no change, " << c->session << dendl; + return RECALC_OP_TARGET_NO_ACTION; + } + + void Objecter::_send_command(CommandOp *c) + { + ldout(cct, 10) << "_send_command " << c->tid << dendl; + assert(c->session); + assert(c->session->con); + MCommand *m = new MCommand(monc->monmap.fsid); + m->cmd = c->cmd; + m->set_data(c->inbl); + m->set_tid(c->tid); + messenger->send_message(m, c->session->con); + } + + void Objecter::_finish_command(CommandOp *c, int r, string rs) + { + ldout(cct, 10) << "_finish_command " << c->tid << " = " << r << " " << rs << dendl; + c->session_item.remove_myself(); + if (c->prs) + *c->prs = rs; + if (c->onfinish) + c->onfinish->complete(r); + command_ops.erase(c->tid); + c->put(); + } diff --cc src/pybind/rados.py index f18f179d030b,d9980c265042..4a5ed961603b --- a/src/pybind/rados.py +++ b/src/pybind/rados.py @@@ -217,8 -302,9 +302,9 @@@ Rados object in state %s." % (self.stat length = 20 while True: ret_buf = create_string_buffer(length) - ret = self.librados.rados_conf_get(self.cluster, c_char_p(option), - ret_buf, c_size_t(length)) + ret = run_in_thread(self.librados.rados_conf_get, - (self.cluster, option, ret_buf, ++ (self.cluster, c_char_p(option), ret_buf, + c_size_t(length))) if (ret == 0): return ret_buf.value elif (ret == -errno.ENAMETOOLONG): @@@ -394,11 -487,10 +487,10 @@@ :returns: str - cluster fsid """ self.require_state("connected") - fsid_len = 36 - fsid = create_string_buffer(fsid_len + 1) + buf_len = 37 + fsid = create_string_buffer(buf_len) - ret = self.librados.rados_cluster_fsid(self.cluster, - byref(fsid), - c_size_t(buf_len)) + ret = run_in_thread(self.librados.rados_cluster_fsid, - (self.cluster, byref(fsid), fsid_len + 1)) ++ (self.cluster, byref(fsid), c_size_t(buf_len))) if ret < 0: raise make_ex(ret, "error getting cluster fsid") return fsid.value @@@ -500,8 -690,8 +690,8 @@@ class SnapIterator(object) num_snaps = 10 while True: self.snaps = (ctypes.c_uint64 * num_snaps)() - ret = self.ioctx.librados.rados_ioctx_snap_list(self.ioctx.io, - self.snaps, c_int(num_snaps)) + ret = run_in_thread(self.ioctx.librados.rados_ioctx_snap_list, - (self.ioctx.io, self.snaps, num_snaps)) ++ (self.ioctx.io, self.snaps, c_int(num_snaps))) if (ret >= 0): self.max_snap = ret break @@@ -527,8 -717,8 +717,9 @@@ ioctx '%s'" % self.ioctx.name name_len = 10 while True: name = create_string_buffer(name_len) - ret = self.ioctx.librados.rados_ioctx_snap_get_name(self.ioctx.io, \ - c_uint64(snap_id), byref(name), c_int(name_len)) + ret = run_in_thread(self.ioctx.librados.rados_ioctx_snap_get_name, - (self.ioctx.io, snap_id, byref(name), name_len)) ++ (self.ioctx.io, c_uint64(snap_id), byref(name), ++ c_int(name_len))) if (ret == 0): name_len = ret break @@@ -1102,7 -1279,8 +1280,8 @@@ written." % (self.name, ret, length) self.require_ioctx_open() if not isinstance(key, str): raise TypeError('key must be a string') - ret = self.librados.rados_trunc(self.io, c_char_p(key), c_uint64(size)) + ret = run_in_thread(self.librados.rados_trunc, - (self.io, c_char_p(key), c_size_t(size))) ++ (self.io, c_char_p(key), c_uint64(size))) if ret < 0: raise make_ex(ret, "Ioctx.trunc(%s): failed to truncate %s" % (self.name, key)) return ret