From a6735ab009a0348ba3257885d7d88236fd880e2c Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Thu, 19 Jul 2012 17:43:17 -0700 Subject: [PATCH] OpRequest,OSD: track recent slow ops This should be helpful while investigating slow performance. OpRequests now track events with timestamp in addition to dumping them to the log. OpHistory keeps up to a configurable number of the slowest ops over a configurable recent time interval. The admin socket interface for the OSD now has a dump_historic_ops command which dumps the stored slow ops. Reviewed-by: Greg Farnum Reviewed-by: Sage Weil Signed-off-by: Samuel Just --- src/common/config_opts.h | 2 + src/include/utime.h | 2 +- src/osd/OSD.cc | 19 ++++++ src/osd/OSD.h | 6 ++ src/osd/OpRequest.cc | 122 ++++++++++++++++++++++++++++++++------- src/osd/OpRequest.h | 31 +++++++++- 6 files changed, 159 insertions(+), 23 deletions(-) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index c93d21beb2e65..43e726decb5f8 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -340,6 +340,8 @@ OPTION(osd_debug_drop_ping_probability, OPT_DOUBLE, 0) OPTION(osd_debug_drop_ping_duration, OPT_INT, 0) OPTION(osd_debug_drop_pg_create_probability, OPT_DOUBLE, 0) OPTION(osd_debug_drop_pg_create_duration, OPT_INT, 1) +OPTION(osd_op_history_size, OPT_U32, 20) // Max number of completed ops to track +OPTION(osd_op_history_duration, OPT_U32, 600) // Oldest completed op to track OPTION(filestore, OPT_BOOL, false) OPTION(filestore_debug_omap_check, OPT_BOOL, 0) // Expensive debugging check on sync // Use omap for xattrs for attrs over diff --git a/src/include/utime.h b/src/include/utime.h index f88c9290a6511..2ef92a0eb2ed4 100644 --- a/src/include/utime.h +++ b/src/include/utime.h @@ -119,7 +119,7 @@ public: } // cast to double - operator double() { + operator double() const { return (double)sec() + ((double)nsec() / 1000000000.0L); } operator ceph_timespec() { diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 2a7d27a202704..4753b762ab378 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -692,6 +692,7 @@ OSD::OSD(int id, Messenger *internal_messenger, Messenger *external_messenger, stat_lock("OSD::stat_lock"), finished_lock("OSD::finished_lock"), admin_ops_hook(NULL), + historic_ops_hook(NULL), op_queue_len(0), op_wq(this, g_conf->osd_op_thread_timeout, &op_tp), peering_wq(this, g_conf->osd_op_thread_timeout, &op_tp, 200), @@ -758,6 +759,19 @@ int OSD::pre_init() return 0; } +class HistoricOpsSocketHook : public AdminSocketHook { + OSD *osd; +public: + HistoricOpsSocketHook(OSD *o) : osd(o) {} + bool call(std::string command, std::string args, bufferlist& out) { + stringstream ss; + osd->dump_historic_ops(ss); + out.append(ss); + return true; + } +}; + + class OpsFlightSocketHook : public AdminSocketHook { OSD *osd; public: @@ -892,6 +906,9 @@ int OSD::init() AdminSocket *admin_socket = cct->get_admin_socket(); r = admin_socket->register_command("dump_ops_in_flight", admin_ops_hook, "show the ops currently in flight"); + historic_ops_hook = new HistoricOpsSocketHook(this); + r = admin_socket->register_command("dump_historic_ops", historic_ops_hook, + "show slowest recent ops"); assert(r == 0); return 0; @@ -1015,7 +1032,9 @@ int OSD::shutdown() cct->get_admin_socket()->unregister_command("dump_ops_in_flight"); delete admin_ops_hook; + delete historic_ops_hook; admin_ops_hook = NULL; + historic_ops_hook = NULL; recovery_tp.stop(); dout(10) << "recovery tp stopped" << dendl; diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 13a61b22092a9..43643f7d62920 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -128,6 +128,7 @@ class ReplicatedPG; class AuthAuthorizeHandlerRegistry; class OpsFlightSocketHook; +class HistoricOpsSocketHook; extern const coll_t meta_coll; @@ -525,8 +526,13 @@ private: OpTracker op_tracker; void check_ops_in_flight(); void dump_ops_in_flight(ostream& ss); + void dump_historic_ops(ostream& ss) { + return op_tracker.dump_historic_ops(ss); + } friend class OpsFlightSocketHook; + friend class HistoricOpsSocketHook; OpsFlightSocketHook *admin_ops_hook; + HistoricOpsSocketHook *historic_ops_hook; // -- op queue -- list op_queue; diff --git a/src/osd/OpRequest.cc b/src/osd/OpRequest.cc index cb030594cf511..fe2d604832e37 100644 --- a/src/osd/OpRequest.cc +++ b/src/osd/OpRequest.cc @@ -20,6 +20,62 @@ static ostream& _prefix(std::ostream* _dout) return *_dout << "--OSD::tracker-- "; } +void OpHistory::insert(utime_t now, OpRequest *op) { + duration.insert(make_pair(op->get_duration(), op)); + arrived.insert(make_pair(op->get_arrived(), op)); + cleanup(now); +} + +void OpHistory::cleanup(utime_t now) { + while (arrived.size() && + now - arrived.begin()->first > + (double)(g_conf->osd_op_history_duration)) { + delete arrived.begin()->second; + duration.erase(make_pair( + arrived.begin()->second->get_duration(), + arrived.begin()->second)); + arrived.erase(arrived.begin()); + } + + while (duration.size() > g_conf->osd_op_history_size) { + delete duration.begin()->second; + arrived.erase(make_pair( + duration.begin()->second->get_arrived(), + duration.begin()->second)); + duration.erase(duration.begin()); + } +} + +void OpHistory::dump_ops(utime_t now, Formatter *f) +{ + cleanup(now); + f->open_object_section("OpHistory"); + f->dump_int("num to keep", g_conf->osd_op_history_size); + f->dump_int("duration to keep", g_conf->osd_op_history_duration); + { + f->open_array_section("Ops"); + for (set >::const_iterator i = + arrived.begin(); + i != arrived.end(); + ++i) { + f->open_object_section("Op"); + i->second->dump(now, f); + f->close_section(); + } + f->close_section(); + } + f->close_section(); +} + +void OpTracker::dump_historic_ops(ostream &ss) +{ + JSONFormatter jf(true); + Mutex::Locker locker(ops_in_flight_lock); + utime_t now = ceph_clock_now(g_ceph_context); + history.dump_ops(now, &jf); + jf.flush(ss); +} + void OpTracker::dump_ops_in_flight(ostream &ss) { JSONFormatter jf(true); @@ -29,22 +85,8 @@ void OpTracker::dump_ops_in_flight(ostream &ss) jf.open_array_section("ops"); // list of OpRequests utime_t now = ceph_clock_now(g_ceph_context); for (xlist::iterator p = ops_in_flight.begin(); !p.end(); ++p) { - stringstream name; - Message *m = (*p)->request; - m->print(name); jf.open_object_section("op"); - jf.dump_string("description", name.str().c_str()); // this OpRequest - jf.dump_stream("received_at") << (*p)->received_time; - jf.dump_float("age", now - (*p)->received_time); - jf.dump_string("flag_point", (*p)->state_string()); - if (m->get_orig_source().is_client()) { - jf.open_object_section("client_info"); - stringstream client_name; - client_name << m->get_orig_source(); - jf.dump_string("client", client_name.str()); - jf.dump_int("tid", m->get_tid()); - jf.close_section(); // client_info - } + (*p)->dump(now, &jf); jf.close_section(); // this OpRequest } jf.close_section(); // list of OpRequests @@ -59,11 +101,14 @@ void OpTracker::register_inflight_op(xlist::item *i) ops_in_flight.back()->seq = seq++; } -void OpTracker::unregister_inflight_op(xlist::item *i) +void OpTracker::unregister_inflight_op(OpRequest *i) { Mutex::Locker locker(ops_in_flight_lock); - assert(i->get_list() == &ops_in_flight); - i->remove_myself(); + assert(i->xitem.get_list() == &ops_in_flight); + utime_t now = ceph_clock_now(g_ceph_context); + i->xitem.remove_myself(); + i->request->clear_data(); + history.insert(now, i); } bool OpTracker::check_ops_in_flight(std::vector &warning_vector) @@ -128,6 +173,38 @@ bool OpTracker::check_ops_in_flight(std::vector &warning_vector) return warning_vector.size(); } +void OpRequest::dump(utime_t now, Formatter *f) const +{ + Message *m = request; + stringstream name; + m->print(name); + f->dump_string("description", name.str().c_str()); // this OpRequest + f->dump_stream("received_at") << received_time; + f->dump_float("age", now - received_time); + f->dump_float("duration", get_duration()); + f->dump_string("flag_point", state_string()); + if (m->get_orig_source().is_client()) { + f->open_object_section("client_info"); + stringstream client_name; + client_name << m->get_orig_source(); + f->dump_string("client", client_name.str()); + f->dump_int("tid", m->get_tid()); + f->close_section(); // client_info + } + { + f->open_array_section("events"); + for (list >::const_iterator i = events.begin(); + i != events.end(); + ++i) { + f->open_object_section("event"); + f->dump_stream("time") << i->first; + f->dump_string("event", i->second); + f->close_section(); + } + f->close_section(); + } +} + void OpTracker::mark_event(OpRequest *op, const string &dest) { utime_t now = ceph_clock_now(g_ceph_context); @@ -145,8 +222,8 @@ void OpTracker::_mark_event(OpRequest *op, const string &evt, void OpTracker::RemoveOnDelete::operator()(OpRequest *op) { op->mark_event("done"); - tracker->unregister_inflight_op(&(op->xitem)); - delete op; + tracker->unregister_inflight_op(op); + // Do not delete op, unregister_inflight_op took control } OpRequestRef OpTracker::create_request(Message *ref) @@ -168,5 +245,10 @@ OpRequestRef OpTracker::create_request(Message *ref) void OpRequest::mark_event(const string &event) { + utime_t now = ceph_clock_now(g_ceph_context); + { + Mutex::Locker l(lock); + events.push_back(make_pair(now, event)); + } tracker->mark_event(this, event); } diff --git a/src/osd/OpRequest.h b/src/osd/OpRequest.h index 011bbb272bbc7..5e0367c967267 100644 --- a/src/osd/OpRequest.h +++ b/src/osd/OpRequest.h @@ -25,6 +25,17 @@ #include "common/TrackedOp.h" #include "osd/osd_types.h" +class OpRequest; +class OpHistory { + set > arrived; + set > duration; + void cleanup(utime_t now); + +public: + void insert(utime_t now, OpRequest *op); + void dump_ops(utime_t now, Formatter *f); +}; + class OpRequest; typedef std::tr1::shared_ptr OpRequestRef; class OpTracker { @@ -38,12 +49,15 @@ class OpTracker { uint64_t seq; Mutex ops_in_flight_lock; xlist ops_in_flight; + OpHistory history; public: OpTracker() : seq(0), ops_in_flight_lock("OpTracker mutex") {} void dump_ops_in_flight(std::ostream& ss); + void dump_historic_ops(std::ostream& ss); void register_inflight_op(xlist::item *i); - void unregister_inflight_op(xlist::item *i); + void unregister_inflight_op(OpRequest *i); + /** * Look for Ops which are too old, and insert warning * strings for each Op that is too old. @@ -67,11 +81,23 @@ public: */ struct OpRequest : public TrackedOp { friend class OpTracker; + friend class OpHistory; Message *request; xlist::item xitem; utime_t received_time; uint8_t warn_interval_multiplier; + utime_t get_arrived() const { + return received_time; + } + double get_duration() const { + return events.size() ? + (events.rbegin()->first - received_time) : + 0.0; + } + void dump(utime_t now, Formatter *f) const; private: + list > events; + Mutex lock; OpTracker *tracker; osd_reqid_t reqid; uint8_t hit_flag_points; @@ -86,6 +112,7 @@ private: OpRequest(Message *req, OpTracker *tracker) : request(req), xitem(this), warn_interval_multiplier(1), + lock("OpRequest::lock"), tracker(tracker), seq(0) { received_time = request->get_recv_stamp(); @@ -108,7 +135,7 @@ public: bool currently_started() { return latest_flag_point & flag_started; } bool currently_sub_op_sent() { return latest_flag_point & flag_sub_op_sent; } - const char *state_string() { + const char *state_string() const { switch(latest_flag_point) { case flag_queued_for_pg: return "queued for pg"; case flag_reached_pg: return "reached pg"; -- 2.47.3