From: John Spray Date: Wed, 8 Feb 2017 16:24:24 +0000 (+0000) Subject: mds: expose progress during PurgeQueue drain X-Git-Tag: v12.0.1~140^2~12 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=0952ce99101aad644e8a485f597c6428bfa2d2c0;p=ceph.git mds: expose progress during PurgeQueue drain We don't track an item count, but we do have a number of bytes left in the Journaler, so can use that to give an indication of progress while the MDS rank shutdown is waiting for the PurgeQueue to do its thing. Also lift the ops limit on the PurgeQueue when it goes into the drain phase. Signed-off-by: John Spray --- diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 62ae2764a2a1..9be0c08b0a9e 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -226,13 +226,19 @@ void MDSRankDispatcher::tick() if (is_stopping()) { mdlog->trim(); if (mdcache->shutdown_pass()) { - if (!purge_queue.is_idle()) { + uint64_t pq_progress = 0 ; + uint64_t pq_total = 0; + size_t pq_in_flight = 0; + if (!purge_queue.drain(&pq_progress, &pq_total, &pq_in_flight)) { dout(7) << "shutdown_pass=true, but still waiting for purge queue" << dendl; // This takes unbounded time, so we must indicate progress - // to the administrator - // TODO include progress in message - clog->info() << "MDS rank " << whoami << " waiting for purge queue"; + // to the administrator: we do it in a slightly imperfect way + // by sending periodic (tick frequency) clog messages while + // in this state. 
+ clog->info() << "MDS rank " << whoami << " waiting for purge queue (" + << std::dec << pq_progress << "/" << pq_total << " " << pq_in_flight + << " files purging" << ")"; } else { dout(7) << "shutdown_pass=true, finished w/ shutdown, moving to " "down:stopped" << dendl; diff --git a/src/mds/PurgeQueue.cc b/src/mds/PurgeQueue.cc index b63f362bdbe9..8c99a47215e2 100644 --- a/src/mds/PurgeQueue.cc +++ b/src/mds/PurgeQueue.cc @@ -54,9 +54,6 @@ void PurgeItem::decode(bufferlist::iterator &p) DECODE_FINISH(p); } -// TODO: when we're deactivating, lift all limits on -// how many OSD ops we're allowed to emit at a time to -// race through the queue as fast as we can. // TODO: if Objecter has any slow requests, take that as a hint and // slow down our rate of purging (keep accepting pushes though) PurgeQueue::PurgeQueue( @@ -77,7 +74,9 @@ PurgeQueue::PurgeQueue( CEPH_FS_ONDISK_MAGIC, objecter_, nullptr, 0, &timer, &finisher), ops_in_flight(0), - max_purge_ops(0) + max_purge_ops(0), + drain_initial(0), + draining(false) { } @@ -445,9 +444,41 @@ void PurgeQueue::handle_conf_change(const struct md_config_t *conf, } } -bool PurgeQueue::is_idle() const +bool PurgeQueue::drain( + uint64_t *progress, + uint64_t *progress_total, + size_t *in_flight_count + ) { - return in_flight.empty() && ( + assert(progress != nullptr); + assert(progress_total != nullptr); + assert(in_flight_count != nullptr); + + const bool done = in_flight.empty() && ( journaler.get_read_pos() == journaler.get_write_pos()); + if (done) { + return true; + } + + const uint64_t bytes_remaining = journaler.get_write_pos() + - journaler.get_read_pos(); + + if (!draining) { + // Start of draining: remember how much there was outstanding at + // this point so that we can give a progress percentage later + draining = true; + + // Lift the op throttle as this daemon now has nothing to do but + // drain the purge queue, so do it as fast as we can. 
+ max_purge_ops = 0xffff; + } + + drain_initial = max(bytes_remaining, drain_initial); + + *progress = drain_initial - bytes_remaining; + *progress_total = drain_initial; + *in_flight_count = in_flight.size(); + + return false; } diff --git a/src/mds/PurgeQueue.h b/src/mds/PurgeQueue.h index b235536745ba..e86833d882e6 100644 --- a/src/mds/PurgeQueue.h +++ b/src/mds/PurgeQueue.h @@ -103,6 +103,13 @@ protected: bool can_consume(); + // How many bytes were remaining when drain() was first called, + // used for indicating progress. + uint64_t drain_initial; + + // Has drain() ever been called on this instance? + bool draining; + void _consume(); void _execute_item( @@ -131,6 +138,22 @@ public: // anything. bool is_idle() const; + /** + * Signal to the PurgeQueue that you would like it to hurry up and + * finish consuming everything in the queue. Provides progress + * feedback. + * + * @param progress: bytes consumed since we started draining + * @param progress_total: max bytes that were outstanding during purge + * @param in_flight_count: number of file purges currently in flight + * + * @returns true if drain is complete + */ + bool drain( + uint64_t *progress, + uint64_t *progress_total, + size_t *in_flight_count); + void update_op_limit(const MDSMap &mds_map); void handle_conf_change(const struct md_config_t *conf,