From: Loic Dachary Date: Fri, 16 Oct 2015 17:27:57 +0000 (+0200) Subject: osd: update recovery stats when the recovery completes X-Git-Tag: v10.0.4~66^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=ae79f1d14cbfe9bc6cfc4d74fb5ea147467f75dd;p=ceph.git osd: update recovery stats when the recovery completes Neither ReplicatedBackend nor ECBackend increments the num_recovered counter and friends when not doing a local recovery. Pass the stats to on_global_recover instead so that it's called even if the local copy didn't need to be recovered. http://tracker.ceph.com/issues/13453 Fixes: #13453 Signed-off-by: Loic Dachary --- diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index 1eee600e7cf7..4829d4939935 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -1106,6 +1106,9 @@ void PGMap::recovery_rate_summary(Formatter *f, ostream *out, f->dump_int("recovering_objects_per_sec", objps); f->dump_int("recovering_bytes_per_sec", bps); f->dump_int("recovering_keys_per_sec", kps); + f->dump_int("num_objects_recovered", pos_delta.stats.sum.num_objects_recovered); + f->dump_int("num_bytes_recovered", pos_delta.stats.sum.num_bytes_recovered); + f->dump_int("num_keys_recovered", pos_delta.stats.sum.num_keys_recovered); } else { *out << pretty_si_t(bps) << "B/s"; if (pos_delta.stats.sum.num_keys_recovered) diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index 47e3b2f88773..c4381b689149 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -305,19 +305,14 @@ void ECBackend::handle_recovery_push( if ((get_parent()->pgb_is_primary())) { assert(recovery_ops.count(op.soid)); assert(recovery_ops[op.soid].obc); - object_stat_sum_t stats; - stats.num_objects_recovered = 1; - stats.num_bytes_recovered = recovery_ops[op.soid].obc->obs.oi.size; get_parent()->on_local_recover( op.soid, - stats, op.recovery_info, recovery_ops[op.soid].obc, m->t); } else { get_parent()->on_local_recover( op.soid, - object_stat_sum_t(), op.recovery_info, ObjectContextRef(), m->t); @@ -599,7 +594,11 @@ void ECBackend::continue_recovery_op( object_stat_sum_t()); } } - get_parent()->on_global_recover(op.hoid); + object_stat_sum_t stat; + stat.num_bytes_recovered = op.recovery_info.size; + stat.num_keys_recovered = 0; // ??? op ... omap_entries.size(); ? + stat.num_objects_recovered = 1; + get_parent()->on_global_recover(op.hoid, stat); dout(10) << __func__ << ": WRITING return " << op << dendl; recovery_ops.erase(op.hoid); return; diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h index 1e410c76d9d3..e98ef0cc786f 100644 --- a/src/osd/PGBackend.h +++ b/src/osd/PGBackend.h @@ -60,7 +60,6 @@ */ virtual void on_local_recover( const hobject_t &oid, - const object_stat_sum_t &stat_diff, const ObjectRecoveryInfo &recovery_info, ObjectContextRef obc, ObjectStore::Transaction *t @@ -70,7 +69,10 @@ * Called when transaction recovering oid is durable and * applied on all replicas */ - virtual void on_global_recover(const hobject_t &oid) = 0; + virtual void on_global_recover( + const hobject_t &oid, + const object_stat_sum_t &stat_diff + ) = 0; /** * Called when peer is recovered diff --git a/src/osd/ReplicatedBackend.cc b/src/osd/ReplicatedBackend.cc index d7109ef42f8a..aaa6dcc90a16 100644 --- a/src/osd/ReplicatedBackend.cc +++ b/src/osd/ReplicatedBackend.cc @@ -889,7 +889,7 @@ struct C_ReplicatedBackend_OnPullComplete : GenContext { assert(j != bc->pulling.end()); if (!bc->start_pushes(*i, j->second.obc, h)) { bc->get_parent()->on_global_recover( - *i); + *i, j->second.stat); } bc->pulling.erase(*i); handle.reset_tp_timeout(); @@ -1859,8 +1859,6 @@ bool ReplicatedBackend::handle_pull_response( pi.recovery_progress = pop.after_progress; - pi.stat.num_bytes_recovered += data.length(); - dout(10) << "new recovery_info " << pi.recovery_info << ", new progress " << pi.recovery_progress << dendl; @@ -1875,13 +1873,10 @@ bool ReplicatedBackend::handle_pull_response( pop.omap_entries, t); - pi.stat.num_keys_recovered += pop.omap_entries.size(); - if (complete) { to_continue->push_back(hoid); - pi.stat.num_objects_recovered++; get_parent()->on_local_recover( - hoid, pi.stat, pi.recovery_info, pi.obc, t); + hoid, pi.recovery_info, pi.obc, t); pull_from_peer[from].erase(hoid); if (pull_from_peer[from].empty()) pull_from_peer.erase(from); @@ -1923,7 +1918,6 @@ void ReplicatedBackend::handle_push( if (complete) get_parent()->on_local_recover( pop.recovery_info.soid, - object_stat_sum_t(), pop.recovery_info, ObjectContextRef(), // ok, is replica t); @@ -2216,12 +2210,17 @@ bool ReplicatedBackend::handle_push_reply(pg_shard_t peer, PushReplyOp &op, Push peer, soid, pi->recovery_info, pi->stat); + object_stat_sum_t stat; + stat.num_bytes_recovered = pi->recovery_info.size; + stat.num_keys_recovered = reply->omap_entries.size(); + stat.num_objects_recovered = 1; + pushing[soid].erase(peer); pi = NULL; if (pushing[soid].empty()) { - get_parent()->on_global_recover(soid); + get_parent()->on_global_recover(soid, stat); pushing.erase(soid); } else { dout(10) << "pushed " << soid << ", still waiting for push ack from " diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 9c55e65238f9..2111bc8b6650 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -178,7 +178,6 @@ public: void ReplicatedPG::on_local_recover( const hobject_t &hoid, - const object_stat_sum_t &stat_diff, const ObjectRecoveryInfo &_recovery_info, ObjectContextRef obc, ObjectStore::Transaction *t @@ -227,8 +226,6 @@ void ReplicatedPG::on_local_recover( recover_got(recovery_info.soid, recovery_info.version); if (is_primary()) { - info.stats.stats.sum.add(stat_diff); - assert(obc); obc->obs.exists = true; obc->ondisk_write_lock(); @@ -276,8 +273,10 @@ void ReplicatedPG::on_local_recover( } void ReplicatedPG::on_global_recover( - const hobject_t &soid) + const hobject_t &soid, + const object_stat_sum_t &stat_diff) { + info.stats.stats.sum.add(stat_diff); missing_loc.recovered(soid); publish_stats_to_osd(); dout(10) << "pushed " << soid << " to all replicas" << dendl; diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index 1b65b5012912..1735c92f15eb 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -262,7 +262,6 @@ public: /// Listener methods void on_local_recover( const hobject_t &oid, - const object_stat_sum_t &stat_diff, const ObjectRecoveryInfo &recovery_info, ObjectContextRef obc, ObjectStore::Transaction *t @@ -277,7 +276,8 @@ public: pg_shard_t peer, const hobject_t oid); void on_global_recover( - const hobject_t &oid); + const hobject_t &oid, + const object_stat_sum_t &stat_diff); void failed_push(pg_shard_t from, const hobject_t &soid); void cancel_pull(const hobject_t &soid);