git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: update recovery stats when the recovery completes 6268/head
author: Loic Dachary <ldachary@redhat.com>
Fri, 16 Oct 2015 17:27:57 +0000 (19:27 +0200)
committer: Loic Dachary <ldachary@redhat.com>
Tue, 10 Nov 2015 06:12:11 +0000 (07:12 +0100)
Neither ReplicatedBackend nor ECBackend increments the recovery
counters (num_objects_recovered, num_bytes_recovered and
num_keys_recovered) when no local recovery is performed. Pass the
stats to on_global_recover instead of on_local_recover so that they
are accounted for even if the local copy didn't need to be recovered.

http://tracker.ceph.com/issues/13453 Fixes: #13453

Signed-off-by: Loic Dachary <ldachary@redhat.com>
src/mon/PGMap.cc
src/osd/ECBackend.cc
src/osd/PGBackend.h
src/osd/ReplicatedBackend.cc
src/osd/ReplicatedPG.cc
src/osd/ReplicatedPG.h

index 1eee600e7cf75519c8b6975dfdb5ed2654706cbc..4829d4939935fdc7225fb25ef12fe1f70b06dfa4 100644 (file)
@@ -1106,6 +1106,9 @@ void PGMap::recovery_rate_summary(Formatter *f, ostream *out,
       f->dump_int("recovering_objects_per_sec", objps);
       f->dump_int("recovering_bytes_per_sec", bps);
       f->dump_int("recovering_keys_per_sec", kps);
+      f->dump_int("num_objects_recovered", pos_delta.stats.sum.num_objects_recovered);
+      f->dump_int("num_bytes_recovered", pos_delta.stats.sum.num_bytes_recovered);
+      f->dump_int("num_keys_recovered", pos_delta.stats.sum.num_keys_recovered);
     } else {
       *out << pretty_si_t(bps) << "B/s";
       if (pos_delta.stats.sum.num_keys_recovered)
index 47e3b2f88773f0899fa4e2415fe4cc35c5bdaf7e..c4381b6891498521430e2a01572e08e10b8d1b60 100644 (file)
@@ -305,19 +305,14 @@ void ECBackend::handle_recovery_push(
     if ((get_parent()->pgb_is_primary())) {
       assert(recovery_ops.count(op.soid));
       assert(recovery_ops[op.soid].obc);
-      object_stat_sum_t stats;
-      stats.num_objects_recovered = 1;
-      stats.num_bytes_recovered = recovery_ops[op.soid].obc->obs.oi.size;
       get_parent()->on_local_recover(
        op.soid,
-       stats,
        op.recovery_info,
        recovery_ops[op.soid].obc,
        m->t);
     } else {
       get_parent()->on_local_recover(
        op.soid,
-       object_stat_sum_t(),
        op.recovery_info,
        ObjectContextRef(),
        m->t);
@@ -599,7 +594,11 @@ void ECBackend::continue_recovery_op(
                object_stat_sum_t());
            }
          }
-         get_parent()->on_global_recover(op.hoid);
+         object_stat_sum_t stat;
+         stat.num_bytes_recovered = op.recovery_info.size;
+         stat.num_keys_recovered = 0; // ??? op ... omap_entries.size(); ?
+         stat.num_objects_recovered = 1;
+         get_parent()->on_global_recover(op.hoid, stat);
          dout(10) << __func__ << ": WRITING return " << op << dendl;
          recovery_ops.erase(op.hoid);
          return;
index 1e410c76d9d3c5cb7e157bdc74f7e59f852dde51..e98ef0cc786ff45ae06e3ef874680a7757cf3a6c 100644 (file)
@@ -60,7 +60,6 @@
       */
      virtual void on_local_recover(
        const hobject_t &oid,
-       const object_stat_sum_t &stat_diff,
        const ObjectRecoveryInfo &recovery_info,
        ObjectContextRef obc,
        ObjectStore::Transaction *t
       * Called when transaction recovering oid is durable and
       * applied on all replicas
       */
-     virtual void on_global_recover(const hobject_t &oid) = 0;
+     virtual void on_global_recover(
+       const hobject_t &oid,
+       const object_stat_sum_t &stat_diff
+       ) = 0;
 
      /**
       * Called when peer is recovered
index d7109ef42f8ade69c6ebae2fa1923615763e94eb..aaa6dcc90a16e4aa1dcf100d57d37b0b6be930dd 100644 (file)
@@ -889,7 +889,7 @@ struct C_ReplicatedBackend_OnPullComplete : GenContext<ThreadPool::TPHandle&> {
       assert(j != bc->pulling.end());
       if (!bc->start_pushes(*i, j->second.obc, h)) {
        bc->get_parent()->on_global_recover(
-         *i);
+         *i, j->second.stat);
       }
       bc->pulling.erase(*i);
       handle.reset_tp_timeout();
@@ -1859,8 +1859,6 @@ bool ReplicatedBackend::handle_pull_response(
 
   pi.recovery_progress = pop.after_progress;
 
-  pi.stat.num_bytes_recovered += data.length();
-
   dout(10) << "new recovery_info " << pi.recovery_info
           << ", new progress " << pi.recovery_progress
           << dendl;
@@ -1875,13 +1873,10 @@ bool ReplicatedBackend::handle_pull_response(
                   pop.omap_entries,
                   t);
 
-  pi.stat.num_keys_recovered += pop.omap_entries.size();
-
   if (complete) {
     to_continue->push_back(hoid);
-    pi.stat.num_objects_recovered++;
     get_parent()->on_local_recover(
-      hoid, pi.stat, pi.recovery_info, pi.obc, t);
+      hoid, pi.recovery_info, pi.obc, t);
     pull_from_peer[from].erase(hoid);
     if (pull_from_peer[from].empty())
       pull_from_peer.erase(from);
@@ -1923,7 +1918,6 @@ void ReplicatedBackend::handle_push(
   if (complete)
     get_parent()->on_local_recover(
       pop.recovery_info.soid,
-      object_stat_sum_t(),
       pop.recovery_info,
       ObjectContextRef(), // ok, is replica
       t);
@@ -2216,12 +2210,17 @@ bool ReplicatedBackend::handle_push_reply(pg_shard_t peer, PushReplyOp &op, Push
        peer, soid, pi->recovery_info,
        pi->stat);
 
+      object_stat_sum_t stat;
+      stat.num_bytes_recovered = pi->recovery_info.size;
+      stat.num_keys_recovered = reply->omap_entries.size();
+      stat.num_objects_recovered = 1;
+
       pushing[soid].erase(peer);
       pi = NULL;
 
 
       if (pushing[soid].empty()) {
-       get_parent()->on_global_recover(soid);
+       get_parent()->on_global_recover(soid, stat);
        pushing.erase(soid);
       } else {
        dout(10) << "pushed " << soid << ", still waiting for push ack from "
index 9c55e65238f9154663c087e7e13fa7469acc039a..2111bc8b66507eb915fcc1d094c328f45de1ece1 100644 (file)
@@ -178,7 +178,6 @@ public:
 
 void ReplicatedPG::on_local_recover(
   const hobject_t &hoid,
-  const object_stat_sum_t &stat_diff,
   const ObjectRecoveryInfo &_recovery_info,
   ObjectContextRef obc,
   ObjectStore::Transaction *t
@@ -227,8 +226,6 @@ void ReplicatedPG::on_local_recover(
   recover_got(recovery_info.soid, recovery_info.version);
 
   if (is_primary()) {
-    info.stats.stats.sum.add(stat_diff);
-
     assert(obc);
     obc->obs.exists = true;
     obc->ondisk_write_lock();
@@ -276,8 +273,10 @@ void ReplicatedPG::on_local_recover(
 }
 
 void ReplicatedPG::on_global_recover(
-  const hobject_t &soid)
+  const hobject_t &soid,
+  const object_stat_sum_t &stat_diff)
 {
+  info.stats.stats.sum.add(stat_diff);
   missing_loc.recovered(soid);
   publish_stats_to_osd();
   dout(10) << "pushed " << soid << " to all replicas" << dendl;
index 1b65b50129128b33b7a01a5f8d8695c6422f8ba9..1735c92f15eb805b63fe4e6952a1bec03ffd2521 100644 (file)
@@ -262,7 +262,6 @@ public:
   /// Listener methods
   void on_local_recover(
     const hobject_t &oid,
-    const object_stat_sum_t &stat_diff,
     const ObjectRecoveryInfo &recovery_info,
     ObjectContextRef obc,
     ObjectStore::Transaction *t
@@ -277,7 +276,8 @@ public:
     pg_shard_t peer,
     const hobject_t oid);
   void on_global_recover(
-    const hobject_t &oid);
+    const hobject_t &oid,
+    const object_stat_sum_t &stat_diff);
   void failed_push(pg_shard_t from, const hobject_t &soid);
   void cancel_pull(const hobject_t &soid);