function TEST_repair_stats() {
local dir=$1
local poolname=testpool
+ local OSDS=2
local OBJS=30
+ # This needs to be an even number so the removals split evenly between two OSDs
local REPAIRS=20
# Launch a cluster with 5 seconds scrub interval
run_mgr $dir x || return 1
local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
--osd-scrub-interval-randomize-ratio=0"
- for id in $(seq 0 2) ; do
+ for id in $(seq 0 $(expr $OSDS - 1)) ; do
run_osd_bluestore $dir $id $ceph_osd_args || return 1
done
local primary=$(get_primary $poolname obj1)
kill_daemons $dir TERM osd.$other >&2 < /dev/null || return 1
+ kill_daemons $dir TERM osd.$primary >&2 < /dev/null || return 1
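+ # Remove $REPAIRS objects, alternating between the two stopped OSDs,
+ # so each OSD ends up missing half of them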
+ for i in $(seq 1 $REPAIRS)
+ do
+ # Remove from both osd.0 and osd.1
+ OSD=$(expr $i % 2)
+ _objectstore_tool_nodown $dir $OSD obj$i remove || return 1
+ done
+ run_osd_bluestore $dir $primary $ceph_osd_args || return 1
+ run_osd_bluestore $dir $other $ceph_osd_args || return 1
+ wait_for_clean || return 1
+
+ repair $pgid
+ wait_for_clean || return 1
+ ceph pg dump pgs
+
+ # This should have caused $REPAIRS objects to be repaired
+ ceph pg $pgid query | jq '.info.stats.stat_sum'
+ COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
+ test "$COUNT" = "$REPAIRS" || return 1
+
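+ # Each OSD lost half of the removed objects, so each should report REPAIRS/2 shard repairs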
+ ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $primary )"
+ COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $primary ).num_shards_repaired")
+ test "$COUNT" = "$(expr $REPAIRS / 2)" || return 1
+
+ ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $other )"
+ COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $other ).num_shards_repaired")
+ test "$COUNT" = "$(expr $REPAIRS / 2)" || return 1
+
+ ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum"
+ COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
+ test "$COUNT" = "$REPAIRS" || return 1
+
+ # Tear down
+ teardown $dir || return 1
+}
+
+function TEST_repair_stats_ec() {
+ local dir=$1
+ local poolname=testpool
+ local OSDS=3
+ local OBJS=30
+ # This needs to be an even number so the removals split evenly between two OSDs
+ local REPAIRS=26
+ local allow_overwrites=false
+
+ # Launch a cluster with 5 seconds scrub interval
+ setup $dir || return 1
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
+ --osd-scrub-interval-randomize-ratio=0"
+ for id in $(seq 0 $(expr $OSDS - 1)) ; do
+ run_osd_bluestore $dir $id $ceph_osd_args || return 1
+ done
+
+ # Create an EC pool
+ create_ec_pool $poolname $allow_overwrites k=2 m=1 || return 1
+
+ # Put $OBJS objects
+ local payload=ABCDEF
+ echo $payload > $dir/ORIGINAL
+ for i in $(seq 1 $OBJS)
+ do
+ rados --pool $poolname put obj$i $dir/ORIGINAL || return 1
+ done
+
+ # Physically remove objects from two of the shards
+ # Restarted osds get $ceph_osd_args passed
+ local other=$(get_not_primary $poolname obj1)
+ local pgid=$(get_pg $poolname obj1)
+ local primary=$(get_primary $poolname obj1)
+
+ kill_daemons $dir TERM osd.$other >&2 < /dev/null || return 1
+ kill_daemons $dir TERM osd.$primary >&2 < /dev/null || return 1
for i in $(seq 1 $REPAIRS)
do
- _objectstore_tool_nodown $dir $other obj$i remove || return 1
+ # Remove from both osd.0 and osd.1
+ OSD=$(expr $i % 2)
+ _objectstore_tool_nodown $dir $OSD obj$i remove || return 1
done
+ run_osd_bluestore $dir $primary $ceph_osd_args || return 1
run_osd_bluestore $dir $other $ceph_osd_args || return 1
+ wait_for_clean || return 1
repair $pgid
wait_for_clean || return 1
ceph pg dump pgs
# This should have caused $REPAIRS objects to be repaired
+ ceph pg $pgid query | jq '.info.stats.stat_sum'
COUNT=$(ceph pg $pgid query | jq '.info.stats.stat_sum.num_objects_repaired')
test "$COUNT" = "$REPAIRS" || return 1
+ for osd in $(seq 0 $(expr $OSDS - 1)) ; do
+ if [ $osd = $other -o $osd = $primary ]; then
+ repair=$(expr $REPAIRS / 2)
+ else
+ repair="0"
+ fi
+
+ ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $osd )"
+ COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats[] | select(.osd == $osd ).num_shards_repaired")
+ test "$COUNT" = "$repair" || return 1
+ done
+
+ ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum"
+ COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
+ test "$COUNT" = "$REPAIRS" || return 1
+
# Tear down
teardown $dir || return 1
}
public:
friend factory;
private:
- static constexpr int HEAD_VERSION = 3;
+ static constexpr int HEAD_VERSION = 4;
static constexpr int COMPAT_VERSION = 2;
public:
spg_t pgid;
epoch_t map_epoch = 0, min_epoch = 0;
vector<PushOp> pushes;
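+ // Set by the primary when these pushes are part of a repair, so replicas
+ // can count repaired shards in their osd_stat_t (encoded since version 4)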
+ bool is_repair = false;
private:
uint64_t cost;
} else {
min_epoch = map_epoch;
}
+ if (header.version >= 4) {
+ decode(is_repair, p);
+ } else {
+ is_repair = false;
+ }
}
void encode_payload(uint64_t features) override {
encode(pgid.shard, payload);
encode(from, payload);
encode(min_epoch, payload);
+ encode(is_repair, payload);
}
std::string_view get_type_name() const override { return "MOSDPGPush"; }
void ECBackend::handle_recovery_push(
const PushOp &op,
- RecoveryMessages *m)
+ RecoveryMessages *m,
+ bool is_repair)
{
if (get_parent()->check_failsafe_full()) {
dout(10) << __func__ << " Out of space (failsafe) processing push request." << dendl;
if ((get_parent()->pgb_is_primary())) {
ceph_assert(recovery_ops.count(op.soid));
ceph_assert(recovery_ops[op.soid].obc);
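+ // The primary knows the repair state locally; replicas rely on the
+ // is_repair flag carried by MOSDPGPush (see the else branch below)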
+ if (get_parent()->pg_is_repair())
+ get_parent()->inc_osd_stat_repaired();
get_parent()->on_local_recover(
op.soid,
op.recovery_info,
false,
&m->t);
} else {
+ // If the primary told us this is a repair, bump osd_stat_t::num_shards_repaired
+ if (is_repair)
+ get_parent()->inc_osd_stat_repaired();
get_parent()->on_local_recover(
op.soid,
op.recovery_info,
msg->pgid = spg_t(get_parent()->get_info().pgid.pgid, i->first.shard);
msg->pushes.swap(i->second);
msg->compute_cost(cct);
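+ // Mark repair pushes so the receiving OSD bumps num_shards_repaired when it applies them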
+ msg->is_repair = get_parent()->pg_is_repair();
get_parent()->send_message(
i->first.osd,
msg);
for (vector<PushOp>::const_iterator i = op->pushes.begin();
i != op->pushes.end();
++i) {
- handle_recovery_push(*i, &rm);
+ handle_recovery_push(*i, &rm, op->is_repair);
}
dispatch_recovery_messages(rm, priority);
return true;
RecoveryMessages *m);
void handle_recovery_push(
const PushOp &op,
- RecoveryMessages *m);
+ RecoveryMessages *m,
+ bool is_repair);
void handle_recovery_push_reply(
const PushReplyOp &op,
pg_shard_t from,
return osd_stat;
}
+void OSDService::inc_osd_stat_repaired()
+{
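+ // Take stat_lock to stay consistent with readers such as get_osd_stat()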
+ std::lock_guard l(stat_lock);
+ osd_stat.num_shards_repaired++;
+}
+
float OSDService::compute_adjusted_ratio(osd_stat_t new_stat, float *pratio,
uint64_t adjust_used)
{
void set_statfs(const struct store_statfs_t &stbuf,
osd_alert_list_t& alerts);
osd_stat_t set_osd_stat(vector<int>& hb_peers, int num_pgs);
+ void inc_osd_stat_repaired(void);
float compute_adjusted_ratio(osd_stat_t new_stat, float *pratio, uint64_t adjust_used = 0);
osd_stat_t get_osd_stat() {
std::lock_guard l(stat_lock);
virtual bool check_osdmap_full(const set<pg_shard_t> &missing_on) = 0;
+ virtual bool pg_is_repair() = 0;
+ virtual void inc_osd_stat_repaired() = 0;
virtual bool pg_is_remote_backfilling() = 0;
virtual void pg_add_local_num_bytes(int64_t num_bytes) = 0;
virtual void pg_sub_local_num_bytes(int64_t num_bytes) = 0;
release_object_locks(manager);
}
+ bool pg_is_repair() override {
+ return is_repair();
+ }
+ void inc_osd_stat_repaired() override {
+ osd->inc_osd_stat_repaired();
+ }
bool pg_is_remote_backfilling() override {
return is_remote_backfilling();
}
i != m->pushes.end();
++i) {
replies.push_back(PushReplyOp());
- handle_push(from, *i, &(replies.back()), &t);
+ handle_push(from, *i, &(replies.back()), &t, m->is_repair);
}
MOSDPGPushReply *reply = new MOSDPGPushReply;
if (complete) {
pi.stat.num_objects_recovered++;
// XXX: This could overcount if regular recovery is needed right after a repair
- if (get_parent()->pg_is_repair())
+ if (get_parent()->pg_is_repair()) {
pi.stat.num_objects_repaired++;
+ get_parent()->inc_osd_stat_repaired();
+ }
clear_pull_from(piter);
to_continue->push_back({hoid, pi.stat});
get_parent()->on_local_recover(
void ReplicatedBackend::handle_push(
pg_shard_t from, const PushOp &pop, PushReplyOp *response,
- ObjectStore::Transaction *t)
+ ObjectStore::Transaction *t, bool is_repair)
{
dout(10) << "handle_push "
<< pop.recovery_info
pop.omap_entries,
t);
- if (complete)
+ if (complete) {
+ if (is_repair) {
+ get_parent()->inc_osd_stat_repaired();
+ dout(20) << __func__ << " repair complete" << dendl;
+ }
get_parent()->on_local_recover(
pop.recovery_info.soid,
pop.recovery_info,
ObjectContextRef(), // ok, is replica
false,
t);
+ }
}
void ReplicatedBackend::send_pushes(int prio, map<pg_shard_t, vector<PushOp> > &pushes)
msg->map_epoch = get_osdmap_epoch();
msg->min_epoch = get_parent()->get_last_peering_reset_epoch();
msg->set_priority(prio);
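+ // As in the EC backend, flag repair pushes so handle_push() on the
+ // receiving OSD can bump num_shards_repaired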
+ msg->is_repair = get_parent()->pg_is_repair();
for (;
(j != i->second.end() &&
cost < cct->_conf->osd_max_push_cost &&
list<pull_complete_info> *to_continue,
ObjectStore::Transaction *t);
void handle_push(pg_shard_t from, const PushOp &op, PushReplyOp *response,
- ObjectStore::Transaction *t);
+ ObjectStore::Transaction *t, bool is_repair);
static void trim_pushed_data(const interval_set<uint64_t> ©_subset,
const interval_set<uint64_t> &intervals_received,
f->close_section();
f->dump_int("snap_trim_queue_len", snap_trim_queue_len);
f->dump_int("num_snap_trimming", num_snap_trimming);
+ f->dump_int("num_shards_repaired", num_shards_repaired);
f->open_object_section("op_queue_age_hist");
op_queue_age_hist.dump(f);
f->close_section();
void osd_stat_t::encode(bufferlist &bl, uint64_t features) const
{
- ENCODE_START(10, 2, bl);
+ ENCODE_START(11, 2, bl);
//////// for compatibility ////////
int64_t kb = statfs.kb();
encode(statfs, bl);
///////////////////////////////////
encode(os_alerts, bl);
+ encode(num_shards_repaired, bl);
ENCODE_FINISH(bl);
}
{
int64_t kb, kb_used,kb_avail;
int64_t kb_used_data, kb_used_omap, kb_used_meta;
- DECODE_START_LEGACY_COMPAT_LEN(10, 2, 2, bl);
+ DECODE_START_LEGACY_COMPAT_LEN(11, 2, 2, bl);
decode(kb, bl);
decode(kb_used, bl);
decode(kb_avail, bl);
} else {
os_alerts.clear();
}
+ if (struct_v >= 11) {
+ decode(num_shards_repaired, bl);
+ } else {
+ num_shards_repaired = 0;
+ }
DECODE_FINISH(bl);
}
o.back()->hb_peers.push_back(7);
o.back()->snap_trim_queue_len = 8;
o.back()->num_snap_trimming = 99;
+ o.back()->num_shards_repaired = 101;
o.back()->os_alerts[0].emplace(
"some alert", "some alert details");
o.back()->os_alerts[1].emplace(
store_statfs_t statfs;
vector<int> hb_peers;
int32_t snap_trim_queue_len, num_snap_trimming;
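+ // Number of object shards repaired on this OSD, reported in "ceph pg dump" osd_stats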
+ uint64_t num_shards_repaired;
pow2_hist_t op_queue_age_hist;
uint32_t num_pgs = 0;
- osd_stat_t() : snap_trim_queue_len(0), num_snap_trimming(0) {}
+ osd_stat_t() : snap_trim_queue_len(0), num_snap_trimming(0),
+ num_shards_repaired(0) {}
void add(const osd_stat_t& o) {
statfs.add(o.statfs);
snap_trim_queue_len += o.snap_trim_queue_len;
num_snap_trimming += o.num_snap_trimming;
+ num_shards_repaired += o.num_shards_repaired;
op_queue_age_hist.add(o.op_queue_age_hist);
os_perf_stat.add(o.os_perf_stat);
num_pgs += o.num_pgs;
statfs.sub(o.statfs);
snap_trim_queue_len -= o.snap_trim_queue_len;
num_snap_trimming -= o.num_snap_trimming;
+ num_shards_repaired -= o.num_shards_repaired;
op_queue_age_hist.sub(o.op_queue_age_hist);
os_perf_stat.sub(o.os_perf_stat);
num_pgs -= o.num_pgs;
return l.statfs == r.statfs &&
l.snap_trim_queue_len == r.snap_trim_queue_len &&
l.num_snap_trimming == r.num_snap_trimming &&
+ l.num_shards_repaired == r.num_shards_repaired &&
l.hb_peers == r.hb_peers &&
l.op_queue_age_hist == r.op_queue_age_hist &&
l.os_perf_stat == r.os_perf_stat &&