scrub errors. This repair count is maintained as a way of identifying any such
failing disks.
+In order to allow clearing of the warning, a new command
+``ceph tell osd.# clear_shards_repaired [count]`` has been added.
+By default, the command sets the repair count to 0. An optional ``count`` value
+can be passed to set it to a different number instead. Thus, the administrator
+can re-enable the warning by passing the value of ``mon_osd_warn_num_repaired``
+(or above) to the command.
+An alternative to using ``clear_shards_repaired`` is to mute the
+``OSD_TOO_MANY_REPAIRS`` alert with ``ceph health mute``.
LARGE_OMAP_OBJECTS
__________________
ceph health | $(! grep -q "Too many repaired reads on 1 OSDs") || return 1
set +o pipefail
+ ceph health unmute OSD_TOO_MANY_REPAIRS
+ ceph tell osd.$primary clear_shards_repaired
+ sleep 10
+
+ set -o pipefail
+ # Resetting the repair count should have cleared the health warning
+ ceph health | $(! grep -q "Too many repaired reads on 1 OSDs") || return 1
+ set +o pipefail
+
+ ceph tell osd.$primary clear_shards_repaired $OBJS
+ sleep 10
+
for i in $(seq 1 $OBJS)
do
inject_$inject rep data $poolname ${objbase}-$i $dir 0 || return 1
COUNT=$(ceph pg dump --format=json-pretty | jq ".pg_map.osd_stats_sum.num_shards_repaired")
test "$COUNT" = "$(expr $OBJS \* 3)" || return 1
- # Give mon a chance to notice additional OSD and unmute
+ # Give mon a chance to notice additional OSD and reset num_shards_repaired
# The default tick time is 5 seconds
CHECKTIME=10
LOOPS=0
return;
}
+// Overwrite (rather than increment) the repaired-shards counter kept in
+// osd_stat with `count`. stat_lock is held for the duration of the store.
+void OSDService::set_osd_stat_repaired(int64_t count)
+{
+ std::lock_guard stat_guard{stat_lock};
+ osd_stat.num_shards_repaired = count;
+}
+
float OSDService::compute_adjusted_ratio(osd_stat_t new_stat, float *pratio,
uint64_t adjust_used)
{
scrub_purged_snaps();
}
+ else if (prefix == "clear_shards_repaired") {
+ int64_t count = cmd_getval_or<int64_t>(cmdmap, "count", 0);
+ service.set_osd_stat_repaired(count);
+ }
+
else if (prefix == "reset_purged_snaps_last") {
lock_guard l(osd_lock);
superblock.purged_snaps_last = 0;
asok_hook,
"debug the scrubber");
ceph_assert(r == 0);
+ r = admin_socket->register_command(
+ "clear_shards_repaired "
+ "name=count,type=CephInt,req=false,range=0",
+ asok_hook,
+ "clear num_shards_repaired to clear health warning");
+ ceph_assert(r == 0);
// -- pg commands --
// old form: ceph pg <pgid> command ...
osd_alert_list_t& alerts);
osd_stat_t set_osd_stat(std::vector<int>& hb_peers, int num_pgs);
void inc_osd_stat_repaired(void);
+ void set_osd_stat_repaired(int64_t count);
float compute_adjusted_ratio(osd_stat_t new_stat, float *pratio, uint64_t adjust_used = 0);
osd_stat_t get_osd_stat() {
std::lock_guard l(stat_lock);