Treat backfill_toofull as a warning condition because it can resolve itself.
Includes test case for PG_BACKFILL_FULL
Includes test case for recovery_toofull / PG_RECOVERY_FULL
Fixes: https://tracker.ceph.com/issues/39555
Signed-off-by: David Zafman <dzafman@redhat.com>
ceph tell <pgid> query
-PG_DEGRADED_FULL
+PG_RECOVERY_FULL
________________
Data redundancy may be reduced or at risk for some data due to a lack
of free space in the cluster. Specifically, one or more PGs has the
-*backfill_toofull* or *recovery_toofull* flag set, meaning that the
+*recovery_toofull* flag set, meaning that the
+cluster is unable to migrate or recover data because one or more OSDs
+is above the *full* threshold.
+
+See the discussion for *OSD_FULL* above for steps to resolve this condition.
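+
+For example, you can review the current ratios and, as a stopgap while
+capacity is added, raise the *full* threshold (illustrative values)::
+
+    ceph osd dump | grep full_ratio
+    ceph osd set-full-ratio 0.96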
+
+PG_BACKFILL_FULL
+________________
+
+Data redundancy may be reduced or at risk for some data due to a lack
+of free space in the cluster. Specifically, one or more PGs has the
+*backfill_toofull* flag set, meaning that the
cluster is unable to migrate or recover data because one or more OSDs
is above the *backfillfull* threshold.
-See the discussion for *OSD_BACKFILLFULL* or *OSD_FULL* above for
+See the discussion for *OSD_BACKFILLFULL* above for
steps to resolve this condition.
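+
+For example, to see which PGs are affected and, if appropriate, temporarily
+raise the *backfillfull* threshold (illustrative value)::
+
+    ceph health detail | grep backfill_toofull
+    ceph osd set-backfillfull-ratio 0.95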
PG_DAMAGED
but couldn't be completed due to insufficient storage capacity. When a
placement group cannot be backfilled, it may be considered ``incomplete``.
+The ``backfill_toofull`` state may be transient. It is possible that as PGs
+are moved around, space may become available. ``backfill_toofull`` is
+similar to ``backfill_wait`` in that backfill can proceed as soon as
+conditions change.
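+
+For example, to check whether the condition is clearing on its own as data
+moves around (PG states are compound strings, so a simple grep works)::
+
+    ceph pg dump pgs 2>/dev/null | grep backfill_toofull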
+
Ceph provides a number of settings to manage the load spike associated with
reassigning placement groups to an OSD (especially a new OSD). By default,
``osd_max_backfills`` sets the maximum number of concurrent backfills to and from
The placement group is waiting in line to start backfill.
*backfill_toofull*
- A backfill operation is waiting because the destination OSD is over its
- full ratio.
+ A backfill operation is waiting because the destination OSD is over
+ the backfillfull ratio.
*backfill_unfound*
Backfill stopped due to unfound objects.
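+
+One way to list the PGs in a given state, matching the jq pattern used in
+the tests below (a sketch)::
+
+    ceph pg dump pgs --format=json 2>/dev/null | \
+        jq -r '.pg_stats[] | select(.state | contains("backfill_toofull")) | .pgid'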
fi
ceph pg dump pgs
+ ceph status
+
+ ceph status --format=json-pretty > $dir/stat.json
+
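+    # Note: jq prints JSON strings with their surrounding double quotes;
+    # eval strips the quotes so the comparisons below see the bare values.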
+ eval SEV=$(jq '.health.checks.PG_BACKFILL_FULL.severity' $dir/stat.json)
+ if [ "$SEV" != "HEALTH_WARN" ]; then
+ echo "PG_BACKFILL_FULL severity $SEV not HEALTH_WARN"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+ eval MSG=$(jq '.health.checks.PG_BACKFILL_FULL.summary.message' $dir/stat.json)
+ if [ "$MSG" != "Low space hindering backfill (add storage if this doesn't resolve itself): 4 pgs backfill_toofull" ]; then
+ echo "PG_BACKFILL_FULL message '$MSG' mismatched"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+ rm -f $dir/stat.json
if [ $ERRORS != "0" ];
then
--- /dev/null
+#!/usr/bin/env bash
+#
+# Copyright (C) 2018 Red Hat <contact@redhat.com>
+#
+# Author: David Zafman <dzafman@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+function run() {
+ local dir=$1
+ shift
+
+ export CEPH_MON="127.0.0.1:7221" # git grep '\<7221\>' : there must be only one
+ export CEPH_ARGS
+ CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+ CEPH_ARGS+="--mon-host=$CEPH_MON "
+ CEPH_ARGS+="--osd_max_backfills=10 "
+ export objects=600
+ export poolprefix=test
+
+ local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+ for func in $funcs ; do
+ setup $dir || return 1
+ $func $dir || return 1
+ teardown $dir || return 1
+ done
+}
+
+
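+# Count the PGs whose state string contains $1. PG states are compound
+# strings such as "active+backfill_toofull", so a jq substring match on
+# the JSON pg dump is sufficient.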
+function get_num_in_state() {
+ local state=$1
+ local expression
+ expression+="select(contains(\"${state}\"))"
+ ceph --format json pg dump pgs 2>/dev/null | \
+ jq ".pg_stats | [.[] | .state | $expression] | length"
+}
+
+
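+# Poll until no PG reports the given state. The timeout schedule from
+# get_timeout_delays restarts whenever the count changes, so we only fail
+# (dump pgs and return 1) if the count stalls for the whole schedule.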
+function wait_for_state() {
+ local state=$1
+ local num_in_state=-1
+ local cur_in_state
+ local -a delays=($(get_timeout_delays $2 5))
+ local -i loop=0
+
+ flush_pg_stats || return 1
+ while test $(get_num_pgs) == 0 ; do
+ sleep 1
+ done
+
+ while true ; do
+ cur_in_state=$(get_num_in_state ${state})
+ test $cur_in_state = "0" && break
+ if test $cur_in_state != $num_in_state ; then
+ loop=0
+ num_in_state=$cur_in_state
+ elif (( $loop >= ${#delays[*]} )) ; then
+ ceph pg dump pgs
+ return 1
+ fi
+ sleep ${delays[$loop]}
+ loop+=1
+ done
+ return 0
+}
+
+
+function wait_for_recovery_toofull() {
+ local timeout=$1
+ wait_for_state recovery_toofull $timeout
+}
+
+
+# Create 1 pool with size 1
+# Set full-ratio to 50%
+# Write 600 objects of 5K each (3000K)
+# Inject fake_statfs_for_testing of 3600K (83% full)
+# Increase the pool size to 2
+# The pool shouldn't have room to recover
+function TEST_recovery_test_simple() {
+ local dir=$1
+ local pools=1
+ local OSDS=2
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ export CEPH_ARGS
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+ ceph osd set-nearfull-ratio .40
+ ceph osd set-backfillfull-ratio .45
+ ceph osd set-full-ratio .50
+
+ for p in $(seq 1 $pools)
+ do
+ create_pool "${poolprefix}$p" 1 1
+ ceph osd pool set "${poolprefix}$p" size 1
+ done
+
+ wait_for_clean || return 1
+
+ dd if=/dev/urandom of=$dir/datafile bs=1024 count=5
+ for o in $(seq 1 $objects)
+ do
+ rados -p "${poolprefix}$p" put obj$o $dir/datafile
+ done
+
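+    # Fake each OSD's total capacity as 3686400 bytes (3600K). With 3000K
+    # of objects already written, the OSDs report ~83% full, well over the
+    # 50% full ratio set above.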
+ for o in $(seq 0 $(expr $OSDS - 1))
+ do
+ ceph tell osd.$o injectargs '--fake_statfs_for_testing 3686400' || return 1
+ done
+ sleep 5
+
+ ceph pg dump pgs
+
+ for p in $(seq 1 $pools)
+ do
+ ceph osd pool set "${poolprefix}$p" size 2
+ done
+
+    # If this times out, we'll detect the errors below
+ wait_for_recovery_toofull 30
+
+ ERRORS=0
+ if [ "$(ceph pg dump pgs | grep +recovery_toofull | wc -l)" != "1" ];
+ then
+ echo "One pool should have been in recovery_toofull"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+
+ ceph pg dump pgs
+ ceph status
+ ceph status --format=json-pretty > $dir/stat.json
+
+ eval SEV=$(jq '.health.checks.PG_RECOVERY_FULL.severity' $dir/stat.json)
+ if [ "$SEV" != "HEALTH_ERR" ]; then
+ echo "PG_RECOVERY_FULL severity $SEV not HEALTH_ERR"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+ eval MSG=$(jq '.health.checks.PG_RECOVERY_FULL.summary.message' $dir/stat.json)
+ if [ "$MSG" != "Full OSDs blocking recovery: 1 pg recovery_toofull" ]; then
+ echo "PG_RECOVERY_FULL message '$MSG' mismatched"
+ ERRORS="$(expr $ERRORS + 1)"
+ fi
+ rm -f $dir/stat.json
+
+ if [ $ERRORS != "0" ];
+ then
+ return 1
+ fi
+
+ for i in $(seq 1 $pools)
+ do
+ delete_pool "${poolprefix}$i"
+ done
+ kill_daemons $dir || return 1
+}
+
+
+main osd-recovery-space "$@"
+
+# Local Variables:
+# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-space.sh"
+# End:
typedef enum pg_consequence_t {
UNAVAILABLE = 1, // Client IO to the pool may block
DEGRADED = 2, // Fewer than the requested number of replicas are present
- DEGRADED_FULL = 3, // Fewer than the request number of replicas may be present
- // and insufficiet resources are present to fix this
- DAMAGED = 4 // The data may be missing or inconsistent on disk and
+ BACKFILL_FULL = 3, // Backfill is blocked for space considerations
+ // This may or may not be a deadlock condition.
+ DAMAGED = 4, // The data may be missing or inconsistent on disk and
// requires repair
+ RECOVERY_FULL = 5 // Recovery is blocked because OSDs are full
} pg_consequence_t;
// For a given PG state, how should it be reported at the pool level?
{ PG_STATE_SNAPTRIM_ERROR, {DAMAGED, {}} },
{ PG_STATE_RECOVERY_UNFOUND, {DAMAGED, {}} },
{ PG_STATE_BACKFILL_UNFOUND, {DAMAGED, {}} },
- { PG_STATE_BACKFILL_TOOFULL, {DEGRADED_FULL, {}} },
- { PG_STATE_RECOVERY_TOOFULL, {DEGRADED_FULL, {}} },
+ { PG_STATE_BACKFILL_TOOFULL, {BACKFILL_FULL, {}} },
+ { PG_STATE_RECOVERY_TOOFULL, {RECOVERY_FULL, {}} },
{ PG_STATE_DEGRADED, {DEGRADED, {}} },
{ PG_STATE_DOWN, {UNAVAILABLE, {}} },
// Delayed (wait until stuck) reports
summary = "Degraded data redundancy: ";
sev = HEALTH_WARN;
break;
- case DEGRADED_FULL:
- health_code = "PG_DEGRADED_FULL";
- summary = "Degraded data redundancy (low space): ";
- sev = HEALTH_ERR;
+ case BACKFILL_FULL:
+ health_code = "PG_BACKFILL_FULL";
+ summary = "Low space hindering backfill (add storage if this doesn't resolve itself): ";
+ sev = HEALTH_WARN;
break;
case DAMAGED:
health_code = "PG_DAMAGED";
summary = "Possible data damage: ";
sev = HEALTH_ERR;
break;
+ case RECOVERY_FULL:
+ health_code = "PG_RECOVERY_FULL";
+ summary = "Full OSDs blocking recovery: ";
+ sev = HEALTH_ERR;
+ break;
default:
ceph_abort();
}
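
A quick way to see which of these health codes is currently active on a
cluster, matching the JSON fields checked in the tests above (a sketch)::

    ceph status --format=json-pretty | jq '.health.checks | keys'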