headers only when the `read-stats` querystring is explicitly included in the
API request.
+* RADOS: The `ceph df` command reports an incorrect MAX AVAIL for stretch mode
+ pools when the CRUSH rule uses multiple take steps, one per datacenter,
+ because `PGMap::get_rule_avail` incorrectly calculates the available space
+ from only one datacenter. As a workaround, define the CRUSH rule with a single
+ `step take default` followed by `step choose firstn 0 type datacenter`, as in
+ the example rule below. See https://tracker.ceph.com/issues/56650#note-6 for
+ details. Upgrading a cluster configured with a CRUSH rule that uses multiple
+ take steps can lead to data shuffling, as the new CRUSH changes may
+ necessitate data redistribution. In contrast, a stretch rule with a
+ single-take configuration will not cause any data movement during the upgrade.
+
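+ For example, a single-take rule of the recommended shape looks like the
+ following sketch (it mirrors the `stretch_replicated_rule` used in the
+ standalone test; the rule name, id, and bucket names are illustrative and
+ should match your own CRUSH topology)::
+
+   rule stretch_replicated_rule {
+       id 2
+       type replicated
+       step take default
+       step choose firstn 0 type datacenter
+       step chooseleaf firstn 2 type host
+       step emit
+   }
+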
>=19.2.1
* CephFS: Command `fs subvolume create` now allows tagging subvolumes through option
teardown $dir || return 1
}
+function TEST_stretched_cluster_ceph_df() {
+ local dir=$1
+ local OSDS=12
+ local nodes=6
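+    # Topology: 12 OSDs spread across 6 hosts, split evenly between two
+    # datacenters (DC1 and DC2) under the default root.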
+ setup $dir || return 1
+
+ run_mon $dir a || return 1
+ run_mgr $dir x || return 1
+ export CEPH_ARGS
+
+ for osd in $(seq 0 $(expr $OSDS - 1))
+ do
+ run_osd $dir $osd || return 1
+ done
+
+ for dc in DC1 DC2
+ do
+ ceph osd crush add-bucket $dc datacenter
+ ceph osd crush move $dc root=default
+ done
+
+ for node in $(seq 0 $(expr $nodes - 1))
+ do
+ ceph osd crush add-bucket osd-$node host
+ done
+
+ for p in $(seq 0 2)
+ do
+ ceph osd crush move osd-$p datacenter=DC1
+ done
+
+ for q in $(seq 3 5)
+ do
+ ceph osd crush move osd-$q datacenter=DC2
+ done
+
+ ceph osd crush move osd.0 host=osd-0
+ ceph osd crush move osd.3 host=osd-0
+ ceph osd crush move osd.1 host=osd-1
+ ceph osd crush move osd.2 host=osd-1
+ ceph osd crush move osd.7 host=osd-2
+ ceph osd crush move osd.11 host=osd-2
+ ceph osd crush move osd.4 host=osd-3
+ ceph osd crush move osd.8 host=osd-3
+ ceph osd crush move osd.5 host=osd-4
+ ceph osd crush move osd.9 host=osd-4
+ ceph osd crush move osd.6 host=osd-5
+ ceph osd crush move osd.10 host=osd-5
+
+ hostname=$(hostname -s)
+ ceph osd crush remove $hostname
+
+ ceph osd getcrushmap > $dir/crushmap || return 1
+ crushtool --decompile $dir/crushmap > $dir/crushmap.txt || return 1
+ sed 's/^# end crush map$//' $dir/crushmap.txt > $dir/crushmap_modified.txt || return 1
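+    # Append two stretch rules to the decompiled map: stretch_rule takes each
+    # datacenter separately, while stretch_replicated_rule takes the default
+    # root once and chooses across datacenters.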
+ cat >> $dir/crushmap_modified.txt << EOF
+rule stretch_rule {
+ id 1
+ type replicated
+ min_size 1
+ max_size 10
+ step take DC1
+ step chooseleaf firstn 2 type host
+ step emit
+ step take DC2
+ step chooseleaf firstn 2 type host
+ step emit
+}
+
+rule stretch_replicated_rule {
+ id 2
+ type replicated
+ min_size 1
+ max_size 10
+ step take default
+ step choose firstn 0 type datacenter
+ step chooseleaf firstn 2 type host
+ step emit
+}
+
+# end crush map
+EOF
+
+ crushtool --compile $dir/crushmap_modified.txt -o $dir/crushmap.bin || return 1
+ ceph osd setcrushmap -i $dir/crushmap.bin || return 1
+
+ ceph osd crush rule ls
+
+ local stretched_poolname=stretched_rbdpool
+ local stretched_rep_poolname=stretched_replicated_rbdpool
+ ceph osd pool create $stretched_poolname 32 32 stretch_rule
+ ceph osd pool create $stretched_rep_poolname 32 32 stretch_replicated_rule
+ ceph osd pool set $stretched_poolname size 4
+ ceph osd pool set $stretched_rep_poolname size 4
+ wait_for_clean || return 1
+
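+    # With get_rule_avail accounting for every take step, the multi-take and
+    # single-take stretch pools must report the same MAX AVAIL.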
+ local global_avail=`ceph df -f json | jq '.stats.total_avail_bytes'`
+ local stretched_avail=`ceph df -f json | jq '.pools | map(select(.name == "'$stretched_poolname'"))[0].stats.max_avail'`
+ local stretched_rep_avail=`ceph df -f json | jq '.pools | map(select(.name == "'$stretched_rep_poolname'"))[0].stats.max_avail'`
+
+ test ${stretched_avail} == ${stretched_rep_avail} || return 1
+
+ ceph osd pool delete $stretched_poolname $stretched_poolname --yes-i-really-really-mean-it
+ ceph osd pool delete $stretched_rep_poolname $stretched_rep_poolname --yes-i-really-really-mean-it
+ teardown $dir || return 1
+}
+
main osd-pool-df "$@"
// build a weight map for each TAKE in the rule, and then merge them
- // FIXME: if there are multiple takes that place a different number of
- // objects we do not take that into account. (Also, note that doing this
+  // We now handle varying object counts across multiple takes.
+  // However, the ultimate object placement is subject to the
+  // pool size constraint, which might restrict the number of
+  // objects selected by the crush rule. (Also, note that doing this
// right is also a function of the pool, since the crush rule
// might choose 2 + choose 2 but pool size may only be 3.)
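+  // For example, a stretch rule with "take DC1 ... emit, take DC2 ... emit"
+  // accumulates the OSD weights from both takes into a single map and
+  // normalizes it once by the combined sum, rather than normalizing each
+  // take in isolation (which made MAX AVAIL reflect only one datacenter).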
+ float sum = 0;
+ map<int,float> m;
for (unsigned i=0; i<rule->len; ++i) {
- map<int,float> m;
- float sum = 0;
if (rule->steps[i].op == CRUSH_RULE_TAKE) {
int n = rule->steps[i].arg1;
if (n >= 0) {
sum += _get_take_weight_osd_map(n, &m);
}
}
- _normalize_weight_map(sum, m, pmap);
}
+ _normalize_weight_map(sum, m, pmap);
return 0;
}