the I/O capacity of the OSD.
The default stride size (``osd_deep_scrub_stride``) was 512 KBytes, and is now 4 MBytes.
+* RADOS: Stretch mode can now be entered even if the two dividing buckets differ
+ in weight by a small fraction (default 0.1). This threshold is tunable via
+ ``mon_stretch_max_bucket_weight_delta``.
+
* CephFS: The offline CephFS tools (cephfs-data-scan, cephfs-journal-tool,
and cephfs-table-tool) now include progress tracking with ETA (Estimated Time of
Arrival) for long-running operations. Progress updates are displayed automatically
We encourage you to fix this by removing additional dividing CRUSH buckets or by increasing the
number of dividing buckets to two. For more information, see :ref:`stretch_mode`.
-UNEVEN_WEIGHTS_STRETCH_MODE
-___________________________
+STRETCH_MODE_BUCKET_WEIGHT_IMBALANCE
+____________________________________
+
+The weights of the two dividing buckets must not differ by more than a
+configured fraction when stretch mode is enabled. This fraction is set by the
+configuration option ``mon_stretch_max_bucket_weight_delta`` (default: 0.1).
-The two dividing CRUSH buckets must have equal weights when stretch mode is enabled.
-This warning suggests that the two dividing buckets have uneven weights after
-stretch mode is enabled. This is not immediately fatal, however, you can expect
-Ceph to be confused when trying to process transitions between dividing buckets.
+This is not immediately fatal; however, you can expect Ceph to experience performance bottlenecks
+and imbalanced PG distribution if the aggregate CRUSH weights of the buckets differ significantly,
+because the smaller bucket will carry a higher I/O load per OSD.
-We encourage you to fix this by making the weights even on both dividing CRUSH buckets.
+We encourage you to fix this by making the weights of the dividing buckets more even.
This can be done by making sure the combined weight of the OSDs on each dividing
-bucket are the same. For more information, see :ref:`stretch_mode`.
+bucket differs by no more than the fraction defined by
+``mon_stretch_max_bucket_weight_delta``.
NONEXISTENT_MON_CRUSH_LOC_STRETCH_MODE
______________________________________
teardown $dir || return 1
done
}
-TEST_stretched_cluster_uneven_weight() {
+TEST_stretch_cluster_uneven_crush_weights() {
local dir=$1
local OSDS=4
local weight=0.09000
ceph osd crush rm sham # clear the health warn
wait_for_health_gone "INCORRECT_NUM_BUCKETS_STRETCH_MODE" || return 1
- # Next, we test for uneven weights across buckets
+ # Next, we test for STRETCH_MODE_BUCKET_WEIGHT_IMBALANCE
- ceph osd crush reweight osd.0 0.07000
+ ceph osd crush reweight osd.0 0.08999 # make weights uneven below threshold
+ sleep 5 # sleep to allow monitor to process the weight change or health check
+ wait_for_health_ok || return 1 # we should not see any health warning
- wait_for_health "UNEVEN_WEIGHTS_STRETCH_MODE" || return 1
+ ceph osd crush reweight osd.0 0.00000 # now make the weights uneven above threshold
+ ceph osd crush reweight osd.1 0.00000 # now make the weights uneven above threshold
+ wait_for_health "STRETCH_MODE_BUCKET_WEIGHT_IMBALANCE" || return 1 # we should see the health warning
- ceph osd crush reweight osd.0 $weight # clear the health warn
-
- wait_for_health_gone "UNEVEN_WEIGHTS_STRETCH_MODE" || return 1
+ ceph osd crush reweight osd.0 $weight # make weights even again
+ ceph osd crush reweight osd.1 $weight # make weights even again
+ wait_for_health_gone "STRETCH_MODE_BUCKET_WEIGHT_IMBALANCE" || return 1 # health warning should be cleared
teardown $dir || return 1
}
-main mon-stretched-cluster-uneven-weight "$@"
\ No newline at end of file
+main mon-stretch-cluster-uneven-crush-weights "$@"
\ No newline at end of file
- mon
min: 2
max: 4
+- name: mon_stretch_max_bucket_weight_delta
+ type: float
+ level: dev
+ desc: Max fractional difference allowed between the two dividing CRUSH bucket weights when in stretch mode.
+ The value is a fraction of the smaller bucket's weight, expressed as a real number between 0.0 and 1.0.
+ default: 0.1
+ services:
+ - mon
- name: mon_clock_drift_allowed
type: float
level: advanced
return;
}
__u8 new_rule = static_cast<__u8>(new_crush_rule_result);
-
+ if (bucket_count != 2) {
+ ss << "currently we only support 2-site stretch clusters!";
+ *errcode = -EINVAL;
+ ceph_assert(!commit || bucket_count == 2);
+ return;
+ }
+ double stretch_max_weight_delta = g_conf().get_val<double>("mon_stretch_max_bucket_weight_delta");
int weight1 = crush.get_item_weight(subtrees[0]);
int weight2 = crush.get_item_weight(subtrees[1]);
- if (weight1 != weight2) {
- // TODO: I'm really not sure this is a good idea?
+ bool exceeds_threshold = abs(weight1 - weight2) >
+ (stretch_max_weight_delta * std::min(weight1, weight2));
+ if (exceeds_threshold) {
ss << "the 2 " << dividing_bucket
<< "instances in the cluster have differing weights "
<< weight1 << " and " << weight2
- <<" but stretch mode currently requires they be the same!";
+ << " but stretch mode currently"
+ <<" requires the difference to be no greater than "
+ << stretch_max_weight_delta * 100 << "%";
*errcode = -EINVAL;
- ceph_assert(!commit || (weight1 == weight2));
- return;
- }
- if (bucket_count != 2) {
- ss << "currently we only support 2-site stretch clusters!";
- *errcode = -EINVAL;
- ceph_assert(!commit || bucket_count == 2);
+ ceph_assert(!commit || !exceeds_threshold);
return;
}
// TODO: check CRUSH rules for pools so that we are appropriately divided
ss.str(), 0);
}
}
- // UNEQUAL_WEIGHT
+ // INCORRECT_NUM_BUCKETS_STRETCH_MODE
if (stretch_mode_enabled) {
vector<int> subtrees;
crush->get_subtree_of_type(stretch_mode_bucket, &subtrees);
checks->add("INCORRECT_NUM_BUCKETS_STRETCH_MODE", HEALTH_WARN, ss.str(), 0);
return;
}
+ // STRETCH_MODE_BUCKET_WEIGHT_IMBALANCE
int weight1 = crush->get_item_weight(subtrees[0]);
int weight2 = crush->get_item_weight(subtrees[1]);
+ double stretch_max_weight_delta = cct->_conf.get_val<double>("mon_stretch_max_bucket_weight_delta");
stringstream ss;
- if (weight1 != weight2) {
- ss << "Stretch mode buckets have different weights!";
- checks->add("UNEVEN_WEIGHTS_STRETCH_MODE", HEALTH_WARN, ss.str(), 0);
+ if (abs(weight1 - weight2) >
+ (stretch_max_weight_delta * std::min(weight1, weight2))) {
+ ss << "Stretch mode buckets differ in weight by more than " << (stretch_max_weight_delta * 100) << "%";
+ checks->add("STRETCH_MODE_BUCKET_WEIGHT_IMBALANCE", HEALTH_WARN, ss.str(), 0);
}
}