local exp_osds_upgradable=2
local crush_bucket=$(ceph osd tree | grep host | awk '{ print $4 }')
local res=$(ceph osd ok-to-upgrade $crush_bucket $ceph_version --format=json)
- # Specifying hostname as the crush bucket with a 3x replicated pool on 10 OSDs
- # and with the default 'mgr_osd_upgrade_check_convergence_factor' would result
- # in 4 OSDs being reported as upgradable.
+ # Specifying hostname as the crush bucket with a 3x replicated pool on 10
+ # OSDs, with the default 'mgr_osd_upgrade_check_convergence_factor' and
+ # with min_size=1 should result in at least 2 OSDs being reported as
+ # upgradable. But it is very likely that more than 2 OSDs could be found
+ # due to the way PGs are spread out across the replicas. The same is true
+ # with min_size=2. Therefore, the check for upgradable OSDs considers this
+ # and verifies that at least the expected minimum OSDs are returned.
test $(echo $res | jq '.all_osds_upgraded') = false || return 1
test $(echo $res | jq '.ok_to_upgrade') = true || return 1
local num_osds_upgradable=$(echo $res | jq '.osds_ok_to_upgrade | length' | bc)
local num_osds_upgraded=$(echo $res | jq '.osds_upgraded | length' | bc)
test $num_osds_upgraded -eq 0 || return 1
- # Test for upgradability with min_size=1, 1 OSD to upgrade and max=3.
- # This tests the functionality of the 'max' parameter and checks the
- # logic to find more OSDs in the crush bucket.
- local max=2
- exp_osds_upgradable=2
- crush_bucket="osd.0"
+ # Test the same command as above, but exercise the 'max' parameter.
+ # Only the number of OSDs specified by 'max' from the crush bucket must be returned.
+ local max=1
+ exp_osds_upgradable=1
+ # Test the command with terse syntax, which exercises type inference
res=$(ceph osd ok-to-upgrade $crush_bucket $ceph_version $max --format=json)
test $(echo $res | jq '.all_osds_upgraded') = false || return 1
test $(echo $res | jq '.ok_to_upgrade') = true || return 1
num_osds_upgradable=$(echo $res | jq '.osds_ok_to_upgrade | length' | bc)
+ test $num_osds_upgradable -eq $exp_osds_upgradable || return 1
+ num_osds_upgraded=$(echo $res | jq '.osds_upgraded | length' | bc)
+ test $num_osds_upgraded -eq 0 || return 1
+
+ # Test same command above with verbose syntax
+ res=$(ceph osd ok-to-upgrade --crush_bucket $crush_bucket \
+ --ceph_version $ceph_version --max $max --format=json)
+ test $(echo $res | jq '.all_osds_upgraded') = false || return 1
+ test $(echo $res | jq '.ok_to_upgrade') = true || return 1
+ num_osds_upgradable=$(echo $res | jq '.osds_ok_to_upgrade | length' | bc)
+ test $num_osds_upgradable -eq $exp_osds_upgradable || return 1
+ num_osds_upgraded=$(echo $res | jq '.osds_upgraded | length' | bc)
+ test $num_osds_upgraded -eq 0 || return 1
+
+ # Test for upgradability with min_size=1 and 1 OSD to upgrade. The outcome
+ # must be the specified osd as the command limits the search within the
+ # provided crush bucket.
+ exp_osds_upgradable=1
+ crush_bucket="osd.0"
+ res=$(ceph osd ok-to-upgrade $crush_bucket $ceph_version --format=json)
+ test $(echo $res | jq '.all_osds_upgraded') = false || return 1
+ test $(echo $res | jq '.ok_to_upgrade') = true || return 1
+ num_osds_upgradable=$(echo $res | jq '.osds_ok_to_upgrade | length' | bc)
test $exp_osds_upgradable = $num_osds_upgradable || return 1
- test $max = $num_osds_upgradable || return 1
num_osds_upgraded=$(echo $res | jq '.osds_upgraded | length' | bc)
test $num_osds_upgraded -eq 0 || return 1
local num_osds_upgraded=$(echo $res | jq '.osds_upgraded | length' | bc)
test $num_osds_upgraded -eq 0 || return 1
- # Test for upgradability with min_size=1, 1 OSD to upgrade and max=3.
- # This tests the functionality of the 'max' parameter and also checks
- # the logic to find more OSDs in the crush bucket.
- local max=3
- crush_bucket="osd.0"
+ # Test the same command as above, but exercise the 'max' parameter.
+ # Only the number of OSDs specified by 'max' from the crush bucket must be returned.
+ local max=1
+ exp_osds_upgradable=1
+ # Test the command with terse syntax, which exercises type inference
res=$(ceph osd ok-to-upgrade $crush_bucket $ceph_version $max --format=json)
test $(echo $res | jq '.all_osds_upgraded') = false || return 1
test $(echo $res | jq '.ok_to_upgrade') = true || return 1
num_osds_upgradable=$(echo $res | jq '.osds_ok_to_upgrade | length' | bc)
+ test $num_osds_upgradable -eq $exp_osds_upgradable || return 1
+ num_osds_upgraded=$(echo $res | jq '.osds_upgraded | length' | bc)
+ test $num_osds_upgraded -eq 0 || return 1
+
+ # Test command above with verbose syntax
+ res=$(ceph osd ok-to-upgrade --crush_bucket $crush_bucket \
+ --ceph_version $ceph_version --max $max --format=json)
+ test $(echo $res | jq '.all_osds_upgraded') = false || return 1
+ test $(echo $res | jq '.ok_to_upgrade') = true || return 1
+ num_osds_upgradable=$(echo $res | jq '.osds_ok_to_upgrade | length' | bc)
+ test $num_osds_upgradable -eq $exp_osds_upgradable || return 1
+ num_osds_upgraded=$(echo $res | jq '.osds_upgraded | length' | bc)
+ test $num_osds_upgraded -eq 0 || return 1
+
+ # Test for upgradability with min_size=5 and 1 OSD to upgrade. The outcome
+ # must be the specified osd as the command limits the search within the
+ # provided crush bucket.
+ exp_osds_upgradable=1
+ crush_bucket="osd.0"
+ res=$(ceph osd ok-to-upgrade $crush_bucket $ceph_version --format=json)
+ test $(echo $res | jq '.all_osds_upgraded') = false || return 1
+ test $(echo $res | jq '.ok_to_upgrade') = true || return 1
+ num_osds_upgradable=$(echo $res | jq '.osds_ok_to_upgrade | length' | bc)
test $exp_osds_upgradable = $num_osds_upgradable || return 1
- test $max = $num_osds_upgradable || return 1
num_osds_upgraded=$(echo $res | jq '.osds_upgraded | length' | bc)
test $num_osds_upgraded -eq 0 || return 1
auto ver = get_osd_metadata("ceph_version_short", osd_id);
if (ver.has_value()) {
if (*ver != ceph_version_new) {
- dout(20) << "found " << osd_id << " to upgrade" << dendl;
to_upgrade.push_back(osd);
} else {
- dout(20) << osd_id << " is already running the new version("
- << *ver << ")" << dendl;
upgraded.push_back(osd);
}
} else {
- derr << "couldn't determine 'ceph_version_short' for "
- << osd_id << dendl;
- version_unknown.push_back(osd);
+ derr << "couldn't determine 'ceph_version_short' for "
+ << osd_id << dendl;
+ version_unknown.push_back(osd);
}
}
+ dout(20) << "osds to upgrade: " << to_upgrade << dendl;
+ dout(20) << "osds upgraded: " << upgraded << " running new version("
+ << ceph_version_new << ")" << dendl;
+
// Check if all OSDs are upgraded
_update_upgraded_osds(orig_osds, to_upgrade, upgraded,
version_unknown, out_osd_report);
const double convergence_factor =
g_conf().get_val<double>("mgr_osd_upgrade_check_convergence_factor");
size_t osd_subset_count = to_upgrade.size();
+ std::vector<int> osds = to_upgrade;
while (true) {
+ // reset pg report
+ *out_pg_report = offline_pg_report();
// Check impact to PGs with the filtered set. Use the existing
// ok-to-stop logic for this purpose.
- _check_offlines_pgs(to_upgrade, osdmap, pgmap, out_pg_report);
- if (!out_pg_report->ok_to_stop()) {
- if (osd_subset_count == 1) {
- // This means that there's no safe set of OSDs to upgrade.
- // This probably indicates a problem with the cluster configuration.
- to_upgrade.clear();
- _update_upgraded_osds(orig_osds, to_upgrade, upgraded,
- version_unknown, out_osd_report);
- return;
- }
- // Reduce the number of OSDs in the set by the convergence factor.
- osd_subset_count = std::max<size_t>(
- 1, static_cast<size_t>(osd_subset_count * convergence_factor));
- // Prune the 'to-upgrade' set to hold the new subset of OSDs
- auto start_it = std::next(to_upgrade.begin(), osd_subset_count);
- auto end_it = to_upgrade.end();
- to_upgrade.erase(start_it, end_it);
- // reset pg report
- *out_pg_report = offline_pg_report();
- } else {
- _update_upgraded_osds(orig_osds, to_upgrade, upgraded,
- version_unknown, out_osd_report);
+ _check_offlines_pgs(osds, osdmap, pgmap, out_pg_report);
+ if (out_pg_report->ok_to_stop()) {
+ // We have a set that can be upgraded. But if it still exceeds
+ // the 'max' limit set by the user, prune the candidate 'osds'
+ // vector further to hold only 'max' OSDs. For safety, re-run
+ // the offline pg check before returning.
+ if (osd_subset_count > max) {
+ osd_subset_count = max;
+ osds.resize(osd_subset_count);
+ continue;
+ }
+ _update_upgraded_osds(orig_osds, osds, upgraded,
+ version_unknown, out_osd_report);
if (out_osd_report->ok_to_upgrade()) {
// Found a safe subset! Break and generate the output.
- dout(20) << "found " << osd_subset_count << " OSDs that are safe to "
- << "upgrade" << dendl;
+ dout(20) << "found " << osd_subset_count << " OSDs that are "
+ << "safe to upgrade." << dendl;
break;
}
}
+ // Either the offline pg check or the upgrade report failed.
+ // Trigger the reduction logic.
+ if (osd_subset_count == 1) {
+ // This means that there's no safe set of OSDs to upgrade.
+ // This probably indicates a problem with the cluster configuration.
+ osds.clear();
+ _update_upgraded_osds(orig_osds, osds, upgraded,
+ version_unknown, out_osd_report);
+ return;
+ }
+ // Reduce the number of OSDs in the set by the convergence factor.
+ osd_subset_count = std::max<size_t>(
+ 1, static_cast<size_t>(osd_subset_count * convergence_factor));
+ // Prune the candidate 'osds' vector to hold the new subset of OSDs
+ osds.resize(osd_subset_count);
}
- if (to_upgrade.size() >= max) {
- // already at max
- dout(20) << "to_upgrade(" << to_upgrade.size() << ") >= "
- << " max(" << max << ")" << dendl;
- return;
+
+ if (osds.size() == max) {
+ // already at max
+ dout(20) << "to_upgrade(" << osds.size() << ") == "
+ << " max(" << max << ")" << dendl;
+ return;
}
/**
- * semi-arbitrarily start with the first osd in the 'to_upgrade'
- * vector and see if we can add more osds to upgrade. The reason
- * for using a vector instead of set is to preserve the order of
- * OSDs according to the order of other parent and their child
- * buckets. This order ensures that the offline pgs check can
- * correctly determine the outcome of a set of OSDs stopped from
- * a specific bucket.
+ * Handle case if 'max' criteria is not met and there are OSDs
+ * not yet considered from the to_upgrade vector. This can
+ * happen depending on the value of the convergence factor
+ * resulting in some residual OSDs in the crush bucket
+ * not participating in the initial offline pg check. Consider
+ * the residual OSDs and try maximizing the upgrade set.
*/
- offline_pg_report _pg_report;
- upgrade_osd_report _osd_report;
- std::vector<int> osds = to_upgrade;
- int parent = *osds.begin();
- std::vector<int> children;
-
- dout(20) << "Trying to add more children..." << dendl;
- while (true) {
- // identify the next parent
- int r = osdmap.crush->get_immediate_parent_id(parent, &parent);
- if (r < 0) {
- dout(20) << "No parent found for item id: " << parent << dendl;
- return; // just go with what we have so far!
- }
-
- // get candidate additions that are beneath this point in the tree
- children.clear();
- r = _populate_crush_bucket_osds(parent, osdmap, pgmap, children);
- if (r != 0) {
- return; // just go with what we have so far!
- }
-
- // try adding in more osds from the list of children
- // determined above to maximize the upgrade set.
- int failed = 0; // how many children we failed to add to our set
- for (auto o : children) {
- auto it = std::find(osds.begin(), osds.end(), o);
- bool can_add_osd = (it == osds.end());
- if (o >= 0 && osdmap.is_up(o) && can_add_osd) {
- osds.push_back(o);
- _check_offlines_pgs(osds, osdmap, pgmap, &_pg_report);
- if (!_pg_report.ok_to_stop()) {
- osds.pop_back();
- ++failed;
- continue;
- }
+ if (osds.size() < max && osds.size() < to_upgrade.size()) {
+ // Avoid reallocations as we won't exceed max
+ osds.reserve(max);
+ int failed = 0;
+ dout(20) << "Maximization phase: testing candidate subset [ ";
+ for (auto it = to_upgrade.begin() + osd_subset_count;
+ it != to_upgrade.end();
+ ++it) {
+ *_dout << *it << " ";
+ }
+ *_dout << "]" << dendl;
+
+ for(size_t i = osd_subset_count;
+ i < to_upgrade.size() && osds.size() < max;
+ ++i) {
+ int candidate = to_upgrade[i];
+ osds.push_back(candidate);
+ // offline pg check with new osd
+ offline_pg_report _pg_report;
+ _check_offlines_pgs(osds, osdmap, pgmap, &_pg_report);
+ if (_pg_report.ok_to_stop()) {
+ upgrade_osd_report _osd_report;
_update_upgraded_osds(orig_osds, osds, upgraded,
- version_unknown, &_osd_report);
- *out_pg_report = _pg_report;
- *out_osd_report = _osd_report;
- if (osds.size() == max) {
- dout(20) << " hit max" << dendl;
- if (out_osd_report->ok_to_upgrade()) {
- // Found additional children that can be upgraded
- dout(20) << "found " << osds.size() - to_upgrade.size()
- << " additional OSD(s) to upgrade" << dendl;
- }
- return; // yay, we hit the max
+ version_unknown, &_osd_report);
+ if (_osd_report.ok_to_upgrade()) {
+ // avoid deep copies as the reports may be huge
+ *out_pg_report = std::move(_pg_report);
+ *out_osd_report = std::move(_osd_report);
+ continue;
}
}
+ // pg check or osd report failed, disregard osd
+ osds.pop_back();
+ ++failed;
+ }
+ if (osds.size() == max) {
+ dout(20) << " hit max" << dendl;
+ }
+ if (osds.size() > osd_subset_count) {
+ dout(20) << "found " << osds.size() - osd_subset_count
+ << " additional OSD(s) to upgrade" << dendl;
}
-
if (failed) {
// we hit some failures; go with what we have
dout(20) << " hit some peer failures" << dendl;
- return;
}
}
}
cmd_getval(cmdctx->cmdmap, "crush_bucket", crush_bucket_name);
std::string ceph_version;
cmd_getval(cmdctx->cmdmap, "ceph_version", ceph_version);
- int64_t max = 1;
+ int64_t max = 0; // default value
cmd_getval(cmdctx->cmdmap, "max", max);
int r;
std::vector<int> osds_in_crush_bucket;
cmdctx->reply(-ENOENT, ss);
return true;
}
- if (max < (int)osds_in_crush_bucket.size()) {
- max = osds_in_crush_bucket.size();
+ // If 'max' is not specified, limit it to the number of osds
+ // in the crush bucket
+ if (max == 0) {
+ max = (int)osds_in_crush_bucket.size();
+ dout(0) << "Override 'max' to " << max << ", which is the total number "
+ << "of osds in crush bucket " << crush_bucket_name << dendl;
}
upgrade_osd_report osd_upgrade_report;
offline_pg_report pg_offline_report;