} else {
int num_in_osds = 0;
int num_down_in_osds = 0;
+ int num_in_subtrees = 0;
+ int num_down_in_subtrees = 0;
set<int> osds;
+ set<int> down_cache; // quick cache of down subtrees
+ set<int> in_subtrees;
+ set<int> up_in_subtrees;
+ set<int> down_in_subtrees;
+ int type = osdmap.crush->get_type_id(g_conf->mon_osd_down_out_subtree_limit);
for (int i = 0; i < osdmap.get_max_osd(); i++) {
if (!osdmap.exists(i)) {
if (osdmap.crush->item_exists(i)) {
if (osdmap.is_out(i))
continue;
++num_in_osds;
+ // get the id of the parent subtree
+ int subtree_id = osdmap.get_parent_subtree_id(g_ceph_context, i, type, &down_cache);
+ if (subtree_id != -ENOENT) {
+ in_subtrees.insert(subtree_id);
+ }
+
if (!osdmap.is_up(i)) {
++num_down_in_osds;
if (detail) {
const osd_info_t& info = osdmap.get_info(i);
ostringstream ss;
- ss << "osd." << i << " is down since epoch " << info.down_at
+ ss << "osd." << i << " belonging to " << g_conf->mon_osd_down_out_subtree_limit
+ << " id " << subtree_id << " is down since epoch " << info.down_at
<< ", last address " << osdmap.get_addr(i);
detail->push_back(make_pair(HEALTH_WARN, ss.str()));
}
}
+ else {
+ // if an osd in a subtree is up, implies subtree is not down
+ up_in_subtrees.insert(subtree_id);
+ }
}
+
+ set_difference(in_subtrees.begin(), in_subtrees.end(),
+ up_in_subtrees.begin(), up_in_subtrees.end(),
+ inserter(down_in_subtrees, down_in_subtrees.end()));
+ num_in_subtrees = in_subtrees.size();
+ num_down_in_subtrees = down_in_subtrees.size();
assert(num_down_in_osds <= num_in_osds);
+ assert(num_down_in_subtrees <= num_in_subtrees);
if (num_down_in_osds > 0) {
ostringstream ss;
- ss << num_down_in_osds << "/" << num_in_osds << " in osds are down";
- summary.push_back(make_pair(HEALTH_WARN, ss.str()));
+ ss << num_down_in_osds << "/" << num_in_osds << " in osds are down. ";
+ if (num_down_in_subtrees == 1) {
+ ss << num_down_in_subtrees << "/" << num_in_subtrees << " of CRUSH type " <<
+ g_conf->mon_osd_down_out_subtree_limit << " is down. ";
+ }
+ else {
+ ss << num_down_in_subtrees << "/" << num_in_subtrees << " of CRUSH type " <<
+ g_conf->mon_osd_down_out_subtree_limit << " are down. ";
+ summary.push_back(make_pair(HEALTH_WARN, ss.str()));
+ }
+ if (detail) {
+ ss << "CRUSH type " << g_conf->mon_osd_down_out_subtree_limit << " down list: [" <<
+ down_in_subtrees << "]";
+ detail->push_back(make_pair(HEALTH_WARN, ss.str()));
+ }
}
if (!osds.empty()) {
}
}
+/**
+ * Find the closest CRUSH item at or above `id` whose type is at least
+ * `subtree_type`.
+ *
+ * Starting from `id`, the hierarchy is walked upwards one immediate parent
+ * at a time.  Non-negative ids are treated as type 0; negative ids have their
+ * type looked up with crush->get_bucket_type().
+ *
+ * @param cct          currently unused by this function
+ * @param id           item to start from (non-negative id or negative bucket id)
+ * @param subtree_type CRUSH type id to search for
+ * @param down_cache   currently unused by this function
+ *                     NOTE(review): presumably kept so the signature parallels
+ *                     containing_subtree_is_down() -- confirm before removing
+ * @return id of the first item on the upward path whose type is
+ *         >= subtree_type (this can be `id` itself), or -ENOENT when
+ *         get_immediate_parent_id() fails before such an item is reached.
+ *
+ * NOTE(review): negative ids denote buckets here, so the -ENOENT sentinel
+ * shares its value range with legitimate return values; callers comparing the
+ * result against -ENOENT may misclassify a bucket with that id -- verify.
+ */
+int OSDMap::get_parent_subtree_id(CephContext *cct, int id, int subtree_type, set<int> *down_cache) const
+{
+  int current = id;
+  while (true) {
+    // ids >= 0 are type 0; everything else is a bucket with its own type
+    const int type = (current >= 0) ? 0 : crush->get_bucket_type(current);
+    assert(type >= 0);
+
+    if (type >= subtree_type) {
+      return current;
+    }
+
+    // step up one level: &current is the out-parameter, so the next
+    // iteration examines whatever id the call stored there
+    int r = crush->get_immediate_parent_id(current, &current);
+    if (r < 0) {
+      return -ENOENT;
+    }
+  }
+}
+
void OSDMap::Incremental::encode_client_old(bufferlist& bl) const
{
__u16 v = 5;
bool subtree_is_down(int id, set<int> *down_cache) const;
bool containing_subtree_is_down(CephContext *cct, int osd, int subtree_type, set<int> *down_cache) const;
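+ /**
+ * Get the id of the closest CRUSH item at or above the given osd whose
+ * type is >= subtree_type (this may be the item itself).
+ *
+ * @return the matching item id, or -ENOENT if the top of the hierarchy
+ *         is reached without finding one
+ */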
+ int get_parent_subtree_id(CephContext *cct, int osd, int subtree_type, set<int> *down_cache) const;
int identify_osd(const entity_addr_t& addr) const;
int identify_osd(const uuid_d& u) const;
int identify_osd_on_all_channels(const entity_addr_t& addr) const;