recommend that you begin the process well in advance of any update to Reef or
to later releases.
+OSD_UNREACHABLE
+_______________
+
+The registered v1/v2 public address of one or more OSDs is outside the
+defined ``public_network`` subnet. This prevents these unreachable OSDs
+from communicating properly with Ceph clients.
+
+Even though these unreachable OSDs are in the ``up`` state, RADOS clients
+will hang until the TCP timeout expires before erroring out because of this
+inconsistency.
+
POOL_FULL
_________
return r;
}
+/**
+ * Check whether an IPv4 address lies inside any of the listed networks.
+ *
+ * @param cct      context forwarded to matches_with_net()
+ * @param networks network list, split with get_str_list(); must yield at
+ *                 least one entry (asserted)
+ * @param addr     textual address, parsed with inet_pton(AF_INET, ...)
+ * @return true if addr matches at least one listed network; false if it
+ *         matches none or is not a valid IPv4 literal
+ */
+bool is_addr_in_subnet(
+  CephContext *cct,
+  const std::string &networks,
+  const std::string &addr)
+{
+  const auto nets = get_str_list(networks);
+  ceph_assert(!nets.empty());
+
+  // Value-initialise so no indeterminate bytes are handed to the matcher.
+  struct sockaddr_in public_addr{};
+  public_addr.sin_family = AF_INET;
+  if (inet_pton(AF_INET, addr.c_str(), &public_addr.sin_addr) != 1) {
+    // Not parseable as IPv4: sin_addr would be meaningless, so report
+    // "not in subnet" deterministically instead of comparing garbage.
+    return false;
+  }
+
+  // Minimal fake interface entry carrying only the address under test;
+  // every other field stays zeroed.
+  struct ifaddrs ifa{};
+  ifa.ifa_addr = reinterpret_cast<struct sockaddr*>(&public_addr);
+
+  const unsigned ipv = CEPH_PICK_ADDRESS_IPV4;
+  // The option may list several networks; a match against any one suffices.
+  for (const auto &net : nets) {
+    if (matches_with_net(cct, ifa, net, ipv)) {
+      return true;
+    }
+  }
+  return false;
+}
#include "crush/CrushTreeDumper.h"
#include "common/Clock.h"
#include "mon/PGMap.h"
+#include "common/pick_address.h"
using std::list;
using std::make_pair;
}
}
+// Fill *unreachable with the ids of OSDs that exist and are up, but whose
+// first or second registered address is not accepted by is_addr_in_subnet()
+// for public_network. The set is cleared before it is populated.
+void OSDMap::get_out_of_subnet_osd_counts(CephContext *cct,
+                                          std::string const &public_network,
+                                          set<int> *unreachable) const
+{
+  unreachable->clear();
+  for (int osd = 0; osd < max_osd; ++osd) {
+    if (!exists(osd) || !is_up(osd)) {
+      continue;
+    }
+    const auto& addr_vec = get_addrs(osd).v;
+    if (addr_vec.size() < 2) {
+      // OSDs with fewer than two registered addresses are not examined.
+      continue;
+    }
+    // Both of the first two entries are always checked; the set absorbs a
+    // duplicate insert when both are out of range.
+    for (unsigned slot = 0; slot < 2; ++slot) {
+      auto ip = addr_vec[slot].ip_only_to_str();
+      if (!is_addr_in_subnet(cct, public_network, ip)) {
+        unreachable->emplace(osd);
+      }
+    }
+  }
+}
+
void OSDMap::get_all_osds(set<int32_t>& ls) const
{
for (int i=0; i<max_osd; i++)
checks->add("UNEVEN_WEIGHTS_STRETCH_MODE", HEALTH_WARN, ss.str(), 0);
}
}
+
+ // PUBLIC ADDRESS IS IN SUBNET MASK
+ // Raise OSD_UNREACHABLE when get_out_of_subnet_osd_counts() reports any
+ // OSDs; the check is skipped entirely when public_network is not set.
+ {
+   const auto public_network = cct->_conf.get_val<std::string>("public_network");
+
+   if (!public_network.empty()) {
+     set<int> unreachable;
+     get_out_of_subnet_osd_counts(cct, public_network, &unreachable);
+     if (!unreachable.empty()) {
+       // Summary line, e.g. "2 osd(s) are not reachable".
+       ostringstream ss;
+       ss << unreachable.size()
+          << " osd(s) "
+          << (unreachable.size() == 1 ? "is" : "are")
+          << " not reachable";
+       auto& d = checks->add("OSD_UNREACHABLE", HEALTH_ERR, ss.str(), unreachable.size());
+       // One detail line per offending OSD.
+       for (const auto& osd : unreachable) {
+         ostringstream detail;
+         detail << "osd." << osd << "'s public address is not in '" << public_network << "' subnet";
+         d.detail.push_back(detail.str());
+       }
+     }
+   }
+ }
}
int OSDMap::parse_osd_id_list(const vector<string>& ls, set<int> *out,
void get_full_osd_counts(std::set<int> *full, std::set<int> *backfill,
std::set<int> *nearfull) const;
+ void get_out_of_subnet_osd_counts(CephContext *cct, // collects up OSDs whose first or second address is outside public_network
+ std::string const &public_network, // network(s) the OSD addresses are checked against
+ std::set<int> *unreachable) const; // out: cleared, then filled with the offending OSD ids
/***** cluster state *****/
/* osds */