DEFINE_CEPH_FEATURE_RETIRED(50, 1, MON_METADATA, MIMIC, OCTOPUS)
DEFINE_CEPH_FEATURE(50, 2, SERVER_TENTACLE);
DEFINE_CEPH_FEATURE_RETIRED(51, 1, OSD_BITWISE_HOBJ_SORT, MIMIC, OCTOPUS)
+DEFINE_CEPH_FEATURE(51, 2, NVMEOF_BEACON_DIFF)
-// available
DEFINE_CEPH_FEATURE_RETIRED(52, 1, OSD_PROXY_WRITE_FEATURES, MIMIC, OCTOPUS)
// available
CEPH_FEATUREMASK_SERVER_REEF | \
CEPH_FEATUREMASK_SERVER_SQUID | \
CEPH_FEATUREMASK_SERVER_TENTACLE | \
+ CEPH_FEATUREMASK_NVMEOF_BEACON_DIFF | \
0ULL)
#define CEPH_FEATURES_SUPPORTED_DEFAULT CEPH_FEATURES_ALL
for (auto& gws_states: created_gws[group_key]) {
if (gws_states.first == gw_id) {
auto& state = gws_states.second;
+ if (state.availability == gw_availability_t::GW_AVAILABLE) {
+ /* prevent failover: blocklisting right now would cause IO errors */
+ dout(4) << "Delete GW: set skip-failovers for group " << gw_id
+ << " group " << group_key << dendl;
+ skip_failovers_for_group(group_key, 5);
+ }
state.availability = gw_availability_t::GW_DELETING;
dout(4) << " Deleting GW :"<< gw_id << " in state "
<< state.availability << " Resulting GW availability: "
<< state.availability << dendl;
- state.subsystems.clear();//ignore subsystems of this GW
utime_t now = ceph_clock_now();
mon->nvmegwmon()->gws_deleting_time[group_key][gw_id] = now;
return 0;
}
}
-void NVMeofGwMap::skip_failovers_for_group(const NvmeGroupKey& group_key)
+void NVMeofGwMap::skip_failovers_for_group(const NvmeGroupKey& group_key,
+ int interval_sec)
{
- const auto skip_failovers = g_conf().get_val<std::chrono::seconds>
- ("mon_nvmeofgw_skip_failovers_interval");
+ std::chrono::seconds skip_failovers;
+ if (interval_sec == 0) {
+ skip_failovers = g_conf().get_val<std::chrono::seconds>
+ ("mon_nvmeofgw_skip_failovers_interval");
+ } else {
+ skip_failovers = std::chrono::seconds(interval_sec);
+ }
for (auto& gw_created: created_gws[group_key]) {
gw_created.second.allow_failovers_ts = std::chrono::system_clock::now()
+ skip_failovers;
NvmeGwMonState& gw_map = created_gws[group_key][gw_id];
//gw_map.beacon_sequence_ooo = false;
- if (HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOFHAMAP)) { //TODO BEACONDIFF
+ if (HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOF_BEACON_DIFF)) {
if (beacon_sequence == 40 && inject1 == 0) { //Inject sequence ooo
inject1 = 1;
gw_map.beacon_sequence -= 5;
return rc;
}
+/**
+ * Overwrite the beacon sequence number stored for a gateway.
+ *
+ * The value is stored (and logged at debug level 10) only when the monitor
+ * quorum connection features include NVMEOF_BEACON_DIFF; otherwise the
+ * stored sequence is left untouched.
+ *
+ * @param gw_id            id of the gateway whose state is updated
+ * @param group_key        group the gateway belongs to
+ * @param beacon_sequence  sequence number to store
+ * @return always true
+ */
+bool NVMeofGwMap::set_gw_beacon_sequence_number(const NvmeGwId &gw_id,
+      const NvmeGroupKey& group_key, uint64_t beacon_sequence)
+{
+  // NOTE(review): the entry is fetched with operator[]; if created_gws is
+  // map-like this default-creates a missing group/GW entry - confirm that
+  // callers only pass gateways that already exist.
+  NvmeGwMonState& gw_state = created_gws[group_key][gw_id];
+  const bool beacon_diff_enabled =
+    HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOF_BEACON_DIFF);
+  if (!beacon_diff_enabled) {
+    return true;
+  }
+  gw_state.beacon_sequence = beacon_sequence;
+  dout(10) << gw_id << " set beacon_sequence " << beacon_sequence << dendl;
+  return true;
+}
+
+
void NVMeofGwMap::update_active_timers(bool &propose_pending)
{
const auto now = std::chrono::system_clock::now();
const NvmeGroupKey& group_key, bool &propose_pending);
void set_addr_vect(const NvmeGwId &gw_id,
const NvmeGroupKey& group_key, const entity_addr_t &addr_vect);
- void skip_failovers_for_group(const NvmeGroupKey& group_key);
+ void skip_failovers_for_group(const NvmeGroupKey& group_key,  // sets allow_failovers_ts of each GW in the group to now + interval
+ int interval_sec = 0);  // interval in seconds; 0 selects the mon_nvmeofgw_skip_failovers_interval config value
bool put_gw_beacon_sequence_number(const NvmeGwId &gw_id,
const NvmeGroupKey& group_key, uint64_t beacon_sequence,
uint64_t& old_sequence);
+ bool set_gw_beacon_sequence_number(const NvmeGwId &gw_id,  // overwrites the GW's stored beacon_sequence
+ const NvmeGroupKey& group_key, uint64_t beacon_sequence);  // stores only when the quorum has NVMEOF_BEACON_DIFF; always returns true
private:
int do_delete_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key);
int do_erase_gw_id(const NvmeGwId &gw_id,
for (auto &[group_key, gws_states]: pending_map.created_gws) {
BeaconSubsystems *subsystems = &empty_subsystems;
for (auto& gw_state : gws_states) { // loop for GWs inside nqn group
- subsystems = &gw_state.second.subsystems;
+ if (gw_state.second.availability == gw_availability_t::GW_AVAILABLE) {
+ subsystems = &gw_state.second.subsystems;
+ }
if (subsystems->size()) { // Set subsystems to the valid value
break;
}
pending_map.created_gws[group_key][gw_id].subsystems;
auto &state = pending_map.created_gws[group_key][gw_id];
- if (!HAVE_FEATURE(mon.get_quorum_con_features(), NVMEOFHAMAP)) { //TODO beacondiff//NVMEOF_BEACONDIFF)) {
+ if (!HAVE_FEATURE(mon.get_quorum_con_features(), NVMEOF_BEACON_DIFF)) {
if (gw_subs != sub) {
dout(10) << "subsystems of GW changed, propose pending " << gw_id << dendl;
gw_subs = sub;
}
}
}
+ if (changed) {
+ avail = gw_availability_t::GW_AVAILABLE;
+ }
if (gw_subs.size() == 0) {
avail = gw_availability_t::GW_CREATED;
dout(10) << "No-subsystems condition detected for GW " << gw_id <<dendl;
}
}// for HA no-subsystems and no-listeners are same usecases
//dout(10) << " GWid " << gw_id << " beacon subsystems changed = " << changed << dendl;
+ if (avail == gw_availability_t::GW_UNAVAILABLE) {
+ dout(4) << "Warning: UNAVAILABLE gw " << gw_id << dendl;
+ }
return (changed == true ? 1:0);
}
ConnectionRef con = op->get_connection();
NvmeGwId gw_id = m->get_gw_id();
NvmeGroupKey group_key = std::make_pair(m->get_gw_pool(), m->get_gw_group());
+ // NOTE: "avail" may be modified inside this function. When it becomes
+ // CREATED (which can happen for several reasons), the GW's load-balance
+ // group is serviced by another GW.
gw_availability_t avail = m->get_availability();
bool propose = false;
bool nonce_propose = false;
<< map.created_gws << dendl;
goto set_propose;
} else {
- correct_sequence = pending_map.put_gw_beacon_sequence_number
- (gw_id, group_key, sequence, stored_sequence);
+ pending_map.created_gws[group_key][gw_id].subsystems.clear();
+ pending_map.set_gw_beacon_sequence_number (gw_id, group_key, sequence);
dout(4) << "GW beacon: Created state - full startup done " << gw_id
<< " GW state in monitor data-base : "
<< pending_map.created_gws[group_key][gw_id].availability
dout(1) << " Warning :GW marked as Available in the NVmeofGwMon "
<< "database, performed full startup - Apply it but don't allow failover!"
<< gw_id << dendl;
- correct_sequence = true; //ack with ooo indication wouldn't sent this time
- //to prevent duplicated exception handling
process_gw_down(gw_id, group_key, gw_propose, avail);
pending_map.skip_failovers_for_group(group_key);
dout(4) << "fast_reboot:set skip-failovers for group " << gw_id << " group "
goto false_return;
}
if (!correct_sequence) {
+ if (avail == gw_availability_t::GW_AVAILABLE) {
+ /*prevent failover because blocklisting is not possible */
+ dout(4) << "sequence ooo: set skip-failovers for group " << gw_id
+ << " group " << group_key << dendl;
+ pending_map.skip_failovers_for_group(group_key, 7);
+ }
avail = gw_availability_t::GW_CREATED;
- // GW in service mode, not active up to correct sequence
+ // GW is not active up to correct sequence
goto check_availability;
}
}
* if epoch-filter-bit: send ack to beacon in case no propose
* or if changed something not relevant to gw-epoch
*/
- if (gw_created) {
+ if (gw_created) { //TODO make func
// respond with a map slice correspondent to the same GW
- ack_map.created_gws[group_key][gw_id] = map.created_gws[group_key][gw_id];
+ ack_map.created_gws[group_key][gw_id] = (gw_propose) ? //avail = CREATED
+ pending_map.created_gws[group_key][gw_id] :
+ map.created_gws[group_key][gw_id];
ack_map.created_gws[group_key][gw_id].beacon_sequence = sequence;
if (!correct_sequence) {
- dout(4) << "beacon from GW " << gw_id <<
+ dout(4) << " GW " << gw_id <<
" sending ACK due to receiving beacon_sequence out of order"
<< dendl;
ack_map.created_gws[group_key][gw_id].beacon_sequence =
stored_sequence;
ack_map.created_gws[group_key][gw_id].beacon_sequence_ooo = true;
}
-
+ if (gw_propose) {
+ dout(10) << "GW in Created " << gw_id << " ack map " << ack_map << dendl;
+ }
}
ack_map.epoch = get_ack_map_epoch(gw_created, group_key);
if (!gw_created)
inline void encode(const NvmeGwClientState& state, ceph::bufferlist &bl, uint64_t features) {
uint8_t version = 1;
- if (HAVE_FEATURE(features, NVMEOFHAMAP)) { //TODO beacondiff//NVMEOF_BEACONDIFF)) {
+ if (HAVE_FEATURE(features, NVMEOF_BEACON_DIFF)) {
version = 2;
}
ENCODE_START(version, version, bl);
inline void encode(const BeaconSubsystem& sub, ceph::bufferlist &bl, uint64_t features) {
uint8_t version = 1;
- if (HAVE_FEATURE(features, NVMEOFHAMAP)) { //TODO beacondiff//NVMEOF_BEACONDIFF)) {
+ if (HAVE_FEATURE(features, NVMEOF_BEACON_DIFF)) {
version = 2;
}
ENCODE_START(version, version, bl);