with_legacy: true
see_also:
- osd_heartbeat_use_min_delay_socket
+- name: nvmeof_mon_client_disconnect_panic
+ type: secs
+ level: advanced
+ desc: The duration, expressed in seconds, after which the nvmeof gateway
+ should trigger a panic if it loses connection to the monitor
+ default: 100
+ services:
+ - mon
- name: nvmeof_mon_client_tick_period
type: secs
level: advanced
const auto& gw_id = gw_created_pair.first;
const auto& gw_created = gw_created_pair.second;
- auto gw_state = NvmeGwState(gw_created.ana_grp_id, epoch);
+ auto gw_state = NvmeGwState(gw_created.ana_grp_id, epoch, gw_created.availability);
for (const auto& sub: gw_created.subsystems) {
gw_state.subsystems.insert({sub.nqn, NqnState(sub.nqn, gw_created.sm_state, gw_created )});
}
st.standby_state(i);
}
propose_pending = true; // map should reflect that the gw became unavailable
+ if (propose_pending) validate_gw_map(group_key);
}
else {
dout(1) << __FUNCTION__ << "ERROR GW-id was not found in the map " << gw_id << dendl;
fsm_handle_gw_alive (gw_id, group_key, gw_state->second, st.sm_state[i], i, last_osd_epoch, propose_pending);
}
}
+ if (propose_pending) validate_gw_map(group_key);
}
find_failback_gw(gw_id, group_key, propose);
}
}
+ if (propose) {
+ validate_gw_map(group_key);
+ }
}
}
ceph_assert(false);
}
}
+ if (map_modified) validate_gw_map(group_key);
}
void NVMeofGwMap::fsm_handle_to_expired(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId grpid, bool &map_modified)
dout(1) << " Expired GW_WAIT_FAILOVER_PREPARED timer from GW " << gw_id << " ANA groupId: "<< grpid << dendl;
ceph_assert(false);
}
+ if (map_modified) validate_gw_map(group_key);
}
NvmeGwCreated& NVMeofGwMap::find_already_created_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key)
return 0;
}
+/**
+ * Sanity-check the gateways created for one pool/group.
+ *
+ * For the ANA group id owned by each gateway of @p group_key, counts how many
+ * gateways of that group report GW_ACTIVE_STATE for it. As soon as a second
+ * active gateway is found, the offending ANA group and the whole gateway set
+ * are logged and the process is aborted through ceph_assert(false).
+ *
+ * The gateways are walked directly rather than first copying their ANA group
+ * ids into a fixed-size array: one id is produced per gateway, so a
+ * MAX_SUPPORTED_ANA_GROUPS-sized buffer could be overrun by a large group.
+ *
+ * @param group_key pool/group whose gateways are validated.
+ */
+void NVMeofGwMap::validate_gw_map(const NvmeGroupKey& group_key)
+{
+  // Single lookup; operator[] is kept so the side effects match the
+  // previous implementation.
+  auto& group_gws = Created_gws[group_key];
+  for (auto& owner_pair : group_gws) {
+    const NvmeAnaGrpId ana_group = owner_pair.second.ana_grp_id;
+    int count = 0;
+    for (auto& gw_created_pair : group_gws) {
+      auto& st = gw_created_pair.second;
+      if (st.sm_state[ana_group] == GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE) {
+        count++;
+        if (count == 2) {
+          dout(1) << "number active states per ana-group " << ana_group << " more than 1 in pool-group " << group_key << dendl;
+          dout(1) << group_gws << dendl;
+          ceph_assert(false);
+        }
+      }
+    }
+  }
+}
+
void NVMeofGwMap::update_active_timers( bool &propose_pending ){
//dout(4) << __func__ << " called, p_monitor: " << mon << dendl;
int get_timer (const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId anagrpid);
void cancel_timer(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId anagrpid);
+ void validate_gw_map(const NvmeGroupKey& group_key);
public:
void encode(ceph::buffer::list &bl) const {
if( map.Created_gws[group_key].size()){
f->open_object_section("common");
+ f->dump_unsigned("epoch", map.epoch);
f->dump_string("pool", pool);
f->dump_string("group", group);
f->dump_unsigned("num gws", map.Created_gws[group_key].size());
}
inline std::ostream& operator<<(std::ostream& os, const NvmeGwState value) {
- os << "NvmeGwState { group id: " << value.group_id << " gw_map_epoch " << value.gw_map_epoch
+ os << "NvmeGwState { group id: " << value.group_id << " gw_map_epoch " << value.gw_map_epoch << " availablilty "<< value.availability
<< " GwSubsystems: [ ";
for (const auto& sub: value.subsystems) os << sub.second << " ";
os << " ] }";
encode(state.group_id, bl);
encode(state.gw_map_epoch, bl);
encode (state.subsystems, bl);
+ encode((uint32_t)state.availability, bl);
ENCODE_FINISH(bl);
}
decode(state.group_id, bl);
decode(state.gw_map_epoch, bl);
decode(state.subsystems, bl);
+ uint32_t avail;
+ decode(avail, bl);
+ state.availability = (GW_AVAILABILITY_E)avail;
DECODE_FINISH(bl);
}
NvmeAnaGrpId group_id;
epoch_t gw_map_epoch;
GwSubsystems subsystems;
-
- NvmeGwState(NvmeAnaGrpId id, epoch_t epoch):
+ GW_AVAILABILITY_E availability;
+ NvmeGwState(NvmeAnaGrpId id, epoch_t epoch, GW_AVAILABILITY_E available):
group_id(id),
- gw_map_epoch(epoch)
+ gw_map_epoch(epoch),
+ availability(available)
{};
- NvmeGwState() : NvmeGwState(REDUNDANT_GW_ANA_GROUP_ID, 0) {};
+ NvmeGwState() : NvmeGwState(REDUNDANT_GW_ANA_GROUP_ID, 0, GW_AVAILABILITY_E::GW_UNAVAILABLE) {};
};
struct NvmeGwMetaData {
// Panic (by throwing) when too much time has passed since last_map_time.
//
// Reads the allowed duration, in seconds, from the configuration, measures the
// whole seconds elapsed on the steady clock since last_map_time, and throws
// std::runtime_error once the elapsed time exceeds that duration.
// NOTE(review): last_map_time is presumably refreshed whenever a map arrives
// from the monitor -- confirm against the code that sets it.
void NVMeofGwMonitorClient::disconnect_panic()
{
- auto disconnect_panic_duration = g_conf().get_val<std::chrono::seconds>("mon_nvmeofgw_beacon_grace").count();
+ auto disconnect_panic_duration = g_conf().get_val<std::chrono::seconds>("nvmeof_mon_client_disconnect_panic").count();
auto now = std::chrono::steady_clock::now();
auto elapsed_seconds = std::chrono::duration_cast<std::chrono::seconds>(now - last_map_time).count();
// Strictly greater-than: an elapsed time equal to the limit does not panic.
if (elapsed_seconds > disconnect_panic_duration) {
dout(4) << "Triggering a panic upon disconnection from the monitor, elapsed " << elapsed_seconds << ", configured disconnect panic duration " << disconnect_panic_duration << dendl;
- throw std::runtime_error("Lost connection to the monitor (mon).");
+ throw std::runtime_error("Lost connection to the monitor (beacon timeout).");
}
}
}
}
- // Make sure we do not get out of order state changes from the monitor
if (got_old_gw_state && got_new_gw_state) {
dout(0) << "got_old_gw_state: " << old_gw_state << "got_new_gw_state: " << new_gw_state << dendl;
+ // Make sure we do not get out of order state changes from the monitor
ceph_assert(new_gw_state.gw_map_epoch >= old_gw_state.gw_map_epoch);
+
+ // If the monitor previously identified this gateway as accessible but now
+ // flags it as unavailable, it suggests that the gateway lost connection
+ // to the monitor.
+ if (old_gw_state.availability == GW_AVAILABILITY_E::GW_AVAILABLE &&
+ new_gw_state.availability == GW_AVAILABILITY_E::GW_UNAVAILABLE) {
+ dout(4) << "Triggering a panic upon disconnection from the monitor, gw state - unavailable" << dendl;
+ throw std::runtime_error("Lost connection to the monitor (gw map unavailable).");
+ }
}
// Gather all state changes
std::string pool = "pool1";
std::string group = "grp1";
std::string gw_id = "GW1";
- NvmeGwState state(1, 32);
+ NvmeGwState state(1, 32, GW_AVAILABILITY_E::GW_UNAVAILABLE);
std::string nqn = "nqn";
ANA_STATE ana_state;
NqnState nqn_state(nqn, ana_state);