--- /dev/null
+++ b/src/mon/NVMeofGwMap.h
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2023 IBM, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ */
+
+#ifndef MON_NVMEOFGWMAP_H_
+#define MON_NVMEOFGWMAP_H_
+#include <map>
+#include <iostream>
+#include "include/encoding.h"
+#include "include/utime.h"
+#include "common/Formatter.h"
+#include "common/ceph_releases.h"
+#include "common/version.h"
+#include "common/options.h"
+#include "common/Clock.h"
+#include "msg/Message.h"
+#include "common/ceph_time.h"
+#include "NVMeofGwTypes.h"
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_mon
+#undef dout_prefix
+#define MODULE_PREFIX "nvmeofgw "
+#define dout_prefix *_dout << MODULE_PREFIX << __PRETTY_FUNCTION__ << " "
+
+
+static const version_t STRUCT_VERSION = 2;
+static const version_t OLD_STRUCT_VERSION = 1;
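+// STRUCT_VERSION (v2) is encoded only when the SERVER_SQUID feature bit
+// is present in the supplied features; otherwise the v1 layout is used
+// (see encode() below).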
+
+using ceph::coarse_mono_clock;
+class Monitor;
+/*-------------------*/
+class NVMeofGwMap
+{
+public:
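+  // back-pointer to the owning monitor, set by the NVMeofGwMon service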
+  Monitor* mon = nullptr;
+  epoch_t epoch = 0; // epoch used by the Paxos synchronization mechanism
+ bool delay_propose = false;
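+  // last map version reported per peer monitor address (v2 field; wire
+  // encoding is still disabled, see encode()/decode())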
+  std::map<entity_addrvec_t, uint32_t> peer_addr_2_version;
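+  // monitor-side state of every created GW, keyed by group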
+ std::map<NvmeGroupKey, NvmeGwMonStates> created_gws;
+  std::map<NvmeGroupKey, NvmeGwTimers> fsm_timers; // timers started by all gateway FSMs
+ void to_gmap(std::map<NvmeGroupKey, NvmeGwMonClientStates>& Gmap) const;
+
+ int cfg_add_gw (const NvmeGwId &gw_id, const NvmeGroupKey& group_key);
+ int cfg_delete_gw (const NvmeGwId &gw_id, const NvmeGroupKey& group_key);
+ void process_gw_map_ka (const NvmeGwId &gw_id, const NvmeGroupKey& group_key, epoch_t& last_osd_epoch, bool &propose_pending);
+ int process_gw_map_gw_down (const NvmeGwId &gw_id, const NvmeGroupKey& group_key, bool &propose_pending);
+ void update_active_timers (bool &propose_pending);
+ void handle_abandoned_ana_groups (bool &propose_pending);
+  void handle_removed_subsystems (const NvmeGwId &gw_id, const NvmeGroupKey& group_key, const std::vector<NvmeNqnId> &current_subsystems, bool &propose_pending);
+ void start_timer (const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId anagrpid, uint8_t value);
+private:
+ void add_grp_id (const NvmeGwId &gw_id, const NvmeGroupKey& group_key, const NvmeAnaGrpId grpid);
+ void remove_grp_id(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, const NvmeAnaGrpId grpid);
+ void fsm_handle_gw_down (const NvmeGwId &gw_id, const NvmeGroupKey& group_key, gw_states_per_group_t state, NvmeAnaGrpId grpid, bool &map_modified);
+ void fsm_handle_gw_delete (const NvmeGwId &gw_id, const NvmeGroupKey& group_key, gw_states_per_group_t state, NvmeAnaGrpId grpid, bool &map_modified);
+ void fsm_handle_gw_alive (const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeGwMonState & gw_state, gw_states_per_group_t state,
+ NvmeAnaGrpId grpid, epoch_t& last_osd_epoch, bool &map_modified);
+ void fsm_handle_to_expired (const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId grpid, bool &map_modified);
+
+ void find_failover_candidate(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId grpid, bool &propose_pending);
+ void find_failback_gw (const NvmeGwId &gw_id, const NvmeGroupKey& group_key, bool &propose_pending);
+ void set_failover_gw_for_ANA_group (const NvmeGwId &failed_gw_id, const NvmeGroupKey& group_key, const NvmeGwId &gw_id,
+ NvmeAnaGrpId groupid);
+
+
+ int get_timer (const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId anagrpid);
+ void cancel_timer(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId anagrpid);
+ void validate_gw_map(const NvmeGroupKey& group_key);
+
+public:
+ int blocklist_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId ANA_groupid, epoch_t &epoch, bool failover);
+
+ void encode(ceph::buffer::list &bl, uint64_t features) const {
+    uint8_t version;
+    if (HAVE_FEATURE(features, SERVER_SQUID)) {
+      version = STRUCT_VERSION;
+    } else {
+      version = OLD_STRUCT_VERSION;
+    }
+    ENCODE_START(version, 1, bl);
+    dout(4) << "encode version " << (uint64_t)version
+            << " features " << features << dendl;
+    using ceph::encode;
+    encode(epoch, bl); // global map epoch
+    if (version == STRUCT_VERSION) {
+      //encode(peer_addr_2_version, bl);
+    }
+    encode(created_gws, bl, features); // encode created GWs
+    encode(fsm_timers, bl, features);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(ceph::buffer::list::const_iterator &bl) {
+ using ceph::decode;
+    version_t struct_version = 0;
+ DECODE_START(STRUCT_VERSION, bl);
+ DECODE_OLDEST(1);
+ struct_version = struct_v;
+ dout(4) << "decode version " << struct_version << dendl;
+ decode(epoch, bl);
+ if (struct_version == STRUCT_VERSION) {
+ //dout(4) << "Decode peer_2_addr " << dendl;
+ //decode(peer_addr_2_version, bl);
+ }
+ decode(created_gws, bl);
+ decode(fsm_timers, bl);
+ DECODE_FINISH(bl);
+ }
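+
+  // Round-trip sketch (illustrative only; "map2" is hypothetical):
+  //   ceph::buffer::list bl;
+  //   map.encode(bl, mon->get_quorum_con_features());
+  //   auto it = bl.cbegin();
+  //   map2.decode(it);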
+};
+
+#include "NVMeofGwSerialize.h"
+
+#endif /* MON_NVMEOFGWMAP_H_ */

--- a/src/mon/NVMeofGwMap.cc
+++ b/src/mon/NVMeofGwMap.cc
}
}
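+
+// Re-apply the per-ANA-group FSM state of a GW that rebooted quickly
+// enough to still appear Available in the monitor map.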
+void NVMeofGwMap::handle_gw_performing_fast_reboot(const NvmeGwId &gw_id,
+ const NvmeGroupKey& group_key, bool &map_modified)
+{
+  for (auto& state_itr : created_gws[group_key][gw_id].sm_state) {
+    fsm_handle_gw_fast_reboot(gw_id, group_key, state_itr.first, map_modified);
+ }
+}
+
+void NVMeofGwMap::fsm_handle_gw_fast_reboot(const NvmeGwId &gw_id,
+ const NvmeGroupKey& group_key, NvmeAnaGrpId grpid, bool &map_modified)
+{
+  // This GW still appears as Available in the internal map but has
+  // performed a fast reboot; re-apply it so the proper states are loaded
+  // for all active ANA groups.
+ auto& gw_state = created_gws[group_key][gw_id];
+ map_modified = true;
+ gw_states_per_group_t state = gw_state.sm_state[grpid];
+ dout(10) << "GW " << gw_id << " ANA groupId: " << grpid << " state "
+ << state << dendl;
+  switch (state) {
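+  // steady states: nothing to restore after a fast reboot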
+ case gw_states_per_group_t::GW_IDLE_STATE:
+ case gw_states_per_group_t::GW_STANDBY_STATE:
+ case gw_states_per_group_t::GW_ACTIVE_STATE:
+ break;
+
+ case gw_states_per_group_t::GW_WAIT_FAILBACK_PREPARED:
+ {
+      // restart the failback timeout
+      start_timer(gw_id, group_key, grpid, 3);
+ }
+ break;
+
+ case gw_states_per_group_t::GW_OWNER_WAIT_FAILBACK_PREPARED:
+ {
+      // the owner was reset for this group; wait for the background
+      // process to choose it again
+ gw_state.standby_state(grpid);
+ }
+ break;
+
+ case gw_states_per_group_t::GW_WAIT_BLOCKLIST_CMPL:
+ {
+      // restart the timer: blocklisting was started, so wait for the
+      // new epoch to reach the GW
+ start_timer(gw_id, group_key, grpid, 30);
+ }
+ break;
+
+ default:
+ {
+ dout(4) << "Warning: GW " << gw_id << " Invalid state " << state << dendl;
+ }
+ }
+ validate_gw_map(group_key);
+}
+
void NVMeofGwMap::fsm_handle_gw_alive(
const NvmeGwId &gw_id, const NvmeGroupKey& group_key,
NvmeGwMonState & gw_state, gw_states_per_group_t state,
--- a/src/mon/NVMeofGwMon.cc
+++ b/src/mon/NVMeofGwMon.cc
        err = 0;
sstrm.str("");
}
+ if (rc == 0) {
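+      // the GW was deleted; drop its beacon record so the timeout logic
+      // ignores it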
+ LastBeacon lb = {id, group_key};
+ last_beacon.erase(lb);
+ }
}
// propose pending would be generated by the PaxosService
if ((rc != -EEXIST) && (rc != -EINVAL)) {
auto& group_gws = map.created_gws[group_key];
auto gw = group_gws.find(gw_id);
const BeaconSubsystems& sub = m->get_subsystems();
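+    // sample the clock once; reused for the last-beacon updates below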
+ auto now = ceph::coarse_mono_clock::now();
if (avail == gw_availability_t::GW_CREATED) {
if (gw == group_gws.end()) {
if (pending_map.created_gws[group_key][gw_id].availability ==
gw_availability_t::GW_AVAILABLE) {
dout(4) << " Warning :GW marked as Available in the NVmeofGwMon "
- << "database, performed full startup - Force gw to exit!"
+          << "database, performed full startup - Apply GW! "
<< gw_id << dendl;
- avail = gw_availability_t::GW_UNAVAILABLE;
- // Monitor performs Force Failover for this GW in process_gw_map_gw_down
+ pending_map.handle_gw_performing_fast_reboot(gw_id, group_key, propose);
+ LastBeacon lb = {gw_id, group_key};
+        last_beacon[lb] = now; // update the last-beacon timestamp
} else if (
pending_map.created_gws[group_key][gw_id].performed_full_startup ==
false) {
pending_map.created_gws[group_key][gw_id].performed_full_startup = true;
propose = true;
- goto set_propose;
}
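+      // jump to set_propose for both the fast-reboot and first-startup paths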
+ goto set_propose;
}
// gw already created
} else {
<< " beacon_epoch " << m->get_last_gwmap_epoch() << dendl;
}
if (avail == gw_availability_t::GW_AVAILABLE) {
- auto now = ceph::coarse_mono_clock::now();
// check pending_map.epoch vs m->get_version() -
// if different - drop the beacon