]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
ceph-nvmeof-mon fixes
authorAlexander Indenbaum <aindenba@redhat.com>
Tue, 2 Apr 2024 10:36:04 +0000 (10:36 +0000)
committerAlexander Indenbaum <aindenba@redhat.com>
Thu, 20 Nov 2025 08:55:27 +0000 (10:55 +0200)
Resolves: rhbz#2280205

- NVMeofGwMonitorClient: assert gateway state does not vanish
Signed-off-by: Alexander Indenbaum <aindenba@redhat.com>
- send unavailable in beacon response to gw that did not exit after failover
Signed-off-by: Leonid Chernin <leonidc@il.ibm.com>
- fix blacklist start when osd is not writable
Signed-off-by: Leonid Chernin <leonidc@il.ibm.com>
- Clean up

  - NVMeofGwTypes: remove copied_nonce_map cleanup
  - NVMeofGwMon.h: fix weird spacing
  - NVMeofGwMon.cc: dout(4) to dout(10)
  - NVMeofGwMon.cc: rm commented out code

Signed-off-by: Alexander Indenbaum <aindenba@redhat.com>
  (cherry picked from commit 05a7b1f31a87cd60c21cd46bbcde5c7745721f55)

Signed-off-by: Alexander Indenbaum <aindenba@redhat.com>
(cherry picked from commit bad3542358edaf950bfda49044e52ae5de757aa0)

src/mon/NVMeofGwMap.cc
src/mon/NVMeofGwMap.h
src/mon/NVMeofGwMon.cc
src/mon/NVMeofGwMon.h
src/mon/NVMeofGwSerialize.h
src/mon/NVMeofGwTypes.h
src/nvmeof/NVMeofGwMonitorClient.cc
src/test/test_nvmeof_mon_encoding.cc

index 03bc75dc68fc8ab719496f3366195c4828f5f43a..1d656574b8d22c3ec75ac640a8546190ea24064e 100755 (executable)
@@ -61,6 +61,7 @@ int  NVMeofGwMap::cfg_add_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_ke
         if (allocated[i] == false) {
             NvmeGwCreated gw_created(i);
             Created_gws[group_key][gw_id] = gw_created;
+            Created_gws[group_key][gw_id].performed_full_startup = true;
             dout(4) << __func__ << "Created GWS:  " << Created_gws  <<  dendl;
             return 0;
         }
@@ -103,7 +104,7 @@ int NVMeofGwMap::process_gw_map_gw_down(const NvmeGwId &gw_id, const NvmeGroupKe
     if (gw_state != gws_states.end()) {
         dout(4) << "GW down " << gw_id << dendl;
         auto& st = gw_state->second;
-        st.availability = GW_AVAILABILITY_E::GW_UNAVAILABLE;
+        st.set_unavailable_state();
         for (NvmeAnaGrpId i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i ++) {
             fsm_handle_gw_down (gw_id, group_key, st.sm_state[i], i, propose_pending);
             st.standby_state(i);
@@ -513,14 +514,18 @@ NvmeGwCreated& NVMeofGwMap::find_already_created_gw(const NvmeGwId &gw_id, const
 
 struct CMonRequestProposal : public Context {
   NVMeofGwMap *m;
-  CMonRequestProposal(NVMeofGwMap *mon) : m(mon) {}
+  entity_addrvec_t addr_vect;
+  utime_t expires;
+  CMonRequestProposal(NVMeofGwMap *mon , entity_addrvec_t addr_vector, utime_t until) : m(mon), addr_vect(addr_vector), expires (until)  {}
   void finish(int r) {
       dout(4) << "osdmon is  writable? " << m->mon->osdmon()->is_writeable() << dendl;
       if(m->mon->osdmon()->is_writeable()){
+        epoch_t epoch = m->mon->osdmon()->blocklist(addr_vect, expires);
+        dout (4) << "epoch " << epoch <<dendl;
         m->mon->nvmegwmon()->request_proposal(m->mon->osdmon());
       }
       else {
-          m->mon->osdmon()->wait_for_writeable_ctx( new CMonRequestProposal(m));
+          m->mon->osdmon()->wait_for_writeable_ctx( new CMonRequestProposal(m, addr_vect, expires));
       }
   }
 };
@@ -546,14 +551,15 @@ int NVMeofGwMap::blocklist_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_k
         bool rc = addr_vect.parse(&str[0]);
         dout(10) << str << " rc " << rc <<  " network vector: " << addr_vect << " " << addr_vect.size() << dendl;
         ceph_assert(rc);
+
         epoch = mon->osdmon()->blocklist(addr_vect, expires);
 
         if (!mon->osdmon()->is_writeable()) {
-            dout(4) << "osdmon is not writable, waiting " << dendl;
-            mon->osdmon()->wait_for_writeable_ctx( new CMonRequestProposal(this ));// return false;
+            dout(4) << "osdmon is not writable, waiting, epoch = " << epoch << dendl;
+            mon->osdmon()->wait_for_writeable_ctx( new CMonRequestProposal(this, addr_vect, expires ));// return false;
         }
         else  mon->nvmegwmon()->request_proposal(mon->osdmon());
-        dout(4) << str << " mon->osdmon()->blocklist:  " << epoch <<  " address vector: " << addr_vect << " " << addr_vect.size() << dendl;
+        dout(4) << str << " mon->osdmon()->blocklist: epoch : " << epoch <<  " address vector: " << addr_vect << " " << addr_vect.size() << dendl;
     }
     else{
         dout(1) << "Error: No nonces context present for gw: " <<gw_id  << " ANA group: " << grpid << dendl;
index 51c80111ed405e24da5da6f86b1ea7a1a3e1f51c..bc9732c6818f109873f5625fe17b5c10dc5751c6 100755 (executable)
@@ -60,13 +60,14 @@ private:
     void find_failback_gw       (const NvmeGwId &gw_id, const NvmeGroupKey& group_key,  bool &propose_pending);
     void set_failover_gw_for_ANA_group (const NvmeGwId &failed_gw_id, const NvmeGroupKey& group_key, const NvmeGwId &gw_id,
                                                                                                      NvmeAnaGrpId groupid);
-    int  blocklist_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId ANA_groupid, epoch_t &epoch, bool failover);
+
 
     int  get_timer   (const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId anagrpid);
     void cancel_timer(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId anagrpid);
     void validate_gw_map(const NvmeGroupKey& group_key);
 
 public:
+    int  blocklist_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId ANA_groupid, epoch_t &epoch, bool failover);
     void encode(ceph::buffer::list &bl) const {
         using ceph::encode;
         ENCODE_START(1, 1, bl);
index 69766f9d0548c77e0222516a5e8547bce0dfd0d9..97bff620f77ce66e0b24236ce35f547c1fc54f20 100644 (file)
 using std::string;
 
 void NVMeofGwMon::init(){
-    dout(4) <<  "called " << dendl;
+    dout(10) <<  "called " << dendl;
 }
 
 void NVMeofGwMon::on_restart(){
-    dout(4) <<  "called " << dendl;
+    dout(10) <<  "called " << dendl;
     last_beacon.clear();
     last_tick = ceph::coarse_mono_clock::now();
     synchronize_last_beacon();
@@ -37,7 +37,7 @@ void NVMeofGwMon::on_restart(){
 
 
 void NVMeofGwMon::synchronize_last_beacon(){
-    dout(10) <<  "called, is leader : " << mon.is_leader()  <<" active " << is_active()  << dendl;
+    dout(10) <<  "called " << dendl;
     // Initialize last_beacon to identify transitions of available  GWs to unavailable state
     for (const auto& created_map_pair: map.Created_gws) {
       const auto& group_key = created_map_pair.first;
@@ -45,7 +45,7 @@ void NVMeofGwMon::synchronize_last_beacon(){
       for (const auto& gw_created_pair: gw_created_map) {
           const auto& gw_id = gw_created_pair.first;
           if (gw_created_pair.second.availability == GW_AVAILABILITY_E::GW_AVAILABLE){
-             dout(4) << "synchronize last_beacon for  GW :" << gw_id << dendl;
+             dout(10) << "synchronize last_beacon for  GW :" << gw_id << dendl;
              LastBeacon lb = {gw_id, group_key};
              last_beacon[lb] = last_tick;
           }
@@ -54,7 +54,7 @@ void NVMeofGwMon::synchronize_last_beacon(){
 }
 
 void NVMeofGwMon::on_shutdown() {
-    dout(4) <<  "called " << dendl;
+    dout(10) <<  "called " << dendl;
 }
 
 void NVMeofGwMon::tick(){
@@ -75,7 +75,7 @@ void NVMeofGwMon::tick(){
         // This case handles either local slowness (calls being delayed
         // for whatever reason) or cluster election slowness (a long gap
         // between calls while an election happened)
-        dout(4) << ": resetting beacon timeouts due to mon delay "
+        dout(10) << ": resetting beacon timeouts due to mon delay "
                 "(slow election?) of " << now - last_tick << " seconds" << dendl;
         for (auto &i : last_beacon) {
           i.second = now;
@@ -95,7 +95,7 @@ void NVMeofGwMon::tick(){
         auto& lb = itr.first;
         auto last_beacon_time = itr.second;
         if(last_beacon_time < cutoff){
-            dout(4) << "beacon timeout for GW " << lb.gw_id << dendl;
+            dout(10) << "beacon timeout for GW " << lb.gw_id << dendl;
             pending_map.process_gw_map_gw_down( lb.gw_id, lb.group_key, propose);
             _propose_pending |= propose;
             last_beacon.erase(lb);
@@ -109,13 +109,9 @@ void NVMeofGwMon::tick(){
     _propose_pending |= propose;
 
     if(_propose_pending){
-       //pending_map.delay_propose = true; // not to send map to clients immediately in "update_from_paxos"
-       dout(4) << "propose pending " <<dendl;
+       dout(10) << "propose pending " <<dendl;
        propose_pending();
     }
-
-    // if propose_pending returned true , call propose_pending method of the paxosService
-    // todo understand the logic of paxos.plugged for sending several propose_pending see MgrMonitor::tick
 }
 
 const char **NVMeofGwMon::get_tracked_conf_keys() const
@@ -129,7 +125,7 @@ const char **NVMeofGwMon::get_tracked_conf_keys() const
 void NVMeofGwMon::handle_conf_change(const ConfigProxy& conf,
                                     const std::set<std::string> &changed)
 {
-  dout(4) << "changed " << changed << dendl;
+  dout(10) << "changed " << changed << dendl;
 }
 
 void NVMeofGwMon::create_pending(){
@@ -152,10 +148,10 @@ void NVMeofGwMon::encode_pending(MonitorDBStore::TransactionRef t){
 void NVMeofGwMon::update_from_paxos(bool *need_bootstrap){
     version_t version = get_last_committed();
 
-    //dout(4) <<  MY_MON_PREFFIX << __func__ << " version "  << version  << " map.epoch " << map.epoch << dendl;
+    //dout(10) <<  MY_MON_PREFFIX << __func__ << " version "  << version  << " map.epoch " << map.epoch << dendl;
 
     if (version != map.epoch) {
-        dout(4) << " NVMeGW loading version " << version  << " " << map.epoch << dendl;
+        dout(10) << " NVMeGW loading version " << version  << " " << map.epoch << dendl;
 
         bufferlist bl;
         int err = get_version(version, bl);
@@ -164,7 +160,7 @@ void NVMeofGwMon::update_from_paxos(bool *need_bootstrap){
         auto p = bl.cbegin();
         map.decode(p);
         if(!mon.is_leader()) {
-            dout(4) << "leader map: " << map <<  dendl;
+            dout(10) << "leader map: " << map <<  dendl;
         }
         check_subs(true);
     }
@@ -177,7 +173,7 @@ void NVMeofGwMon::check_sub(Subscription *sub)
     dout(10) << "sub->next , map-epoch " << sub->next << " " << map.epoch << dendl;
     if (sub->next <= map.epoch)
     {
-      dout(4) << "Sending map to subscriber " << sub->session->con << " " << sub->session->con->get_peer_addr() << dendl;
+      dout(10) << "Sending map to subscriber " << sub->session->con << " " << sub->session->con->get_peer_addr() << dendl;
       sub->session->con->send_message2(make_message<MNVMeofGwMap>(map));
 
       if (sub->onetime) {
@@ -191,7 +187,7 @@ void NVMeofGwMon::check_sub(Subscription *sub)
 void NVMeofGwMon::check_subs(bool t)
 {
   const std::string type = "NVMeofGw";
-  dout(4) <<  "count " << mon.session_map.subs.count(type) << dendl;
+  dout(10) <<  "count " << mon.session_map.subs.count(type) << dendl;
 
   if (mon.session_map.subs.count(type) == 0){
       return;
@@ -251,7 +247,7 @@ bool NVMeofGwMon::prepare_update(MonOpRequestRef op){
 
 bool NVMeofGwMon::preprocess_command(MonOpRequestRef op)
 {
-    dout(4) << dendl;
+    dout(10) << dendl;
     auto m = op->get_req<MMonCommand>();
     std::stringstream ss;
     bufferlist rdata;
@@ -267,14 +263,14 @@ bool NVMeofGwMon::preprocess_command(MonOpRequestRef op)
 
     string prefix;
     cmd_getval(cmdmap, "prefix", prefix);
-    dout(4) << "MonCommand : "<< prefix <<  dendl;
+    dout(10) << "MonCommand : "<< prefix <<  dendl;
 
     return false;
 }
 
 bool NVMeofGwMon::prepare_command(MonOpRequestRef op)
 {
-    dout(4)  << dendl;
+    dout(10)  << dendl;
     auto m = op->get_req<MMonCommand>();
     int rc;
     std::stringstream ss;
@@ -302,7 +298,7 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op)
 
     const auto prefix = cmd_getval_or<string>(cmdmap, "prefix", string{});
 
-    dout(4) << "MonCommand : "<< prefix <<  dendl;
+    dout(10) << "MonCommand : "<< prefix <<  dendl;
     if( prefix == "nvme-gw create" || prefix == "nvme-gw delete" ) {
         std::string id, pool, group;
 
@@ -310,7 +306,7 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op)
         cmd_getval(cmdmap, "pool", pool);
         cmd_getval(cmdmap, "group", group);
         auto group_key = std::make_pair(pool, group);
-        dout(4) << " id "<< id <<" pool "<< pool << " group "<< group << dendl;
+        dout(10) << " id "<< id <<" pool "<< pool << " group "<< group << dendl;
         if(prefix == "nvme-gw create"){
             rc = pending_map.cfg_add_gw(id, group_key);
             ceph_assert(rc!= -EINVAL);
@@ -339,7 +335,7 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op)
         cmd_getval(cmdmap, "pool", pool);
         cmd_getval(cmdmap, "group", group);
         auto group_key = std::make_pair(pool, group);
-        dout(4) <<"nvme-gw show  pool "<< pool << " group "<< group << dendl;
+        dout(10) <<"nvme-gw show  pool "<< pool << " group "<< group << dendl;
 
         if( map.Created_gws[group_key].size()){
             f->open_object_section("common");
@@ -365,7 +361,7 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op)
                  f->open_object_section("stat");
                  f->dump_string("gw-id", gw_id);
                  f->dump_unsigned("anagrp-id",state.ana_grp_id+1);
-                 f->dump_unsigned("last-gw_map-epoch-valid",state.last_gw_map_epoch_valid);
+                 f->dump_unsigned("performed-full-startup", state.performed_full_startup);
                  std::stringstream  ss1;
                  ss1 << state.availability;
                  f->dump_string("Availability", ss1.str());
@@ -411,10 +407,7 @@ bool NVMeofGwMon::preprocess_beacon(MonOpRequestRef op){
 }
 
 
-//#define BYPASS_GW_CREATE_CLI
-
 bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){
-    //dout(4)  << dendl;
     auto m = op->get_req<MNVMeofGwBeacon>();
 
     dout(20) << "availability " <<  m->get_availability() << " GW : " << m->get_gw_id() <<
@@ -435,10 +428,31 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){
     if (avail == GW_AVAILABILITY_E::GW_CREATED){
         if (gw == group_gws.end()) {
            gw_created = false;
-           dout(4) << "Warning: GW " << gw_id << " group_key " << group_key << " was not found in the  map.Created_gws "<< map.Created_gws <<dendl;
+           dout(10) << "Warning: GW " << gw_id << " group_key " << group_key << " was not found in the  map.Created_gws "<< map.Created_gws <<dendl;
+        }
+        else {
+            dout(4) << "GW  prepares the full startup " << gw_id << dendl;
+            if(pending_map.Created_gws[group_key][gw_id].performed_full_startup == false){
+                pending_map.Created_gws[group_key][gw_id].performed_full_startup = true;
+                propose = true;
+            }
         }
         goto set_propose;
     }
+    else { // gw already created
+        if (gw != group_gws.end()) // if GW reports Available but in monitor's database it is Unavailable
+                                   // it means it did not perform "exit" after failover was set by NVMeofGWMon
+           if( pending_map.Created_gws[group_key][gw_id].availability == GW_AVAILABILITY_E::GW_UNAVAILABLE  &&
+               pending_map.Created_gws[group_key][gw_id].performed_full_startup == false &&
+               avail == GW_AVAILABILITY_E::GW_AVAILABLE ) {
+               ack_map.Created_gws[group_key][gw_id] = pending_map.Created_gws[group_key][gw_id];
+               ack_map.epoch = map.epoch;
+               dout(1) << " Force gw to exit: Sending ack_map to GW: " << gw_id << dendl;
+               auto msg = make_message<MNVMeofGwMap>(ack_map);
+               mon.send_reply(op, msg.detach());
+               goto false_return;
+           }
+    }
 
     // At this stage the gw has to be in the Created_gws
     if(gw == group_gws.end()){
@@ -450,28 +464,24 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){
     if(m->get_nonce_map().size()) {
         if(pending_map.Created_gws[group_key][gw_id].nonce_map != m->get_nonce_map())
         {
-            dout(4) << "nonce map of GW  changed , propose pending " << gw_id << dendl;
+            dout(10) << "nonce map of GW  changed , propose pending " << gw_id << dendl;
             pending_map.Created_gws[group_key][gw_id].nonce_map = m->get_nonce_map();
-            dout(4) << "nonce map of GW " << gw_id << " "<< pending_map.Created_gws[group_key][gw_id].nonce_map  << dendl;
+            dout(10) << "nonce map of GW " << gw_id << " "<< pending_map.Created_gws[group_key][gw_id].nonce_map  << dendl;
             nonce_propose = true;
         }
     }
     else  {
-        dout(4) << "Warning: received empty nonce map in the beacon of GW " << gw_id << " "<< dendl;
+        dout(10) << "Warning: received empty nonce map in the beacon of GW " << gw_id << " "<< dendl;
     }
 
-    //pending_map.handle_removed_subsystems(gw_id, group_key, configured_subsystems, propose);
-
-    //if  no subsystem configured set gw as avail = GW_AVAILABILITY_E::GW_UNAVAILABLE
-
-    if(sub.size() == 0) {
+    if(sub.size() == 0 ) {
         avail = GW_AVAILABILITY_E::GW_UNAVAILABLE;
     }
     if(pending_map.Created_gws[group_key][gw_id].subsystems != sub)
     {
         dout(10) << "subsystems of GW changed, propose pending " << gw_id << dendl;
         pending_map.Created_gws[group_key][gw_id].subsystems =  sub;
-        dout(10) << "subsystems of GW " << gw_id << " "<< pending_map.Created_gws[group_key][gw_id].subsystems << dendl;
+        dout(20) << "subsystems of GW " << gw_id << " "<< pending_map.Created_gws[group_key][gw_id].subsystems << dendl;
         nonce_propose = true;
     }
     pending_map.Created_gws[group_key][gw_id].last_gw_map_epoch_valid = ( map.epoch == m->get_last_gwmap_epoch() );
@@ -480,7 +490,6 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){
     }
     if(avail == GW_AVAILABILITY_E::GW_AVAILABLE)
     {
-        //dout(4) <<"subsystems from beacon " << pending_map.Created_gws << dendl;
         auto now = ceph::coarse_mono_clock::now();
         // check pending_map.epoch vs m->get_version() - if different - drop the beacon
 
@@ -519,7 +528,7 @@ set_propose:
     }
 false_return:
     if (propose){
-      dout(4) << "decision in prepare_beacon" <<dendl;
+      dout(10) << "decision in prepare_beacon" <<dendl;
       return true;
     }
     else 
index 38eeac429cd96ce050ad732c3961dc2a6ac5600b..6078b5b54b3eff65175780bea475982fb45e1597 100644 (file)
@@ -55,9 +55,9 @@ public:
     void handle_conf_change(const ConfigProxy& conf, const std::set<std::string> &changed) override;
 
     // 3 pure virtual methods of the paxosService
-    void create_initial()override{};
-    void create_pending()override ;
-    void encode_pending(MonitorDBStore::TransactionRef t)override ;
+    void create_initial() override {};
+    void create_pending() override;
+    void encode_pending(MonitorDBStore::TransactionRef t) override;
 
     void init() override;
     void on_shutdown() override;
index 151e2e513a589be8209e6ce0494345aeed1d46d1..821da1e4d42d20756b843ac8eb518a18ca6fd106 100755 (executable)
@@ -130,12 +130,11 @@ inline std::ostream& print_gw_created_t(std::ostream& os, const NvmeGwCreated va
         os << " " << anas[i] <<": " << value.blocklist_data[anas[i]].osd_epoch << ":" <<value.blocklist_data[anas[i]].is_failover ;
     }
     os << "\n" << MODULE_PREFFIX << "nonces: " << value.nonce_map << " }";
-    os << "\n" << MODULE_PREFFIX << "saved-nonces: " << value.copied_nonce_map << " }";
     for (size_t i = 0; i < num_ana_groups; i++) {
         os << " " << anas[i] <<": " << value.sm_state[anas[i]] << ",";
     }
 
-    os << "]\n"<< MODULE_PREFFIX << "availability " << value.availability << "]";
+    os << "]\n"<< MODULE_PREFFIX << "availability " << value.availability << " full-startup " << value.performed_full_startup  << " ]";
 
     return os;
 }
@@ -332,7 +331,8 @@ inline void encode(const NvmeGwCreatedMap& gws,  ceph::bufferlist &bl) {
             encode((uint32_t)(gw.second.sm_state[i]), bl);
         }
         encode((uint32_t)gw.second.availability, bl);
-        encode((uint32_t)gw.second.last_gw_map_epoch_valid, bl);
+        encode((uint16_t)gw.second.performed_full_startup, bl);
+        encode((uint16_t)gw.second.last_gw_map_epoch_valid, bl);
         encode(gw.second.subsystems, bl);
 
         for(int i=0; i< MAX_SUPPORTED_ANA_GROUPS; i++){
@@ -340,7 +340,6 @@ inline void encode(const NvmeGwCreatedMap& gws,  ceph::bufferlist &bl) {
             encode(gw.second.blocklist_data[i].is_failover, bl);
         }
         encode(gw.second.nonce_map, bl);
-        encode(gw.second.copied_nonce_map, bl);
     }
     ENCODE_FINISH(bl);
 }
@@ -367,9 +366,12 @@ inline void decode(NvmeGwCreatedMap& gws, ceph::buffer::list::const_iterator &bl
         uint32_t avail;
         decode(avail, bl);
         gw_created.availability = (GW_AVAILABILITY_E)avail;
-        uint32_t gwmap_epoch;
-        decode(gwmap_epoch, bl);
-        gw_created.last_gw_map_epoch_valid = (bool)gwmap_epoch;
+        uint16_t performed_startup;
+        decode(performed_startup, bl);
+        gw_created.performed_full_startup = (bool)performed_startup;
+        uint16_t last_epoch_valid;
+        decode(last_epoch_valid, bl);
+        gw_created.last_gw_map_epoch_valid = (bool)last_epoch_valid;
         BeaconSubsystems   subsystems;
         decode(subsystems, bl);
         gw_created.subsystems = subsystems;
@@ -379,8 +381,6 @@ inline void decode(NvmeGwCreatedMap& gws, ceph::buffer::list::const_iterator &bl
             decode(gw_created.blocklist_data[i].is_failover, bl);
         }
         decode(gw_created.nonce_map, bl);
-        decode(gw_created.copied_nonce_map, bl);
-
         gws[gw_name] = gw_created;
     }
     DECODE_FINISH(bl);
index e78e817081527b5ab5b1527b785ea291ffb317fe..01704dc48dafb9ba17396f04b1bab8d934a7f804 100755 (executable)
@@ -98,9 +98,9 @@ struct NvmeGwCreated {
     NvmeAnaGrpId       ana_grp_id;                    // ana-group-id allocated for this GW, GW owns this group-id
     GW_AVAILABILITY_E  availability;                  // in absence of  beacon  heartbeat messages it becomes inavailable
     bool               last_gw_map_epoch_valid;       // "true" if the last epoch seen by the gw-client is up-to-date
+    bool               performed_full_startup;        // in order to identify gws that did not exit upon failover
     BeaconSubsystems   subsystems;                    // gateway susbsystem and their state machine states
     NvmeAnaNonceMap    nonce_map;
-    NvmeAnaNonceMap    copied_nonce_map;
     SM_STATE           sm_state;                      // state machine states per ANA group
     struct{
        epoch_t     osd_epoch;
@@ -109,7 +109,8 @@ struct NvmeGwCreated {
 
     NvmeGwCreated(): ana_grp_id(REDUNDANT_GW_ANA_GROUP_ID) {};
 
-    NvmeGwCreated(NvmeAnaGrpId id): ana_grp_id(id), availability(GW_AVAILABILITY_E::GW_CREATED), last_gw_map_epoch_valid(false)
+    NvmeGwCreated(NvmeAnaGrpId id): ana_grp_id(id), availability(GW_AVAILABILITY_E::GW_CREATED), last_gw_map_epoch_valid(false),
+                                    performed_full_startup(false)
     {
         for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++){
             sm_state[i] = GW_STATES_PER_AGROUP_E::GW_STANDBY_STATE;
@@ -117,7 +118,10 @@ struct NvmeGwCreated {
             blocklist_data[i].is_failover = true;
         }
     };
-
+    void set_unavailable_state(){
+        availability = GW_AVAILABILITY_E::GW_UNAVAILABLE;
+        performed_full_startup = false; // after setting this state the next time monitor sees GW, it expects it performed the full startup
+    }
     void standby_state(NvmeAnaGrpId grpid) {
            sm_state[grpid]       = GW_STATES_PER_AGROUP_E::GW_STANDBY_STATE;
     };
index 61a71ffb205a446386e8437b799c2c9f53866937..923d9d3d7dcd569bc927562c323159a70f670d67 100644 (file)
@@ -296,6 +296,9 @@ void NVMeofGwMonitorClient::handle_nvmeof_gw_map(ceph::ref_t<MNVMeofGwMap> nmap)
   NvmeGwState new_gw_state;
   auto got_new_gw_state = get_gw_state("new map", new_map, group_key, name, new_gw_state); 
 
+  // ensure that the gateway state has not vanished
+  ceph_assert(got_new_gw_state || !got_old_gw_state);
+
   if (!got_old_gw_state) {
     if (!got_new_gw_state) {
       dout(0) << "Can not find new gw state" << dendl;
index 89c72df38499b9d0c32f10abe1ae4d188bf6bd72..3f17998b1a36f293eda212dcc5fea6c5d9dab3f0 100644 (file)
@@ -40,19 +40,21 @@ void test_NVMeofGwMap() {
   pending_map.cfg_add_gw("GW3" ,group_key);
   NvmeNonceVector new_nonces = {"abc", "def","hij"};
   pending_map.Created_gws[group_key]["GW1"].nonce_map[1] = new_nonces;
+  pending_map.Created_gws[group_key]["GW1"].performed_full_startup = true;
   for(int i=0; i< MAX_SUPPORTED_ANA_GROUPS; i++){
     pending_map.Created_gws[group_key]["GW1"].blocklist_data[i].osd_epoch = i*2;
     pending_map.Created_gws[group_key]["GW1"].blocklist_data[i].is_failover = false;
   }
 
   pending_map.Created_gws[group_key]["GW2"].nonce_map[2] = new_nonces;
+  dout(0) << " == Dump map before Encode : == " <<dendl;
   dout(0) << pending_map << dendl;
 
   ceph::buffer::list bl;
   pending_map.encode(bl);
   auto p = bl.cbegin();
   pending_map.decode(p);
-  dout(0) << "Dump map after decode encode:" <<dendl;
+  dout(0) << " == Dump map after Decode: == " <<dendl;
   dout(0) << pending_map << dendl;
 }
 
@@ -87,7 +89,6 @@ void test_MNVMeofGwMap() {
   pending_map.cfg_add_gw("GW3" ,group_key);
   NvmeNonceVector new_nonces = {"abc", "def","hij"};
   pending_map.Created_gws[group_key]["GW1"].nonce_map[1] = new_nonces;
-  pending_map.Created_gws[group_key]["GW1"].copied_nonce_map[1] = new_nonces;
   pending_map.Created_gws[group_key]["GW1"].subsystems.push_back(sub);
   for(int i=0; i< MAX_SUPPORTED_ANA_GROUPS; i++){
     pending_map.Created_gws[group_key]["GW1"].blocklist_data[i].osd_epoch = i*2;