]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
ceph-nvmeof-mon fixes
authorAlexander Indenbaum <aindenba@redhat.com>
Tue, 2 Apr 2024 10:36:04 +0000 (10:36 +0000)
committerAlexander Indenbaum <aindenba@redhat.com>
Thu, 20 Nov 2025 08:55:27 +0000 (10:55 +0200)
Resolves: rhbz#2280205

- NVMeofGwMonitorClient: assert gateway state does not vanish
Signed-off-by: Alexander Indenbaum <aindenba@redhat.com>
- send unavailable in beacon response to gw that did not exit after failover
Signed-off-by: Leonid Chernin <leonidc@il.ibm.com>
- fix blacklist start when osd is not writable
Signed-off-by: Leonid Chernin <leonidc@il.ibm.com>
- Clean up

  - NVMeofGwTypes: remove copied_nonce_map cleanup
  - NVMeofGwMon.h: fix weird spacing
  - NVMeofGwMon.cc: dout(4) to dout(10)
  - NVMeofGwMon.cc: rm commented out code

Signed-off-by: Alexander Indenbaum <aindenba@redhat.com>
  (cherry picked from commit 05a7b1f31a87cd60c21cd46bbcde5c7745721f55)

Signed-off-by: Alexander Indenbaum <aindenba@redhat.com>
(cherry picked from commit bad3542358edaf950bfda49044e52ae5de757aa0)

src/mon/NVMeofGwMap.cc
src/mon/NVMeofGwMap.h
src/mon/NVMeofGwMon.cc
src/mon/NVMeofGwMon.h
src/mon/NVMeofGwSerialize.h
src/mon/NVMeofGwTypes.h
src/nvmeof/NVMeofGwMonitorClient.cc
src/test/test_nvmeof_mon_encoding.cc

index 03bc75dc68fc8ab719496f3366195c4828f5f43a..1d656574b8d22c3ec75ac640a8546190ea24064e 100755 (executable)
@@ -61,6 +61,7 @@ int  NVMeofGwMap::cfg_add_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_ke
         if (allocated[i] == false) {
             NvmeGwCreated gw_created(i);
             Created_gws[group_key][gw_id] = gw_created;
+            Created_gws[group_key][gw_id].performed_full_startup = true;
             dout(4) << __func__ << "Created GWS:  " << Created_gws  <<  dendl;
             return 0;
         }
@@ -103,7 +104,7 @@ int NVMeofGwMap::process_gw_map_gw_down(const NvmeGwId &gw_id, const NvmeGroupKe
     if (gw_state != gws_states.end()) {
         dout(4) << "GW down " << gw_id << dendl;
         auto& st = gw_state->second;
-        st.availability = GW_AVAILABILITY_E::GW_UNAVAILABLE;
+        st.set_unavailable_state();
         for (NvmeAnaGrpId i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i ++) {
             fsm_handle_gw_down (gw_id, group_key, st.sm_state[i], i, propose_pending);
             st.standby_state(i);
@@ -513,14 +514,18 @@ NvmeGwCreated& NVMeofGwMap::find_already_created_gw(const NvmeGwId &gw_id, const
 
 struct CMonRequestProposal : public Context {
   NVMeofGwMap *m;
-  CMonRequestProposal(NVMeofGwMap *mon) : m(mon) {}
+  entity_addrvec_t addr_vect;
+  utime_t expires;
+  CMonRequestProposal(NVMeofGwMap *mon , entity_addrvec_t addr_vector, utime_t until) : m(mon), addr_vect(addr_vector), expires (until)  {}
   void finish(int r) {
       dout(4) << "osdmon is  writable? " << m->mon->osdmon()->is_writeable() << dendl;
       if(m->mon->osdmon()->is_writeable()){
+        epoch_t epoch = m->mon->osdmon()->blocklist(addr_vect, expires);
+        dout (4) << "epoch " << epoch <<dendl;
         m->mon->nvmegwmon()->request_proposal(m->mon->osdmon());
       }
       else {
-          m->mon->osdmon()->wait_for_writeable_ctx( new CMonRequestProposal(m));
+          m->mon->osdmon()->wait_for_writeable_ctx( new CMonRequestProposal(m, addr_vect, expires));
       }
   }
 };
@@ -546,14 +551,15 @@ int NVMeofGwMap::blocklist_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_k
         bool rc = addr_vect.parse(&str[0]);
         dout(10) << str << " rc " << rc <<  " network vector: " << addr_vect << " " << addr_vect.size() << dendl;
         ceph_assert(rc);
+
         epoch = mon->osdmon()->blocklist(addr_vect, expires);
 
         if (!mon->osdmon()->is_writeable()) {
-            dout(4) << "osdmon is not writable, waiting " << dendl;
-            mon->osdmon()->wait_for_writeable_ctx( new CMonRequestProposal(this ));// return false;
+            dout(4) << "osdmon is not writable, waiting, epoch = " << epoch << dendl;
+            mon->osdmon()->wait_for_writeable_ctx( new CMonRequestProposal(this, addr_vect, expires ));// return false;
         }
         else  mon->nvmegwmon()->request_proposal(mon->osdmon());
-        dout(4) << str << " mon->osdmon()->blocklist:  " << epoch <<  " address vector: " << addr_vect << " " << addr_vect.size() << dendl;
+        dout(4) << str << " mon->osdmon()->blocklist: epoch : " << epoch <<  " address vector: " << addr_vect << " " << addr_vect.size() << dendl;
     }
     else{
         dout(1) << "Error: No nonces context present for gw: " <<gw_id  << " ANA group: " << grpid << dendl;
index 51c80111ed405e24da5da6f86b1ea7a1a3e1f51c..bc9732c6818f109873f5625fe17b5c10dc5751c6 100755 (executable)
@@ -60,13 +60,14 @@ private:
     void find_failback_gw       (const NvmeGwId &gw_id, const NvmeGroupKey& group_key,  bool &propose_pending);
     void set_failover_gw_for_ANA_group (const NvmeGwId &failed_gw_id, const NvmeGroupKey& group_key, const NvmeGwId &gw_id,
                                                                                                      NvmeAnaGrpId groupid);
-    int  blocklist_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId ANA_groupid, epoch_t &epoch, bool failover);
+
 
     int  get_timer   (const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId anagrpid);
     void cancel_timer(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId anagrpid);
     void validate_gw_map(const NvmeGroupKey& group_key);
 
 public:
+    int  blocklist_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId ANA_groupid, epoch_t &epoch, bool failover);
     void encode(ceph::buffer::list &bl) const {
         using ceph::encode;
         ENCODE_START(1, 1, bl);
index 69766f9d0548c77e0222516a5e8547bce0dfd0d9..97bff620f77ce66e0b24236ce35f547c1fc54f20 100644 (file)
 using std::string;
 
 void NVMeofGwMon::init(){
-    dout(4) <<  "called " << dendl;
+    dout(10) <<  "called " << dendl;
 }
 
 void NVMeofGwMon::on_restart(){
-    dout(4) <<  "called " << dendl;
+    dout(10) <<  "called " << dendl;
     last_beacon.clear();
     last_tick = ceph::coarse_mono_clock::now();
     synchronize_last_beacon();
@@ -37,7 +37,7 @@ void NVMeofGwMon::on_restart(){
 
 
 void NVMeofGwMon::synchronize_last_beacon(){
-    dout(10) <<  "called, is leader : " << mon.is_leader()  <<" active " << is_active()  << dendl;
+    dout(10) <<  "called " << dendl;
     // Initialize last_beacon to identify transitions of available  GWs to unavailable state
     for (const auto& created_map_pair: map.Created_gws) {
       const auto& group_key = created_map_pair.first;
@@ -45,7 +45,7 @@ void NVMeofGwMon::synchronize_last_beacon(){
       for (const auto& gw_created_pair: gw_created_map) {
           const auto& gw_id = gw_created_pair.first;
           if (gw_created_pair.second.availability == GW_AVAILABILITY_E::GW_AVAILABLE){
-             dout(4) << "synchronize last_beacon for  GW :" << gw_id << dendl;
+             dout(10) << "synchronize last_beacon for  GW :" << gw_id << dendl;
              LastBeacon lb = {gw_id, group_key};
              last_beacon[lb] = last_tick;
           }
@@ -54,7 +54,7 @@ void NVMeofGwMon::synchronize_last_beacon(){
 }
 
 void NVMeofGwMon::on_shutdown() {
-    dout(4) <<  "called " << dendl;
+    dout(10) <<  "called " << dendl;
 }
 
 void NVMeofGwMon::tick(){
@@ -75,7 +75,7 @@ void NVMeofGwMon::tick(){
         // This case handles either local slowness (calls being delayed
         // for whatever reason) or cluster election slowness (a long gap
         // between calls while an election happened)
-        dout(4) << ": resetting beacon timeouts due to mon delay "
+        dout(10) << ": resetting beacon timeouts due to mon delay "
                 "(slow election?) of " << now - last_tick << " seconds" << dendl;
         for (auto &i : last_beacon) {
           i.second = now;
@@ -95,7 +95,7 @@ void NVMeofGwMon::tick(){
         auto& lb = itr.first;
         auto last_beacon_time = itr.second;
         if(last_beacon_time < cutoff){
-            dout(4) << "beacon timeout for GW " << lb.gw_id << dendl;
+            dout(10) << "beacon timeout for GW " << lb.gw_id << dendl;
             pending_map.process_gw_map_gw_down( lb.gw_id, lb.group_key, propose);
             _propose_pending |= propose;
             last_beacon.erase(lb);
@@ -109,13 +109,9 @@ void NVMeofGwMon::tick(){
     _propose_pending |= propose;
 
     if(_propose_pending){
-       //pending_map.delay_propose = true; // not to send map to clients immediately in "update_from_paxos"
-       dout(4) << "propose pending " <<dendl;
+       dout(10) << "propose pending " <<dendl;
        propose_pending();
     }
-
-    // if propose_pending returned true , call propose_pending method of the paxosService
-    // todo understand the logic of paxos.plugged for sending several propose_pending see MgrMonitor::tick
 }
 
 const char **NVMeofGwMon::get_tracked_conf_keys() const
@@ -129,7 +125,7 @@ const char **NVMeofGwMon::get_tracked_conf_keys() const
 void NVMeofGwMon::handle_conf_change(const ConfigProxy& conf,
                                     const std::set<std::string> &changed)
 {
-  dout(4) << "changed " << changed << dendl;
+  dout(10) << "changed " << changed << dendl;
 }
 
 void NVMeofGwMon::create_pending(){
@@ -152,10 +148,10 @@ void NVMeofGwMon::encode_pending(MonitorDBStore::TransactionRef t){
 void NVMeofGwMon::update_from_paxos(bool *need_bootstrap){
     version_t version = get_last_committed();
 
-    //dout(4) <<  MY_MON_PREFFIX << __func__ << " version "  << version  << " map.epoch " << map.epoch << dendl;
+    //dout(10) <<  MY_MON_PREFFIX << __func__ << " version "  << version  << " map.epoch " << map.epoch << dendl;
 
     if (version != map.epoch) {
-        dout(4) << " NVMeGW loading version " << version  << " " << map.epoch << dendl;
+        dout(10) << " NVMeGW loading version " << version  << " " << map.epoch << dendl;
 
         bufferlist bl;
         int err = get_version(version, bl);
@@ -164,7 +160,7 @@ void NVMeofGwMon::update_from_paxos(bool *need_bootstrap){
         auto p = bl.cbegin();
         map.decode(p);
         if(!mon.is_leader()) {
-            dout(4) << "leader map: " << map <<  dendl;
+            dout(10) << "leader map: " << map <<  dendl;
         }
         check_subs(true);
     }
@@ -177,7 +173,7 @@ void NVMeofGwMon::check_sub(Subscription *sub)
     dout(10) << "sub->next , map-epoch " << sub->next << " " << map.epoch << dendl;
     if (sub->next <= map.epoch)
     {
-      dout(4) << "Sending map to subscriber " << sub->session->con << " " << sub->session->con->get_peer_addr() << dendl;
+      dout(10) << "Sending map to subscriber " << sub->session->con << " " << sub->session->con->get_peer_addr() << dendl;
       sub->session->con->send_message2(make_message<MNVMeofGwMap>(map));
 
       if (sub->onetime) {
@@ -191,7 +187,7 @@ void NVMeofGwMon::check_sub(Subscription *sub)
 void NVMeofGwMon::check_subs(bool t)
 {
   const std::string type = "NVMeofGw";
-  dout(4) <<  "count " << mon.session_map.subs.count(type) << dendl;
+  dout(10) <<  "count " << mon.session_map.subs.count(type) << dendl;
 
   if (mon.session_map.subs.count(type) == 0){
       return;
@@ -251,7 +247,7 @@ bool NVMeofGwMon::prepare_update(MonOpRequestRef op){
 
 bool NVMeofGwMon::preprocess_command(MonOpRequestRef op)
 {
-    dout(4) << dendl;
+    dout(10) << dendl;
     auto m = op->get_req<MMonCommand>();
     std::stringstream ss;
     bufferlist rdata;
@@ -267,14 +263,14 @@ bool NVMeofGwMon::preprocess_command(MonOpRequestRef op)
 
     string prefix;
     cmd_getval(cmdmap, "prefix", prefix);
-    dout(4) << "MonCommand : "<< prefix <<  dendl;
+    dout(10) << "MonCommand : "<< prefix <<  dendl;
 
     return false;
 }
 
 bool NVMeofGwMon::prepare_command(MonOpRequestRef op)
 {
-    dout(4)  << dendl;
+    dout(10)  << dendl;
     auto m = op->get_req<MMonCommand>();
     int rc;
     std::stringstream ss;
@@ -302,7 +298,7 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op)
 
     const auto prefix = cmd_getval_or<string>(cmdmap, "prefix", string{});
 
-    dout(4) << "MonCommand : "<< prefix <<  dendl;
+    dout(10) << "MonCommand : "<< prefix <<  dendl;
     if( prefix == "nvme-gw create" || prefix == "nvme-gw delete" ) {
         std::string id, pool, group;
 
@@ -310,7 +306,7 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op)
         cmd_getval(cmdmap, "pool", pool);
         cmd_getval(cmdmap, "group", group);
         auto group_key = std::make_pair(pool, group);
-        dout(4) << " id "<< id <<" pool "<< pool << " group "<< group << dendl;
+        dout(10) << " id "<< id <<" pool "<< pool << " group "<< group << dendl;
         if(prefix == "nvme-gw create"){
             rc = pending_map.cfg_add_gw(id, group_key);
             ceph_assert(rc!= -EINVAL);
@@ -339,7 +335,7 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op)
         cmd_getval(cmdmap, "pool", pool);
         cmd_getval(cmdmap, "group", group);
         auto group_key = std::make_pair(pool, group);
-        dout(4) <<"nvme-gw show  pool "<< pool << " group "<< group << dendl;
+        dout(10) <<"nvme-gw show  pool "<< pool << " group "<< group << dendl;
 
         if( map.Created_gws[group_key].size()){
             f->open_object_section("common");
@@ -365,7 +361,7 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op)
                  f->open_object_section("stat");
                  f->dump_string("gw-id", gw_id);
                  f->dump_unsigned("anagrp-id",state.ana_grp_id+1);
-                 f->dump_unsigned("last-gw_map-epoch-valid",state.last_gw_map_epoch_valid);
+                 f->dump_unsigned("performed-full-startup", state.performed_full_startup);
                  std::stringstream  ss1;
                  ss1 << state.availability;
                  f->dump_string("Availability", ss1.str());
@@ -411,10 +407,7 @@ bool NVMeofGwMon::preprocess_beacon(MonOpRequestRef op){
 }
 
 
-//#define BYPASS_GW_CREATE_CLI
-
 bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){
-    //dout(4)  << dendl;
     auto m = op->get_req<MNVMeofGwBeacon>();
 
     dout(20) << "availability " <<  m->get_availability() << " GW : " << m->get_gw_id() <<
@@ -435,10 +428,31 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){
     if (avail == GW_AVAILABILITY_E::GW_CREATED){
         if (gw == group_gws.end()) {
            gw_created = false;
-           dout(4) << "Warning: GW " << gw_id << " group_key " << group_key << " was not found in the  map.Created_gws "<< map.Created_gws <<dendl;
+           dout(10) << "Warning: GW " << gw_id << " group_key " << group_key << " was not found in the  map.Created_gws "<< map.Created_gws <<dendl;
+        }
+        else {
+            dout(4) << "GW  prepares the full startup " << gw_id << dendl;
+            if(pending_map.Created_gws[group_key][gw_id].performed_full_startup == false){
+                pending_map.Created_gws[group_key][gw_id].performed_full_startup = true;
+                propose = true;
+            }
         }
         goto set_propose;
     }
+    else { // gw already created
+        if (gw != group_gws.end()) // if GW reports Available but in monitor's database it is Unavailable
+                                   // it means it did not perform "exit" after failover was set by NVMeofGWMon
+           if( pending_map.Created_gws[group_key][gw_id].availability == GW_AVAILABILITY_E::GW_UNAVAILABLE  &&
+               pending_map.Created_gws[group_key][gw_id].performed_full_startup == false &&
+               avail == GW_AVAILABILITY_E::GW_AVAILABLE ) {
+               ack_map.Created_gws[group_key][gw_id] = pending_map.Created_gws[group_key][gw_id];
+               ack_map.epoch = map.epoch;
+               dout(1) << " Force gw to exit: Sending ack_map to GW: " << gw_id << dendl;
+               auto msg = make_message<MNVMeofGwMap>(ack_map);
+               mon.send_reply(op, msg.detach());
+               goto false_return;
+           }
+    }
 
     // At this stage the gw has to be in the Created_gws
     if(gw == group_gws.end()){
@@ -450,28 +464,24 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){
     if(m->get_nonce_map().size()) {
         if(pending_map.Created_gws[group_key][gw_id].nonce_map != m->get_nonce_map())
         {
-            dout(4) << "nonce map of GW  changed , propose pending " << gw_id << dendl;
+            dout(10) << "nonce map of GW  changed , propose pending " << gw_id << dendl;
             pending_map.Created_gws[group_key][gw_id].nonce_map = m->get_nonce_map();
-            dout(4) << "nonce map of GW " << gw_id << " "<< pending_map.Created_gws[group_key][gw_id].nonce_map  << dendl;
+            dout(10) << "nonce map of GW " << gw_id << " "<< pending_map.Created_gws[group_key][gw_id].nonce_map  << dendl;
             nonce_propose = true;
         }
     }
     else  {
-        dout(4) << "Warning: received empty nonce map in the beacon of GW " << gw_id << " "<< dendl;
+        dout(10) << "Warning: received empty nonce map in the beacon of GW " << gw_id << " "<< dendl;
     }
 
-    //pending_map.handle_removed_subsystems(gw_id, group_key, configured_subsystems, propose);
-
-    //if  no subsystem configured set gw as avail = GW_AVAILABILITY_E::GW_UNAVAILABLE
-
-    if(sub.size() == 0) {
+    if(sub.size() == 0 ) {
         avail = GW_AVAILABILITY_E::GW_UNAVAILABLE;
     }
     if(pending_map.Created_gws[group_key][gw_id].subsystems != sub)
     {
         dout(10) << "subsystems of GW changed, propose pending " << gw_id << dendl;
         pending_map.Created_gws[group_key][gw_id].subsystems =  sub;
-        dout(10) << "subsystems of GW " << gw_id << " "<< pending_map.Created_gws[group_key][gw_id].subsystems << dendl;
+        dout(20) << "subsystems of GW " << gw_id << " "<< pending_map.Created_gws[group_key][gw_id].subsystems << dendl;
         nonce_propose = true;
     }
     pending_map.Created_gws[group_key][gw_id].last_gw_map_epoch_valid = ( map.epoch == m->get_last_gwmap_epoch() );
@@ -480,7 +490,6 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){
     }
     if(avail == GW_AVAILABILITY_E::GW_AVAILABLE)
     {
-        //dout(4) <<"subsystems from beacon " << pending_map.Created_gws << dendl;
         auto now = ceph::coarse_mono_clock::now();
         // check pending_map.epoch vs m->get_version() - if different - drop the beacon
 
@@ -519,7 +528,7 @@ set_propose:
     }
 false_return:
     if (propose){
-      dout(4) << "decision in prepare_beacon" <<dendl;
+      dout(10) << "decision in prepare_beacon" <<dendl;
       return true;
     }
     else 
index 38eeac429cd96ce050ad732c3961dc2a6ac5600b..6078b5b54b3eff65175780bea475982fb45e1597 100644 (file)
@@ -55,9 +55,9 @@ public:
     void handle_conf_change(const ConfigProxy& conf, const std::set<std::string> &changed) override;
 
     // 3 pure virtual methods of the paxosService
-    void create_initial()override{};
-    void create_pending()override ;
-    void encode_pending(MonitorDBStore::TransactionRef t)override ;
+    void create_initial() override {};
+    void create_pending() override;
+    void encode_pending(MonitorDBStore::TransactionRef t) override;
 
     void init() override;
     void on_shutdown() override;
index 151e2e513a589be8209e6ce0494345aeed1d46d1..821da1e4d42d20756b843ac8eb518a18ca6fd106 100755 (executable)
@@ -130,12 +130,11 @@ inline std::ostream& print_gw_created_t(std::ostream& os, const NvmeGwCreated va
         os << " " << anas[i] <<": " << value.blocklist_data[anas[i]].osd_epoch << ":" <<value.blocklist_data[anas[i]].is_failover ;
     }
     os << "\n" << MODULE_PREFFIX << "nonces: " << value.nonce_map << " }";
-    os << "\n" << MODULE_PREFFIX << "saved-nonces: " << value.copied_nonce_map << " }";
     for (size_t i = 0; i < num_ana_groups; i++) {
         os << " " << anas[i] <<": " << value.sm_state[anas[i]] << ",";
     }
 
-    os << "]\n"<< MODULE_PREFFIX << "availability " << value.availability << "]";
+    os << "]\n"<< MODULE_PREFFIX << "availability " << value.availability << " full-startup " << value.performed_full_startup  << " ]";
 
     return os;
 }
@@ -332,7 +331,8 @@ inline void encode(const NvmeGwCreatedMap& gws,  ceph::bufferlist &bl) {
             encode((uint32_t)(gw.second.sm_state[i]), bl);
         }
         encode((uint32_t)gw.second.availability, bl);
-        encode((uint32_t)gw.second.last_gw_map_epoch_valid, bl);
+        encode((uint16_t)gw.second.performed_full_startup, bl);
+        encode((uint16_t)gw.second.last_gw_map_epoch_valid, bl);
         encode(gw.second.subsystems, bl);
 
         for(int i=0; i< MAX_SUPPORTED_ANA_GROUPS; i++){
@@ -340,7 +340,6 @@ inline void encode(const NvmeGwCreatedMap& gws,  ceph::bufferlist &bl) {
             encode(gw.second.blocklist_data[i].is_failover, bl);
         }
         encode(gw.second.nonce_map, bl);
-        encode(gw.second.copied_nonce_map, bl);
     }
     ENCODE_FINISH(bl);
 }
@@ -367,9 +366,12 @@ inline void decode(NvmeGwCreatedMap& gws, ceph::buffer::list::const_iterator &bl
         uint32_t avail;
         decode(avail, bl);
         gw_created.availability = (GW_AVAILABILITY_E)avail;
-        uint32_t gwmap_epoch;
-        decode(gwmap_epoch, bl);
-        gw_created.last_gw_map_epoch_valid = (bool)gwmap_epoch;
+        uint16_t performed_startup;
+        decode(performed_startup, bl);
+        gw_created.performed_full_startup = (bool)performed_startup;
+        uint16_t last_epoch_valid;
+        decode(last_epoch_valid, bl);
+        gw_created.last_gw_map_epoch_valid = (bool)last_epoch_valid;
         BeaconSubsystems   subsystems;
         decode(subsystems, bl);
         gw_created.subsystems = subsystems;
@@ -379,8 +381,6 @@ inline void decode(NvmeGwCreatedMap& gws, ceph::buffer::list::const_iterator &bl
             decode(gw_created.blocklist_data[i].is_failover, bl);
         }
         decode(gw_created.nonce_map, bl);
-        decode(gw_created.copied_nonce_map, bl);
-
         gws[gw_name] = gw_created;
     }
     DECODE_FINISH(bl);
index e78e817081527b5ab5b1527b785ea291ffb317fe..01704dc48dafb9ba17396f04b1bab8d934a7f804 100755 (executable)
@@ -98,9 +98,9 @@ struct NvmeGwCreated {
     NvmeAnaGrpId       ana_grp_id;                    // ana-group-id allocated for this GW, GW owns this group-id
     GW_AVAILABILITY_E  availability;                  // in absence of  beacon  heartbeat messages it becomes inavailable
     bool               last_gw_map_epoch_valid;       // "true" if the last epoch seen by the gw-client is up-to-date
+    bool               performed_full_startup;        // in order to identify gws that did not exit upon failover
     BeaconSubsystems   subsystems;                    // gateway susbsystem and their state machine states
     NvmeAnaNonceMap    nonce_map;
-    NvmeAnaNonceMap    copied_nonce_map;
     SM_STATE           sm_state;                      // state machine states per ANA group
     struct{
        epoch_t     osd_epoch;
@@ -109,7 +109,8 @@ struct NvmeGwCreated {
 
     NvmeGwCreated(): ana_grp_id(REDUNDANT_GW_ANA_GROUP_ID) {};
 
-    NvmeGwCreated(NvmeAnaGrpId id): ana_grp_id(id), availability(GW_AVAILABILITY_E::GW_CREATED), last_gw_map_epoch_valid(false)
+    NvmeGwCreated(NvmeAnaGrpId id): ana_grp_id(id), availability(GW_AVAILABILITY_E::GW_CREATED), last_gw_map_epoch_valid(false),
+                                    performed_full_startup(false)
     {
         for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++){
             sm_state[i] = GW_STATES_PER_AGROUP_E::GW_STANDBY_STATE;
@@ -117,7 +118,10 @@ struct NvmeGwCreated {
             blocklist_data[i].is_failover = true;
         }
     };
-
+    void set_unavailable_state(){
+        availability = GW_AVAILABILITY_E::GW_UNAVAILABLE;
+        performed_full_startup = false; // after setting this state the next time monitor sees GW, it expects it performed the full startup
+    }
     void standby_state(NvmeAnaGrpId grpid) {
            sm_state[grpid]       = GW_STATES_PER_AGROUP_E::GW_STANDBY_STATE;
     };
index 61a71ffb205a446386e8437b799c2c9f53866937..923d9d3d7dcd569bc927562c323159a70f670d67 100644 (file)
@@ -296,6 +296,9 @@ void NVMeofGwMonitorClient::handle_nvmeof_gw_map(ceph::ref_t<MNVMeofGwMap> nmap)
   NvmeGwState new_gw_state;
   auto got_new_gw_state = get_gw_state("new map", new_map, group_key, name, new_gw_state); 
 
+  // ensure that the gateway state has not vanished
+  ceph_assert(got_new_gw_state || !got_old_gw_state);
+
   if (!got_old_gw_state) {
     if (!got_new_gw_state) {
       dout(0) << "Can not find new gw state" << dendl;
index 89c72df38499b9d0c32f10abe1ae4d188bf6bd72..3f17998b1a36f293eda212dcc5fea6c5d9dab3f0 100644 (file)
@@ -40,19 +40,21 @@ void test_NVMeofGwMap() {
   pending_map.cfg_add_gw("GW3" ,group_key);
   NvmeNonceVector new_nonces = {"abc", "def","hij"};
   pending_map.Created_gws[group_key]["GW1"].nonce_map[1] = new_nonces;
+  pending_map.Created_gws[group_key]["GW1"].performed_full_startup = true;
   for(int i=0; i< MAX_SUPPORTED_ANA_GROUPS; i++){
     pending_map.Created_gws[group_key]["GW1"].blocklist_data[i].osd_epoch = i*2;
     pending_map.Created_gws[group_key]["GW1"].blocklist_data[i].is_failover = false;
   }
 
   pending_map.Created_gws[group_key]["GW2"].nonce_map[2] = new_nonces;
+  dout(0) << " == Dump map before Encode : == " <<dendl;
   dout(0) << pending_map << dendl;
 
   ceph::buffer::list bl;
   pending_map.encode(bl);
   auto p = bl.cbegin();
   pending_map.decode(p);
-  dout(0) << "Dump map after decode encode:" <<dendl;
+  dout(0) << " == Dump map after Decode: == " <<dendl;
   dout(0) << pending_map << dendl;
 }
 
@@ -87,7 +89,6 @@ void test_MNVMeofGwMap() {
   pending_map.cfg_add_gw("GW3" ,group_key);
   NvmeNonceVector new_nonces = {"abc", "def","hij"};
   pending_map.Created_gws[group_key]["GW1"].nonce_map[1] = new_nonces;
-  pending_map.Created_gws[group_key]["GW1"].copied_nonce_map[1] = new_nonces;
   pending_map.Created_gws[group_key]["GW1"].subsystems.push_back(sub);
   for(int i=0; i< MAX_SUPPORTED_ANA_GROUPS; i++){
     pending_map.Created_gws[group_key]["GW1"].blocklist_data[i].osd_epoch = i*2;