]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
msg/async: ProtocolV2::send_server_ident update ProtocolV2::global_seq wip-ksirivad-fix-msg-v2
authorKamoltat Sirivadhna <ksirivad@redhat.com>
Fri, 18 Jul 2025 05:00:18 +0000 (05:00 +0000)
committerKamoltat Sirivadhna <ksirivad@redhat.com>
Fri, 18 Jul 2025 05:13:04 +0000 (05:13 +0000)
In ProtocolV2::send_server_ident(), the global_seq was being fetched
from messenger->get_global_seq() and used in the ServerIdentFrame,
however, it is stored in a local var and not the private class var
ProtocolV2::global_seq. This causes problems like
where the receiving peer sees a peer_global_seq that
appears older than expected, triggering a false-positive reconnect logic:

```
2025-07-15T11:40:50.927+0000 mon.c handle_existing_connection client has clearly restarted
(peer_global_seq < ex_peer_global_seq && cookie changed), dropping existing connection=0x563ffe9a9000 in favor of new one
```

In this case, mon.c received a peer_global_seq=75, which was already logged by mon.d as gs=79 in
its send_server_ident()—but ProtocolV2::global_seq was never updated, resulting in inconsistent state and premature connection teardown.

This commit fixes the issue by assigning the freshly incremented messenger->get_global_seq() value to the local global_seq field in ProtocolV2 as well,
ensuring consistency in the protocol.

Fixes: https://tracker.ceph.com/issues/71344
Signed-off-by: Kamoltat Sirivadhna <ksirivad@redhat.com>
src/msg/async/AsyncMessenger.cc
src/msg/async/ProtocolV2.cc
src/msg/async/ProtocolV2.h

index 244716060f08d706b3f40b9f0157577012ee3ce0..3bd574085ffc505cd1c144cb9c71c7d6374b42e0 100644 (file)
@@ -1066,7 +1066,7 @@ __u32 AsyncMessenger::get_global_seq(__u32 old)
       ldout(cct, 10) << __func__ << " old=" << old
         << " > global_seq=" << global_seq
         << "; new global_seq=" << old << dendl;
-    global_seq = old;
+      global_seq = old;
   }
   __u32 ret = ++global_seq;
   ldout(cct, 10) << __func__ << " increment to global_seq=" << global_seq << dendl;
index cc145c4557a5a237a07934ff85c83751f51846cb..9611d174704d6e91224a85486646296837939db1 100644 (file)
@@ -461,7 +461,7 @@ void ProtocolV2::send_message(Message *m) {
     m->trace.event("async enqueueing message");
     out_queue[m->get_priority()].emplace_back(
       out_queue_entry_t{is_prepared, m});
-    ldout(cct, 15) << __func__ << " inline write is denied, reschedule m=" << m
+    ldout(cct, 15) << __func__ << " message queued for async transmission m=" << m
                    << dendl;
     if (((!replacing && can_write) || state == STANDBY) && !write_in_progress) {
       write_in_progress = true;
@@ -2679,8 +2679,11 @@ CtPtr ProtocolV2::handle_existing_connection(const AsyncConnectionRef& existing)
   if (peer_global_seq < exproto->peer_global_seq &&
       exproto->client_cookie && client_cookie &&
       exproto->client_cookie != client_cookie) {
-    ldout(cct, 1) << __func__ << " client has clearly restarted (peer_global_seq < ex_peer_global_seq && cookie changed), "
-                  << "dropping existing connection=" << existing << " in favor of new one" << dendl;
+    ldout(cct, 1) << __func__ << " client has clearly restarted (peer_global_seq="
+                  << peer_global_seq << " < ex_peer_global_seq=" << exproto->peer_global_seq
+                  << " && cookie changed: client_cookie=" << client_cookie << " != ex_client_cookie="
+                  << exproto->client_cookie << "), "
+                  << "dropping existing_connection=" << existing << " in favor of new_connection=" << connection << dendl;
     existing->protocol->stop();
     existing->dispatch_queue->queue_reset(existing.get());
     l.unlock();
@@ -2932,11 +2935,11 @@ CtPtr ProtocolV2::send_server_ident() {
     flags = flags | CEPH_MSG_CONNECT_LOSSY;
   }
 
-  uint64_t gs = messenger->get_global_seq();
+  global_seq = messenger->get_global_seq();
   auto server_ident = ServerIdentFrame::Encode(
           messenger->get_myaddrs(),
           messenger->get_myname().num(),
-          gs,
+          global_seq,
           connection->policy.features_supported,
           connection->policy.features_required | msgr2_required,
           flags,
@@ -2945,7 +2948,7 @@ CtPtr ProtocolV2::send_server_ident() {
   ldout(cct, 5) << __func__ << " sending identification:"
                 << " addrs=" << messenger->get_myaddrs()
                 << " gid=" << messenger->get_myname().num()
-                << " global_seq=" << gs << " features_supported=" << std::hex
+                << " global_seq=" << global_seq << " features_supported=" << std::hex
                 << connection->policy.features_supported
                 << " features_required="
                            << (connection->policy.features_required | msgr2_required)
index aa7006b03216775082845a59d38f89c9a1374ea9..26840b2d515a78b755b756f73018e2474d4b2413 100644 (file)
@@ -82,7 +82,7 @@ private:
 
   uint64_t client_cookie;
   uint64_t server_cookie;
-  uint64_t global_seq;
+  uint64_t global_seq; // Snapshot of AsyncMessenger::global_seq
   uint64_t connect_seq;
   uint64_t peer_global_seq;
   uint64_t message_seq;