From 539d26adb82655e28ffc22654bc8764112cd94e1 Mon Sep 17 00:00:00 2001 From: "Kamoltat (Junior) Sirivadhna" Date: Thu, 19 Jun 2025 10:23:55 -0400 Subject: [PATCH] Revert "[DNM] ProtocolV2: make handle_existing_connection check for cookie mismatch first" --- qa/config/rados.yaml | 1 - qa/suites/rados/monthrash/ceph.yaml | 1 - qa/tasks/mon_thrash.py | 5 ----- src/msg/async/ProtocolV2.cc | 30 +++++++++++++---------------- src/vstart.sh | 2 +- 5 files changed, 14 insertions(+), 25 deletions(-) diff --git a/qa/config/rados.yaml b/qa/config/rados.yaml index ad006bab444d7..710847f594b8f 100644 --- a/qa/config/rados.yaml +++ b/qa/config/rados.yaml @@ -11,4 +11,3 @@ overrides: osd mclock profile: high_recovery_ops mon: mon scrub interval: 300 - debug mon: 30 \ No newline at end of file diff --git a/qa/suites/rados/monthrash/ceph.yaml b/qa/suites/rados/monthrash/ceph.yaml index f862813990041..8055fe3722140 100644 --- a/qa/suites/rados/monthrash/ceph.yaml +++ b/qa/suites/rados/monthrash/ceph.yaml @@ -13,7 +13,6 @@ overrides: mon osdmap full prune txsize: 2 mon scrub inject crc mismatch: 0.01 mon scrub inject missing keys: 0.05 - debug ms: 20 # thrashing monitors may make mgr have trouble w/ its keepalive log-ignorelist: - ScrubResult diff --git a/qa/tasks/mon_thrash.py b/qa/tasks/mon_thrash.py index 97fa38983a6f9..84b0b6c521b32 100644 --- a/qa/tasks/mon_thrash.py +++ b/qa/tasks/mon_thrash.py @@ -354,13 +354,11 @@ class MonitorThrasher(Thrasher): if mons_to_freeze: for mon in mons_to_freeze: - self.log('freezing mon.{m}'.format(m=mon)) self.freeze_mon(mon) self.log('waiting for {delay} secs to unfreeze mons'.format( delay=self.freeze_mon_duration)) time.sleep(self.freeze_mon_duration) for mon in mons_to_freeze: - self.log('unfreezing mon.{m}'.format(m=mon)) self.unfreeze_mon(mon) if self.maintain_quorum: @@ -384,18 +382,15 @@ class MonitorThrasher(Thrasher): self.switch_task() for mon in mons_to_kill: - self.log('reviving mon.{m}'.format(m=mon)) self.revive_mon(mon) # do more freezes if mons_to_freeze: for mon in mons_to_freeze: - self.log('freezing mon.{m}'.format(m=mon)) self.freeze_mon(mon) self.log('waiting for {delay} secs to unfreeze mons'.format( delay=self.freeze_mon_duration)) time.sleep(self.freeze_mon_duration) for mon in mons_to_freeze: - self.log('unfreezing mon.{m}'.format(m=mon)) self.unfreeze_mon(mon) self.manager.wait_for_mon_quorum_size(len(mons)) diff --git a/src/msg/async/ProtocolV2.cc b/src/msg/async/ProtocolV2.cc index bf010e4396e6a..58e4f4df21df5 100644 --- a/src/msg/async/ProtocolV2.cc +++ b/src/msg/async/ProtocolV2.cc @@ -1894,7 +1894,6 @@ CtPtr ProtocolV2::handle_auth_done(ceph::bufferlist &payload) } CtPtr ProtocolV2::finish_client_auth() { - ldout(cct, 20) << __func__ << dendl; if (HAVE_MSGR2_FEATURE(peer_supported_features, COMPRESSION)) { return send_compression_request(); } @@ -1903,7 +1902,6 @@ CtPtr ProtocolV2::finish_client_auth() { } CtPtr ProtocolV2::finish_server_auth() { - ldout(cct, 20) << __func__ << dendl; // server had sent AuthDone and client responded with correct pre-auth // signature. // We can start conditioanl msgr protocol @@ -1920,12 +1918,10 @@ CtPtr ProtocolV2::finish_server_auth() { CtPtr ProtocolV2::start_session_connect() { if (!server_cookie) { - ldout(cct, 20) << __func__ << " starting a new session" << dendl; ceph_assert(connect_seq == 0); state = SESSION_CONNECTING; return send_client_ident(); } else { // reconnecting to previous session - ldout(cct, 20) << __func__ << " reconnecting to session" << dendl; state = SESSION_RECONNECTING; ceph_assert(connect_seq > 0); return send_reconnect(); @@ -2675,19 +2671,6 @@ CtPtr ProtocolV2::handle_existing_connection(const AsyncConnectionRef& existing) return WRITE(wait, "wait", read_frame); } - if (exproto->server_cookie && exproto->client_cookie && - exproto->client_cookie != client_cookie) { - // Found previous session - // peer has reseted and we're going to reuse the existing connection - // by replacing the communication socket - ldout(cct, 1) << __func__ << " found previous session existing=" << existing - << ", peer must have reseted." << dendl; - if (connection->policy.resetcheck) { - exproto->reset_session(); - } - return reuse_connection(existing, exproto); - } - if (exproto->peer_global_seq > peer_global_seq) { ldout(cct, 1) << __func__ << " this is a stale connection, peer_global_seq=" << peer_global_seq @@ -2710,6 +2693,19 @@ CtPtr ProtocolV2::handle_existing_connection(const AsyncConnectionRef& existing) return send_server_ident(); } + if (exproto->server_cookie && exproto->client_cookie && + exproto->client_cookie != client_cookie) { + // Found previous session + // peer has reseted and we're going to reuse the existing connection + // by replacing the communication socket + ldout(cct, 1) << __func__ << " found previous session existing=" << existing + << ", peer must have reseted." << dendl; + if (connection->policy.resetcheck) { + exproto->reset_session(); + } + return reuse_connection(existing, exproto); + } + if (exproto->client_cookie == client_cookie) { // session establishment interrupted between client_ident and server_ident, // continuing... diff --git a/src/vstart.sh b/src/vstart.sh index 15b721d653ce8..63c23f38d8476 100755 --- a/src/vstart.sh +++ b/src/vstart.sh @@ -1622,7 +1622,7 @@ else debug echo "** going verbose **" CMONDEBUG=' debug osd = 20 - debug mon = 30 + debug mon = 20 debug osd = 20 debug paxos = 20 debug auth = 20 -- 2.39.5