From 1dc78fd5b2d11cfe4d2a990b27ae02eee1c91af3 Mon Sep 17 00:00:00 2001 From: Radoslaw Zarzynski Date: Mon, 1 Mar 2021 21:50:36 +0000 Subject: [PATCH] crimson/monc: close() active_con before destructing it on resets. `ProtocolV2` expects `AuthClient` implementations to withstand calling `get_auth_request()` and `handle_auth_reply_more()` even if `handle_auth_done()` had been already called. This is because a network fault may happen on e.g. `AuthSignatureFrame` which is put on the wire after the `AuthDone` handling. `crimson::mon::Client` deals with that by returning `auth::error` from both `get_auth_request()` and `handle_auth_reply_more()` as the preceding invocation of `handle_auth_done()` had already cleared `pending_conns` (and set `active_con`). This leads to `abort_in_close()` and finally to dispatching `ms_handle_reset()` on `mon::Client` which is fine in general but, when comes to the current implementation, it destroys `active_con` without closing it first. One of the consequence is breaking the `mon::Connection::reply` promise; another one is missed `mark_down()` call. ``` DEBUG 2021-03-01 18:10:50,489 [shard 0] ms - [osd.0(client) v2:172.21.15.110:6800/33954@56752 >> mon.? v2:172.21.15.110:3300/0] GOT AuthDoneFrame: gid=4121, con_mode=se cure, payload_len=995 DEBUG 2021-03-01 18:10:50,489 [shard 0] ms - [osd.0(client) v2:172.21.15.110:6800/33954@56752 >> mon.? v2:172.21.15.110:3300/0] WRITE AuthSignatureFrame: signature=60ca f49e5a6cf3cc39c4160cb9d09032db5f794e29655dc0124cf5f42b7546fb DEBUG 2021-03-01 18:10:50,489 [shard 0] ms - authenticated_encrypt_update plaintext.length()=80 buffer.length()=80 DEBUG 2021-03-01 18:10:50,489 [shard 0] ms - authenticated_encrypt_final buffer.length()=96 final_len=0 INFO 2021-03-01 18:10:50,489 [shard 0] monc - found mon.noname-a INFO 2021-03-01 18:10:50,489 [shard 0] monc - sending auth(proto 2 2 bytes epoch 0) v1 INFO 2021-03-01 18:10:50,489 [shard 0] monc - waiting DEBUG 2021-03-01 18:10:50,489 [shard 0] ms - [osd.0(client) v2:172.21.15.110:6800/33954@56752 >> mon.? v2:172.21.15.110:3300/0] GOT AuthSignatureFrame: signature=ea04f1 318cf76808414a853ed37fd232ae886bef036cb4248079c6cba89d669a DEBUG 2021-03-01 18:10:50,490 [shard 0] ms - [osd.0(client) v2:172.21.15.110:6800/33954@56752 >> mon.? v2:172.21.15.110:3300/0] WRITE ClientIdentFrame: addrs=v2:172.21. 15.110:6800/33954, target=v2:172.21.15.110:3300/0, gid=0, gs=1, features_supported=4540138303579357183, features_required=576460752303432193, flags=1, cookie=9231904580 14536120 ... INFO 2021-03-01 18:10:50,490 [shard 0] ms - [osd.0(client) v2:172.21.15.110:6800/33954@56752 >> mon.? v2:172.21.15.110:3300/0] execute_connecting(): fault at CONNECTIN G, going to WAIT -- std::system_error (error crimson::net:4, read eof) ... DEBUG 2021-03-01 18:10:50,690 [shard 0] ms - [osd.0(client) v2:172.21.15.110:6800/33954@0 >> mon.? v2:172.21.15.110:3300/0] GOT HelloFrame: my_type=mon peer_addr=v2:172.21.15.110:63960/0 INFO 2021-03-01 18:10:50,690 [shard 0] monc - get_auth_request(con=[osd.0(client) v2:172.21.15.110:6800/33954@63960 >> mon.? v2:172.21.15.110:3300/0], auth_method=0) ERROR 2021-03-01 18:10:50,690 [shard 0] ms - [osd.0(client) v2:172.21.15.110:6800/33954@63960 >> mon.? v2:172.21.15.110:3300/0] get_initial_auth_request returned crimson::auth::error (unknown connection) INFO 2021-03-01 18:10:50,690 [shard 0] ms - [osd.0(client) v2:172.21.15.110:6800/33954@63960 >> mon.? v2:172.21.15.110:3300/0] closing: reset yes, replace no DEBUG 2021-03-01 18:10:50,690 [shard 0] ms - [osd.0(client) v2:172.21.15.110:6800/33954@63960 >> mon.? v2:172.21.15.110:3300/0] TRIGGER CLOSING, was CONNECTING ... INFO 2021-03-01 18:10:50,690 [shard 0] ms - [osd.0(client) v2:172.21.15.110:6800/33954@63960 >> mon.? v2:172.21.15.110:3300/0] write_event: dropped INFO 2021-03-01 18:10:50,690 [shard 0] ms - [osd.0(client) v2:172.21.15.110:6800/33954@63960 >> mon.? v2:172.21.15.110:3300/0] execute_connecting(): protocol aborted at CLOSING -- std::system_error (error crimson::net:6, protocol aborted) INFO 2021-03-01 18:10:50,690 [shard 0] ms - [osd.0(client) v2:172.21.15.110:6800/33954@63960 >> mon.? v2:172.21.15.110:3300/0] closing: reset yes, replace no DEBUG 2021-03-01 18:10:50,690 [shard 0] ms - [osd.0(client) v2:172.21.15.110:6800/33954@63960 >> mon.? v2:172.21.15.110:3300/0] TRIGGER CLOSING, was CONNECTING WARN 2021-03-01 18:10:50,690 [shard 0] osd - ms_handle_reset WARN 2021-03-01 18:10:50,690 [shard 0] monc - active conn reset v2:172.21.15.110:3300/0 INFO 2021-03-01 18:10:50,690 [shard 0] monc - reopen_session to mon.-1 WARN 2021-03-01 18:10:50,690 [shard 0] monc - mon.0 does not have an addr compatible with me INFO 2021-03-01 18:10:50,690 [shard 0] monc - connecting to mon.1 INFO 2021-03-01 18:10:50,690 [shard 0] ms - [osd.0(client) v2:172.21.15.110:6800/33954 >> mon.? v2:172.21.15.110:3300/0] ProtocolV2::start_connect(): peer_addr=v2:172.21.15.110:3300/0, peer_name=mon.?, cc=14512795460730278364 policy(lossy=true, server=false, standby=false, resetcheck=false) DEBUG 2021-03-01 18:10:50,690 [shard 0] ms - [osd.0(client) v2:172.21.15.110:6800/33954 >> mon.? v2:172.21.15.110:3300/0] TRIGGER CONNECTING, was NONE DEBUG 2021-03-01 18:10:50,690 [shard 0] ms - [osd.0(client) v2:172.21.15.110:6800/33954 >> mon.? v2:172.21.15.110:3300/0] UPDATE: gs=3 for connect INFO 2021-03-01 18:10:50,690 [shard 0] ms - [osd.0(client) v2:172.21.15.110:6800/33954 >> mon.? v2:172.21.15.110:3300/0] write_event: delay ... INFO 2021-03-01 18:10:50,690 [shard 0] ms - [osd.0(client) v2:172.21.15.110:6800/33954@63960 >> mon.? v2:172.21.15.110:3300/0] write_event: dropped INFO 2021-03-01 18:10:50,690 [shard 0] ms - [osd.0(client) v2:172.21.15.110:6800/33954@63960 >> mon.? v2:172.21.15.110:3300/0] execute_connecting(): protocol aborted at CLOSING -- std::system_error (error crimson::net:6, protocol aborted) ... " WARN 2021-03-01 18:10:50,690 [shard 0] seastar - Exceptional future ignored: seastar::broken_promise (broken promise), backtrace: 0x146f364 0x146f6e1 0x146fb01 0x135c2fe 0x135c481 0x6ee079 0x137db87 0x137def2 0x13ab085 0x1347b27 0x6619f5 /lib64/libc.so.6+0x237b2 0x6b217d -------- N7seastar12continuationINS_8internal22promise_base_with_typeIvEENS_6futureIvE12finally_bodyIZNS_5asyncIZZ4mainENKUlvE_clEvEUlvE_JEEENS_8futurizeINSt9result_ofIFNSt5decayIT_E4typeEDpNSC_IT0_E4typeEEE4typeEE4typeENS_17thread_attributesEOSD_DpOSG_EUlvE1_Lb0EEEZNS5_17then_wrapped_nrvoIS5_SU_EENSA_ISD_E4typeEOT0_EUlOS3_RSU_ONS_12future_stateINS1_9monostateEEEE_vEE ``` Signed-off-by: Radoslaw Zarzynski --- src/crimson/mon/MonClient.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/crimson/mon/MonClient.cc b/src/crimson/mon/MonClient.cc index 0e6df9ae2e6af..7baa3b34f1fc7 100644 --- a/src/crimson/mon/MonClient.cc +++ b/src/crimson/mon/MonClient.cc @@ -515,6 +515,7 @@ void Client::ms_handle_reset(crimson::net::ConnectionRef conn, bool /* is_replac return seastar::now(); } else if (active_con && active_con->is_my_peer(conn->get_peer_addr())) { logger().warn("active conn reset {}", conn->get_peer_addr()); + active_con->close(); active_con.reset(); return reopen_session(-1).then([this](bool opened) { if (opened) { -- 2.47.3