From 39bdd370edf8253c5b77293385c63fbf9cd552c5 Mon Sep 17 00:00:00 2001 From: Radoslaw Zarzynski Date: Tue, 26 Oct 2021 19:27:06 +0000 Subject: [PATCH] crimson/net: drop crimson-specific check for the addr in ClientIdentFrame. In crimson (but not in the classic OSD) we have an extra check that verifies the address sent by our peer in `ClientIdentFrame` matches `AsyncConnection::target_addr` at our side. In the Rook environment this leads to problems with all cluster entities that are lacking the `ms_learn_addr_from_peer=false` setting in their configurations. This is true for `ceph-mgr`: ``` [root@rook-ceph-tools-698545dc56-zxrrx /]# ceph config show mgr.a ms_learn_addr_from_peer true ``` Unfortunately, testing has shown that: * clients in Rook also lack this extra bit of configuration while * removing the extra check in crimson stops requiring any additional configuration at clients. Although this still might look like a workaround for Rook having `ms_learn_addr_from_peer=false` solely for OSDs, I think we should drop the check to preserve both: * consistency of behaviour between OSD implementations, * compatibility with Ceph clients in existing k8s clusters. ``` INFO 2021-10-26 18:53:26,067 [shard 0] ms - [osd.0(client) v2:172.17.0.2:6800/1270141526 >> unknown.? -@59700] ProtocolV2::start_accept(): target_addr=172.17.0.5:59700/0 DEBUG 2021-10-26 18:53:26,067 [shard 0] ms - [osd.0(client) v2:172.17.0.2:6800/1270141526 >> unknown.? -@59700] TRIGGER ACCEPTING, was NONE DEBUG 2021-10-26 18:53:26,067 [shard 0] ms - [osd.0(client) v2:172.17.0.2:6800/1270141526 >> unknown.? -@59700] SEND(26) banner: len_payload=16, supported=1, required=0, banner="ceph v2 " DEBUG 2021-10-26 18:53:26,068 [shard 0] ms - [osd.0(client) v2:172.17.0.2:6800/1270141526 >> unknown.? -@59700] RECV(10) banner: "ceph v2 " DEBUG 2021-10-26 18:53:26,068 [shard 0] ms - [osd.0(client) v2:172.17.0.2:6800/1270141526 >> unknown.? -@59700] GOT banner: payload_len=16 DEBUG 2021-10-26 18:53:26,068 [shard 0] ms - [osd.0(client) v2:172.17.0.2:6800/1270141526 >> unknown.? -@59700] RECV(16) banner features: supported=1 required=0 DEBUG 2021-10-26 18:53:26,068 [shard 0] ms - [osd.0(client) v2:172.17.0.2:6800/1270141526 >> unknown.? -@59700] WRITE HelloFrame: my_type=osd, peer_addr=172.17.0.5:59700/0 DEBUG 2021-10-26 18:53:26,068 [shard 0] ms - [osd.0(client) v2:172.17.0.2:6800/1270141526 >> unknown.? -@59700] GOT HelloFrame: my_type=client peer_addr=v2:172.17.0.2:6800/1270141526 INFO 2021-10-26 18:53:26,068 [shard 0] ms - [osd.0(client) v2:172.17.0.2:6800/1270141526 >> client.? -@59700] UPDATE: peer_type=client, policy(lossy=true server=true standby=false resetcheck=false) WARN 2021-10-26 18:53:26,068 [shard 0] ms - [osd.0(client) v2:172.17.0.2:6800/1270141526 >> client.? -@59700] my_addr_from_peer v2:172.17.0.2:6800/1270141526 port/nonce DOES match myaddr v2:172.17.0.2:6800/1270141526 DEBUG 2021-10-26 18:53:26,068 [shard 0] ms - [osd.0(client) v2:172.17.0.2:6800/1270141526 >> client.? -@59700] GOT AuthRequestFrame: method=2, preferred_modes={1, 2}, payload_len=174 INFO 2021-10-26 18:53:26,068 [shard 0] monc - added challenge on [osd.0(client) v2:172.17.0.2:6800/1270141526 >> client.? -@59700] DEBUG 2021-10-26 18:53:26,068 [shard 0] ms - [osd.0(client) v2:172.17.0.2:6800/1270141526 >> client.? -@59700] WRITE AuthReplyMoreFrame: payload_len=32 DEBUG 2021-10-26 18:53:26,068 [shard 0] ms - [osd.0(client) v2:172.17.0.2:6800/1270141526 >> client.? -@59700] GOT AuthRequestMoreFrame: payload_len=174 DEBUG 2021-10-26 18:53:26,069 [shard 0] ms - [osd.0(client) v2:172.17.0.2:6800/1270141526 >> client.? -@59700] WRITE AuthDoneFrame: gid=14788, con_mode=crc, payload_len=36 DEBUG 2021-10-26 18:53:26,069 [shard 0] ms - [osd.0(client) v2:172.17.0.2:6800/1270141526 >> client.? -@59700] WRITE AuthSignatureFrame: signature=975c5d3ae09036abcb2ca7d4f7704ee681ca13151d9de2ee29394ec8aed9950c DEBUG 2021-10-26 18:53:26,069 [shard 0] ms - [osd.0(client) v2:172.17.0.2:6800/1270141526 >> client.? -@59700] GOT AuthSignatureFrame: signature=6209032314d560a21a3109ec6d7c0623ebd78cf1ea4fc9462411dbabe28b2d8d DEBUG 2021-10-26 18:53:26,069 [shard 0] ms - [osd.0(client) v2:172.17.0.2:6800/1270141526 >> client.? -@59700] GOT ClientIdentFrame: addrs=172.17.0.1:0/1137248631, target=v2:172.17.0.2:6800/1270141526, gid=14788, gs=9, features_supported=4540138297136906239, features_required=576460752303427584, flags=1, cookie=0 WARN 2021-10-26 18:53:26,069 [shard 0] ms - [osd.0(client) v2:172.17.0.2:6800/1270141526 >> client.? -@59700] peer's address 172.17.0.1:0/1137248631 is not v2 or not the same host with 172.17.0.5:59700/0 INFO 2021-10-26 18:53:26,070 [shard 0] ms - [osd.0(client) v2:172.17.0.2:6800/1270141526 >> client.? -@59700] execute_accepting(): fault at ACCEPTING, going to CLOSING -- std::system_error (error crimson::net:2, bad peer address) ``` This connectivity issue has been overcome by appending `--ms_learn_addr_from_peer=false` to the `argv`: ``` [root@rook-ceph-tools-698545dc56-zxrrx /]# bin/rados bench -p test-pool 5 rand --ms_learn_addr_from_peer=false hints = 1 sec Cur ops started finished avg MB/s cur MB/s last lat(s) avg lat(s) 0 0 0 0 0 0 - 0 1 16 1235 1219 4.76106 4.76172 0.020999 0.0129408 2 16 2531 2515 4.91158 5.0625 0.0131776 0.0126796 3 16 3746 3730 4.8563 4.74609 0.0145268 0.0128361 4 16 4951 4935 4.81889 4.70703 0.0154604 0.0129421 5 15 6236 6221 4.85972 5.02344 0.0121689 0.0128415 Total time run: 5.01136 Total reads made: 6236 Read size: 4096 Object size: 4096 Bandwidth (MB/sec): 4.86083 Average IOPS: 1244 Stddev IOPS: 43.1706 Max IOPS: 1296 Min IOPS: 1205 Average Latency(s): 0.01284 Max latency(s): 0.0244048 Min latency(s): 0.00201867 ``` However, on classical OSD and **crimson with this patch applied** there is no need for any configurables at the client-side: ``` [rook@rook-ceph-tools-698545dc56-xkkpf /]$ bin/rados bench -p test-pool 5 rand hints = 1 sec Cur ops started finished avg MB/s cur MB/s last lat(s) avg lat(s) 0 0 0 0 0 0 - 0 1 16 1124 1108 4.32747 4.32812 0.011878 0.0143472 2 16 2323 2307 4.50534 4.68359 0.0117413 0.0138221 3 16 3517 3501 4.55813 4.66406 0.0195142 0.0136663 4 16 4680 4664 4.55425 4.54297 0.0131425 0.0136958 5 16 5725 5709 4.45976 4.08203 0.0143174 0.0139868 Total time run: 5.01332 Total reads made: 5725 Read size: 4096 Object size: 4096 Bandwidth (MB/sec): 4.46077 Average IOPS: 1141 Stddev IOPS: 65.113 Max IOPS: 1199 Min IOPS: 1045 Average Latency(s): 0.0139892 Max latency(s): 0.0361518 Min latency(s): 0.00231195 ``` Signed-off-by: Radoslaw Zarzynski --- src/crimson/net/ProtocolV2.cc | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/src/crimson/net/ProtocolV2.cc b/src/crimson/net/ProtocolV2.cc index b7aef560b682e..fa47da50832c0 100644 --- a/src/crimson/net/ProtocolV2.cc +++ b/src/crimson/net/ProtocolV2.cc @@ -1209,18 +1209,7 @@ ProtocolV2::server_connect() throw std::system_error( make_error_code(crimson::net::error::bad_peer_address)); } - // TODO: change peer_addr to entity_addrvec_t - entity_addr_t paddr = client_ident.addrs().front(); - if ((paddr.is_msgr2() || paddr.is_any()) && - paddr.is_same_host(conn.target_addr)) { - // good - } else { - logger().warn("{} peer's address {} is not v2 or not the same host with {}", - conn, paddr, conn.target_addr); - throw std::system_error( - make_error_code(crimson::net::error::bad_peer_address)); - } - conn.peer_addr = paddr; + conn.peer_addr = client_ident.addrs().front(); logger().debug("{} UPDATE: peer_addr={}", conn, conn.peer_addr); conn.target_addr = conn.peer_addr; if (!conn.policy.lossy && !conn.policy.server && conn.target_addr.get_port() <= 0) { -- 2.39.5