From: Patrick Donnelly Date: Mon, 18 May 2026 14:30:02 +0000 (-0400) Subject: osdc: atomize strand tracking and queuing X-Git-Tag: testing/wip-pdonnell-testing-20260520.191703-main^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=4908aebd6792bb408f3f4e9e8b201f45a89ff9bb;p=ceph-ci.git osdc: atomize strand tracking and queuing This expands the scope of the strand_track_lock in track_enqueue to cover both the deque insertion and the boost::asio::post call. This ensures that the tracked message order in the deque mirrors the execution order in the strand, preventing race conditions during track_dequeue. New benchmarks: main: Maintaining 512 concurrent writes of 4194304 bytes to objects of size 4194304 for up to 30 seconds or 0 objects Object prefix: benchmark_data_sockeni03.front.sepia.ceph.co_4190499 sec Cur ops started finished avg MB/s cur MB/s last lat(s) avg lat(s) 0 0 0 0 0 0 - 0 1 511 772 261 1043.68 1044 0.602092 0.623632 2 511 1573 1062 2123.42 3204 0.659609 0.628934 3 511 2312 1801 2400.7 2956 0.695258 0.645583 4 511 3037 2526 2525.35 2900 0.713942 0.663253 5 511 3741 3230 2583.34 2816 0.752765 0.676368 6 511 4469 3958 2638 2912 0.706926 0.683916 7 511 5170 4659 2661.61 2804 0.741449 0.687722 8 511 5863 5352 2675.32 2772 0.746016 0.694277 9 511 6560 6049 2687.76 2788 0.746805 0.700263 10 511 7264 6753 2700.51 2816 0.740369 0.702443 11 511 7990 7479 2718.94 2904 0.687135 0.704563 12 511 8731 8220 2739.3 2964 0.692164 0.703055 13 511 9463 8952 2753.76 2928 0.71129 0.701936 14 511 10177 9666 2761.01 2856 0.731789 0.703143 15 511 10885 10374 2765.7 2832 0.722195 0.702378 16 511 11636 11125 2780.54 3004 0.658004 0.704229 17 511 12354 11843 2785.88 2872 0.714033 0.703798 18 511 13022 12511 2779.51 2672 0.730546 0.706917 19 511 13725 13214 2781.19 2812 0.746641 0.707174 2026-05-18T18:24:48.615814+0000 min lat: 0.593545 max lat: 0.798149 avg lat: 0.708467 sec Cur ops started finished avg MB/s cur MB/s last lat(s) avg lat(s) 20 511 
14446 13935 2786.29 2884 0.704788 0.708467 21 511 15124 14613 2782.71 2712 0.752159 0.710038 22 511 15830 15319 2784.55 2824 0.74101 0.710123 23 511 16597 16086 2796.84 3068 0.669602 0.70965 24 511 17307 16796 2798.61 2840 0.721002 0.710038 25 511 17996 17485 2796.88 2756 0.773205 0.709387 26 511 18718 18207 2800.35 2888 0.722115 0.710987 27 511 19424 18913 2801.2 2824 0.72963 0.71117 28 511 20123 19612 2800.99 2796 0.759028 0.710816 29 511 20793 20282 2796.79 2680 0.772401 0.713454 30 364 21457 21093 2811.67 3244 0.562247 0.714675 Total time run: 30.0382 Total writes made: 21457 Write size: 4194304 Object size: 4194304 Bandwidth (MB/sec): 2857.29 Stddev Bandwidth: 358.285 Max bandwidth (MB/sec): 3244 Min bandwidth (MB/sec): 1044 Average IOPS: 714 Stddev IOPS: 89.5711 Max IOPS: 811 Min IOPS: 261 Average Latency(s): 0.707111 Stddev Latency(s): 0.0722934 Max latency(s): 0.864572 Min latency(s): 0.0251892 PATCHED: Object prefix: benchmark_data_sockeni03.front.sepia.ceph.co_4190236 sec Cur ops started finished avg MB/s cur MB/s last lat(s) avg lat(s) 0 0 0 0 0 0 - 0 1 511 882 371 1483.61 1484 0.5225 0.55758 2 511 1927 1416 2831.37 4180 0.497644 0.508555 3 511 2953 2442 3255.31 4104 0.520767 0.502161 4 511 3986 3475 3474.28 4132 0.496781 0.502651 5 511 5011 4500 3599.23 4100 0.509399 0.500694 6 511 5997 5486 3656.56 3944 0.548741 0.501145 7 511 7018 6507 3717.51 4084 0.492396 0.504786 8 511 8041 7530 3764.2 4092 0.504898 0.503412 9 511 9067 8556 3801.86 4104 0.502205 0.502793 10 511 10101 9590 3835.19 4136 0.487124 0.50264 11 511 11127 10616 3859.55 4104 0.499829 0.501867 12 511 12165 11654 3883.84 4152 0.49328 0.501305 13 511 13179 12668 3897.01 4056 0.486069 0.501655 14 511 14226 13715 3917.73 4188 0.491326 0.500687 15 511 15251 14740 3929.83 4100 0.503174 0.500373 16 511 16257 15746 3935.67 4024 0.499083 0.501023 17 511 17295 16784 3948.34 4152 0.496384 0.5007 18 511 18323 17812 3957.38 4112 0.493464 0.500522 19 511 19345 18834 3964.21 4088 0.501001 0.500408 
2026-05-18T18:22:31.303461+0000 min lat: 0.473011 max lat: 0.657575 avg lat: 0.500032 sec Cur ops started finished avg MB/s cur MB/s last lat(s) avg lat(s) 20 511 20394 19883 3975.76 4196 0.484456 0.500032 21 511 21428 20917 3983.34 4136 0.49132 0.499649 22 511 22435 21924 3985.34 4028 0.519046 0.499929 23 511 23428 22917 3984.72 3972 0.517337 0.500518 24 511 24460 23949 3990.66 4128 0.489751 0.500573 25 511 25487 24976 3995.32 4108 0.497512 0.500408 26 511 26479 25968 3994.23 3968 0.532355 0.50066 27 511 27483 26972 3995.01 4016 0.516575 0.501108 28 511 28516 28005 3999.87 4132 0.504353 0.501063 29 511 29528 29017 4001.5 4048 0.496005 0.501335 30 17 30524 30507 4066.75 5960 0.0224411 0.497765 Total time run: 30.0327 Total writes made: 30524 Write size: 4194304 Object size: 4194304 Bandwidth (MB/sec): 4065.43 Stddev Bandwidth: 598.483 Max bandwidth (MB/sec): 5960 Min bandwidth (MB/sec): 1484 Average IOPS: 1016 Stddev IOPS: 149.621 Max IOPS: 1490 Min IOPS: 371 Average Latency(s): 0.497511 Stddev Latency(s): 0.0378263 Max latency(s): 0.657575 Min latency(s): 0.0224411 Signed-off-by: Patrick Donnelly --- diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc index e14c76c00b0..03ab1ec4335 100644 --- a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@ -1058,8 +1058,7 @@ void Objecter::ms_fast_dispatch2(const MessageRef& m) auto priv = m->get_connection()->get_priv(); auto s = static_cast(priv.get()); if (s) { - s->track_enqueue(m); - boost::asio::post(s->strand, [this, priv, s, m]() { + s->track_enqueue(m, [this, priv, s, m]() { cref_t msg = ref_cast(m); s->track_dequeue(m); handle_osd_op_reply(std::move(msg)); @@ -1074,8 +1073,7 @@ void Objecter::ms_fast_dispatch2(const MessageRef& m) auto priv = m->get_connection()->get_priv(); auto s = static_cast(priv.get()); if (s) { - s->track_enqueue(m); - boost::asio::post(s->strand, [this, priv, s, m]() { + s->track_enqueue(m, [this, priv, s, m]() { cref_t msg = ref_cast(m); s->track_dequeue(m); 
handle_watch_notify(std::move(msg)); @@ -1099,8 +1097,7 @@ Dispatcher::dispatch_result_t Objecter::ms_dispatch2(const MessageRef& m) auto priv = m->get_connection()->get_priv(); auto s = static_cast(priv.get()); if (s) { - s->track_enqueue(m); - boost::asio::post(s->strand, [this, priv, s, m]() { + s->track_enqueue(m, [this, priv, s, m]() { cref_t msg = ref_cast(m); s->track_dequeue(m); handle_osd_backoff(std::move(msg)); @@ -1118,8 +1115,7 @@ Dispatcher::dispatch_result_t Objecter::ms_dispatch2(const MessageRef& m) auto priv = m->get_connection()->get_priv(); auto s = static_cast(priv.get()); if (s) { - s->track_enqueue(m); - boost::asio::post(s->strand, [this, priv, s, m]() { + s->track_enqueue(m, [this, priv, s, m]() { cref_t msg = ref_cast(m); s->track_dequeue(m); handle_command_reply(std::move(msg)); diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h index 9f87e4ad0ef..ca884accb7d 100644 --- a/src/osdc/Objecter.h +++ b/src/osdc/Objecter.h @@ -2509,18 +2509,19 @@ public: std::mutex strand_track_lock; std::deque<MessageRef> queued_messages; - void track_enqueue(const MessageRef& m) { + template<typename Callable> + void track_enqueue(const MessageRef& m, Callable&& f) { std::lock_guard l(strand_track_lock); queued_messages.push_back(m); + boost::asio::post(strand, std::forward<Callable>(f)); } void track_dequeue(const MessageRef& m) { std::lock_guard l(strand_track_lock); - if (!queued_messages.empty()) { - auto const& _m = queued_messages.front(); - ceph_assert(_m == m); - queued_messages.pop_front(); - } + ceph_assert(!queued_messages.empty()); + auto const& _m = queued_messages.front(); + ceph_assert(_m == m); + queued_messages.pop_front(); } int incarnation;