From 15479752096d7d927c1e0051cebc6144246d7b2e Mon Sep 17 00:00:00 2001 From: Kamoltat Date: Wed, 13 Dec 2023 16:59:16 +0000 Subject: [PATCH] src/mon/OSDMonitor.cc: more descriptive loggings for crc mismatch Problem: When mons are running different versions, e.g., during upgrades, we will sometimes encounter a crc mismatch between the crc that is generated by the leader MON (version n+1) and the one generated by the peon MON (version n). The mismatch is later resolved once all the MONs are at version n+1. Solution: Provide more informative logging when we encounter a CRC mismatch, and also output the mon versions so that it is easier for us to detect whether or not the crc mismatch comes from OSDMap running mixed versions. Fixes: https://tracker.ceph.com/issues/63389 Signed-off-by: Kamoltat --- src/mon/OSDMonitor.cc | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 79fff068c33..8deaba4f910 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -847,6 +847,7 @@ void OSDMonitor::update_from_paxos(bool *need_bootstrap) bufferlist orig_full_bl; get_version_full(osdmap.epoch, orig_full_bl); + dout(20) << __func__ << " mon is running version: " << ceph_version_to_str() << dendl; if (orig_full_bl.length()) { // the primary provided the full map ceph_assert(inc.have_crc); @@ -857,8 +858,12 @@ void OSDMonitor::update_from_paxos(bool *need_bootstrap) // sync with the primary for this and all future maps. OSDs // will also be brought back into sync when they discover the // crc mismatch and request a full map from a mon. - derr << __func__ << " full map CRC mismatch, resetting to canonical" - << dendl; + derr << __func__ << " full map CRC mismatch," + << " might be because mons are running mixed versions ..." 
+ << " resetting to canonical" << dendl; + + dout(20) << __func__ << " canonical crc: " << inc.full_crc + << " my crc: " << osdmap.crc << dendl; dout(20) << __func__ << " my (bad) full osdmap:\n"; JSONFormatter jf(true); @@ -2024,6 +2029,8 @@ void OSDMonitor::encode_pending(MonitorDBStore::TransactionRef t) bufferlist bl; encode(pending_inc, bl, features | CEPH_FEATURE_RESERVED); + dout(20) << __func__ << " mon is running version: " + << ceph_version_to_str() << dendl; dout(20) << " full_crc " << tmp.get_crc() << " inc_crc " << pending_inc.inc_crc << dendl; -- 2.39.5