From: Venky Shankar Date: Wed, 16 Nov 2022 10:03:08 +0000 (-0500) Subject: mds: account for snapshot items when deciding to split or merge a directory X-Git-Tag: v18.1.0~592^2~1 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=47cdb3eff519b9ea69c51a2fbc86c20da155f439;p=ceph-ci.git mds: account for snapshot items when deciding to split or merge a directory It's easy to "overload" a directory object with a large number of omap entries by doing the following (one shot or over and over again): - touch dir/file{0..11000} ; create 11000 files (> mds_bal_split_size) - mkdir dir/.snap/snap_a - rm -f dir/file{0..11000} End result - the directory object would have 11000 omap entries since the MDS does not fragment directory snapshots. If the number of such entries exceeds `osd_deep_scrub_large_omap_object_key_threshold` (default: 200000), a cluster health warning is generated: Large Omap objects found in pool... CDir::should_merge() does not take into account COW'd inodes and the frags get merged. Fixes: http://tracker.ceph.com/issues/55215 Signed-off-by: Venky Shankar --- diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index d87ddbf63fe..d7c5afd83df 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -3794,7 +3794,7 @@ bool CDir::should_merge() const return false; } - return (int)get_frag_size() < g_conf()->mds_bal_merge_size; + return ((int)get_frag_size() + (int)get_num_snap_items()) < g_conf()->mds_bal_merge_size; } MEMPOOL_DEFINE_OBJECT_FACTORY(CDir, co_dir, mds_co); diff --git a/src/mds/CDir.h b/src/mds/CDir.h index 6e69bc25b8e..58507db970b 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -404,7 +404,7 @@ public: bool should_split() const { return g_conf()->mds_bal_split_size > 0 && - (int)get_frag_size() > g_conf()->mds_bal_split_size; + ((int)get_frag_size() + (int)get_num_snap_items()) > g_conf()->mds_bal_split_size; } bool should_split_fast() const; bool should_merge() const;