]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: account for snapshot items when deciding to split or merge a directory
author: Venky Shankar <vshankar@redhat.com>
Wed, 16 Nov 2022 10:03:08 +0000 (05:03 -0500)
committer: Venky Shankar <vshankar@redhat.com>
Mon, 9 Jan 2023 10:37:08 +0000 (16:07 +0530)
It's easy to "overload" a directory object with a large number of omap entries by
doing the following (one shot or over and over again):

      - touch dir/file{0..11000} ; create 11000 files (> mds_bal_split_size)
      - mkdir dir/.snap/snap_a
      - rm -f dir/file{0..11000}

End result - the directory object would have 11000 omap entries since the MDS
does not fragment directory snapshots. If the number of such entries exceeds
`osd_deep_scrub_large_omap_object_key_threshold` (default: 200000), a cluster
health warning is generated:

       Large Omap objects found in pool...

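CDir::should_merge() does not take COW'd inodes into account, so the frags
get merged even though they still hold snapshot items. Count snapshot items
alongside the frag size in both CDir::should_split() and CDir::should_merge().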

Fixes: http://tracker.ceph.com/issues/55215
Signed-off-by: Venky Shankar <vshankar@redhat.com>
(cherry picked from commit 47cdb3eff519b9ea69c51a2fbc86c20da155f439)

src/mds/CDir.cc
src/mds/CDir.h

index 7e18263d85704abe95858a3492c79c561e305bc1..f2ab86c65fb2777447437588df8e9b1947bfe02e 100644 (file)
@@ -3673,7 +3673,7 @@ bool CDir::should_merge() const
       return false;
   }
 
-  return (int)get_frag_size() < g_conf()->mds_bal_merge_size;
+  return ((int)get_frag_size() + (int)get_num_snap_items()) < g_conf()->mds_bal_merge_size;
 }
 
 MEMPOOL_DEFINE_OBJECT_FACTORY(CDir, co_dir, mds_co);
index b2dcdafde756b57a2f363a1290c3ddbb936fdd2c..b01abc200cd8f6e2fa34a5616f9ef268ac1eb5ad 100644 (file)
@@ -398,7 +398,7 @@ public:
 
   bool should_split() const {
     return g_conf()->mds_bal_split_size > 0 &&
-           (int)get_frag_size() > g_conf()->mds_bal_split_size;
+      ((int)get_frag_size() + (int)get_num_snap_items()) > g_conf()->mds_bal_split_size;
   }
   bool should_split_fast() const;
   bool should_merge() const;