From c743ea7d0046d943091066b541a0fe32d38acd7d Mon Sep 17 00:00:00 2001 From: John Spray Date: Thu, 9 Mar 2017 09:17:06 -0500 Subject: [PATCH] mds: enable dirfrags by default in new filesystems Signed-off-by: John Spray --- PendingReleaseNotes | 4 +++- doc/cephfs/dirfrags.rst | 5 +++-- doc/cephfs/experimental-features.rst | 29 ++++++++++++++++------------ src/common/config_opts.h | 2 +- src/include/ceph_fs.h | 2 ++ src/mds/MDSMap.h | 2 +- 6 files changed, 27 insertions(+), 17 deletions(-) diff --git a/PendingReleaseNotes b/PendingReleaseNotes index 47f5b55e3c650..5ecdae2050db8 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -20,7 +20,9 @@ string will no longer be able to set quotas or any layout fields. This flag previously only restricted modification of the pool and namespace fields in layouts. - +* CephFS directory fragmentation (large directory support) is enabled + by default on new filesystems. To enable it on existing filesystems + use "ceph fs set <fs_name> allow_dirfrags". 12.0.0 ------ diff --git a/doc/cephfs/dirfrags.rst b/doc/cephfs/dirfrags.rst index e6bd045b2251d..717553fea9afc 100644 --- a/doc/cephfs/dirfrags.rst +++ b/doc/cephfs/dirfrags.rst @@ -14,7 +14,7 @@ here should be left at their default values. While directory fragmentation enables CephFS to handle very large numbers of entries in a single directory, application programmers should -remain cautious about creating very large directories, as they still +remain conservative about creating very large directories, as they still have a resource cost in situations such as a CephFS client listing the directory, where all the fragments must be loaded at once. @@ -27,7 +27,8 @@ Splitting and merging An MDS will only consider doing splits and merges if the ``mds_bal_frag`` setting is true in the MDS's configuration file, and the allow_dirfrags -setting is true in the filesystem map (set on the mons). +setting is true in the filesystem map (set on the mons). 
These settings +have both been true by default since the *Luminous* (12.2.x) release of Ceph. When an MDS identifies a directory fragment to be split, it does not do the split immediately. Because splitting interrupts metadata IO, diff --git a/doc/cephfs/experimental-features.rst b/doc/cephfs/experimental-features.rst index 1f6e3c2af41cb..bdfa998a99e2c 100644 --- a/doc/cephfs/experimental-features.rst +++ b/doc/cephfs/experimental-features.rst @@ -12,18 +12,6 @@ what is required to enable them. Note that doing so will *irrevocably* flag maps in the monitor as having once enabled this flag to improve debugging and support processes. - -Directory Fragmentation ------------------------ -CephFS directories are generally stored within a single RADOS object. But this has -certain negative results once they become large enough. The filesystem is capable -of "fragmenting" these directories into multiple objects. There are no known bugs -with doing so but it is not sufficiently tested to support at this time. - -Directory fragmentation has always been off by default and required setting -```mds bal frag = true`` in the MDS' config file. It has been further protected -by requiring the user to set the "allow_dirfrags" flag for Jewel. - Inline data ----------- By default, all CephFS file data is stored in RADOS objects. The inline data @@ -97,3 +85,20 @@ and may not work together; see above. Multiple filesystems were available starting in the Jewel release candidates but were protected behind the "enable_multiple" flag before the final release. + + +Previously experimental features +================================ + +Directory Fragmentation +----------------------- + +Directory fragmentation was considered experimental prior to the *Luminous* +(12.2.x) release. It is now enabled by default on new filesystems. 
To enable directory +fragmentation on filesystems created with older versions of Ceph, set +the ``allow_dirfrags`` flag on the filesystem: + +:: + + ceph fs set <fs_name> allow_dirfrags + diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 8c22e3837824d..222d147a89c92 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -526,7 +526,7 @@ OPTION(mds_log_max_expiring, OPT_INT, 20) OPTION(mds_bal_sample_interval, OPT_DOUBLE, 3.0) // every 3 seconds OPTION(mds_bal_replicate_threshold, OPT_FLOAT, 8000) OPTION(mds_bal_unreplicate_threshold, OPT_FLOAT, 0) -OPTION(mds_bal_frag, OPT_BOOL, false) +OPTION(mds_bal_frag, OPT_BOOL, true) OPTION(mds_bal_split_size, OPT_INT, 10000) OPTION(mds_bal_split_rd, OPT_FLOAT, 25000) OPTION(mds_bal_split_wr, OPT_FLOAT, 10000) diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 2d1486d94c737..479999d06c00e 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -239,6 +239,8 @@ struct ceph_mon_subscribe_ack { #define CEPH_MDSMAP_ALLOW_CLASSICS (CEPH_MDSMAP_ALLOW_SNAPS | CEPH_MDSMAP_ALLOW_MULTIMDS | \ CEPH_MDSMAP_ALLOW_DIRFRAGS) +#define CEPH_MDSMAP_DEFAULTS CEPH_MDSMAP_ALLOW_DIRFRAGS + /* * mds states * > 0 -> in diff --git a/src/mds/MDSMap.h b/src/mds/MDSMap.h index e1874459bfb0f..1d8e29b2a36be 100644 --- a/src/mds/MDSMap.h +++ b/src/mds/MDSMap.h @@ -220,7 +220,7 @@ public: public: MDSMap() : epoch(0), enabled(false), fs_name(MDS_FS_NAME_DEFAULT), - flags(0), last_failure(0), + flags(CEPH_MDSMAP_DEFAULTS), last_failure(0), last_failure_osd_epoch(0), tableserver(0), root(0), session_timeout(0), -- 2.39.5