From: Jakob Haufe <sur5r@sur5r.net>
Date: Thu, 11 May 2023 07:52:11 +0000 (+0530)
Subject: mgr/snap_schedule: Use mds_max_snaps_per_dir as snapshot count limit
X-Git-Tag: v17.2.7~92^2~5
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=935f537ab655bcc12435193ec8da5c01a70efe3c;p=ceph.git

mgr/snap_schedule: Use mds_max_snaps_per_dir as snapshot count limit

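The module currently hardcodes a limit of 50 snapshots per path. Use the
mds_max_snaps_per_dir option instead to make this limit configurable. One
snapshot fewer than that value is retained, so that a new snapshot can
still be created before pruning.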

Fixes: https://tracker.ceph.com/issues/59582
Signed-off-by: Jakob Haufe <sur5r@sur5r.net>
(cherry picked from commit dabac4c8dff527ffa5a2c2141e4a6e1896f00f71)
---

diff --git a/src/pybind/mgr/snap_schedule/fs/schedule_client.py b/src/pybind/mgr/snap_schedule/fs/schedule_client.py
index fae4491bdd9e..a490d771d052 100644
--- a/src/pybind/mgr/snap_schedule/fs/schedule_client.py
+++ b/src/pybind/mgr/snap_schedule/fs/schedule_client.py
@@ -19,7 +19,6 @@ from .schedule import Schedule
 import traceback
 
 
-MAX_SNAPS_PER_PATH = 50
 SNAP_SCHEDULE_NAMESPACE = 'cephfs-snap-schedule'
 SNAP_DB_PREFIX = 'snap_db'
 # increment this every time the db schema changes and provide upgrade code
@@ -72,7 +71,8 @@ def updates_schedule_db(func: FuncT) -> FuncT:
 
 
 def get_prune_set(candidates: Set[Tuple[cephfs.DirEntry, datetime]],
-                  retention: Dict[str, int]) -> Set:
+                  retention: Dict[str, int],
+                  max_snaps_to_retain: int) -> Set:
     PRUNING_PATTERNS = OrderedDict([
         # n is for keep last n snapshots, uses the snapshot name timestamp
         # format for lowest granularity
@@ -88,8 +88,8 @@ def get_prune_set(candidates: Set[Tuple[cephfs.DirEntry, datetime]],
     ])
     keep = []
     if not retention:
-        log.info(f'no retention set, assuming n: {MAX_SNAPS_PER_PATH}')
-        retention = {'n': MAX_SNAPS_PER_PATH}
+        log.info(f'no retention set, assuming n: {max_snaps_to_retain}')
+        retention = {'n': max_snaps_to_retain}
     for period, date_pattern in PRUNING_PATTERNS.items():
         log.debug(f'compiling keep set for period {period}')
         period_count = retention.get(period, 0)
@@ -111,10 +111,10 @@ def get_prune_set(candidates: Set[Tuple[cephfs.DirEntry, datetime]],
                         log.debug(('found enough snapshots for '
                                    f'{period_count}{period}'))
                         break
-    if len(keep) > MAX_SNAPS_PER_PATH:
-        log.info((f'Would keep more then {MAX_SNAPS_PER_PATH}, '
+    if len(keep) > max_snaps_to_retain:
+        log.info((f'Would keep more then {max_snaps_to_retain}, '
                   'pruning keep set'))
-        keep = keep[:MAX_SNAPS_PER_PATH]
+        keep = keep[:max_snaps_to_retain]
     return candidates - set(keep)
 
 def snap_name_to_timestamp(scheduled_snap_name: str) -> str:
@@ -324,6 +324,7 @@ class SnapSchedClient(CephfsClient):
             path = sched.path
             prune_candidates = set()
             time = datetime.now(timezone.utc)
+            mds_max_snaps_per_dir = self.mgr.get_ceph_option('mds_max_snaps_per_dir')
             with open_filesystem(self, sched.fs) as fs_handle:
                 snap_dir = self.mgr.rados.conf_get('client_snapdir')
                 with fs_handle.opendir(f'{path}/{snap_dir}') as d_handle:
@@ -337,7 +338,9 @@ class SnapSchedClient(CephfsClient):
                         else:
                             log.debug(f'skipping dir entry {dir_.d_name}')
                         dir_ = fs_handle.readdir(d_handle)
-                to_prune = get_prune_set(prune_candidates, ret)
+                # Retain one snapshot fewer than the configured limit so that a new
+                # snapshot can still be created before pruning runs
+                to_prune = get_prune_set(prune_candidates, ret, mds_max_snaps_per_dir - 1)
                 for k in to_prune:
                     dirname = k[0].d_name.decode('utf-8')
                     log.debug(f'rmdir on {dirname}')