From: Jakob Haufe <sur5r@sur5r.net>
Date: Thu, 11 May 2023 07:52:11 +0000 (+0530)
Subject: mgr/snap_schedule: Use mds_max_snaps_per_dir as snapshot count limit
X-Git-Tag: v16.2.15~186^2~5
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=1c0da15e4eb48aac49247689880b39cca3ff7afe;p=ceph.git

mgr/snap_schedule: Use mds_max_snaps_per_dir as snapshot count limit

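The module currently hardcodes a limit of 50 snapshots per path. Use the
mds_max_snaps_per_dir option instead, so that the limit is configurable.
One snapshot fewer than that limit is retained, leaving room to create a
new snapshot before pruning.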

Fixes: https://tracker.ceph.com/issues/59582
Signed-off-by: Jakob Haufe <sur5r@sur5r.net>
(cherry picked from commit dabac4c8dff527ffa5a2c2141e4a6e1896f00f71)

Conflicts:
	src/pybind/mgr/snap_schedule/fs/schedule_client.py
	- mostly type hinting additions
---

diff --git a/src/pybind/mgr/snap_schedule/fs/schedule_client.py b/src/pybind/mgr/snap_schedule/fs/schedule_client.py
index 319e068d3f86..162e980a5625 100644
--- a/src/pybind/mgr/snap_schedule/fs/schedule_client.py
+++ b/src/pybind/mgr/snap_schedule/fs/schedule_client.py
@@ -20,7 +20,6 @@ import traceback
 import errno
 
 
-MAX_SNAPS_PER_PATH = 50
 SNAP_SCHEDULE_NAMESPACE = 'cephfs-snap-schedule'
 SNAP_DB_PREFIX = 'snap_db'
 # increment this every time the db schema changes and provide upgrade code
@@ -64,7 +63,9 @@ def updates_schedule_db(func):
     return f
 
 
-def get_prune_set(candidates, retention):
+def get_prune_set(candidates: Set[Tuple[cephfs.DirEntry, datetime]],
+                  retention: Dict[str, int],
+                  max_snaps_to_retain: int) -> Set:
     PRUNING_PATTERNS = OrderedDict([
         # n is for keep last n snapshots, uses the snapshot name timestamp
         # format for lowest granularity
@@ -80,8 +81,8 @@ def get_prune_set(candidates, retention):
     ])
     keep = []
     if not retention:
-        log.info(f'no retention set, assuming n: {MAX_SNAPS_PER_PATH}')
-        retention = {'n': MAX_SNAPS_PER_PATH}
+        log.info(f'no retention set, assuming n: {max_snaps_to_retain}')
+        retention = {'n': max_snaps_to_retain}
     for period, date_pattern in PRUNING_PATTERNS.items():
         log.debug(f'compiling keep set for period {period}')
         period_count = retention.get(period, 0)
@@ -102,9 +103,10 @@ def get_prune_set(candidates, retention):
                         log.debug(('found enough snapshots for '
                                    f'{period_count}{period}'))
                         break
-    if len(keep) > MAX_SNAPS_PER_PATH:
-        log.info(f'Would keep more then {MAX_SNAPS_PER_PATH}, pruning keep set')
-        keep = keep[:MAX_SNAPS_PER_PATH]
+    if len(keep) > max_snaps_to_retain:
+        log.info((f'Would keep more then {max_snaps_to_retain}, '
+                  'pruning keep set'))
+        keep = keep[:max_snaps_to_retain]
     return candidates - set(keep)
 
 def snap_name_to_timestamp(scheduled_snap_name: str) -> str:
@@ -311,6 +313,7 @@ class SnapSchedClient(CephfsClient):
             path = sched.path
             prune_candidates = set()
             time = datetime.now(timezone.utc)
+            mds_max_snaps_per_dir = self.mgr.get_ceph_option('mds_max_snaps_per_dir')
             with open_filesystem(self, sched.fs) as fs_handle:
                 snap_dir = self.mgr.rados.conf_get('client_snapdir')
                 with fs_handle.opendir(f'{path}/{snap_dir}') as d_handle:
@@ -324,7 +327,9 @@ class SnapSchedClient(CephfsClient):
                         else:
                             log.debug(f'skipping dir entry {dir_.d_name}')
                         dir_ = fs_handle.readdir(d_handle)
-                to_prune = get_prune_set(prune_candidates, ret)
+                # Retain one snapshot fewer than the configured limit so that a new
+                # snapshot can still be created before pruning
+                to_prune = get_prune_set(prune_candidates, ret, mds_max_snaps_per_dir - 1)
                 for k in to_prune:
                     dirname = k[0].d_name.decode('utf-8')
                     log.debug(f'rmdir on {dirname}')