]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/volumes: Better handle config file on osd full scenario
authorKotresh HR <khiremat@redhat.com>
Tue, 16 Aug 2022 11:38:16 +0000 (17:08 +0530)
committerKotresh HR <khiremat@redhat.com>
Tue, 16 Aug 2022 11:38:16 +0000 (17:08 +0530)
The 'metadata_mgr.flush()' used to truncate the config file
before flushing the new config data. This could lead to an
empty config file when there is no space to write new config
data. This patch handles this scenario by writing it to
temporary file and rename it to config file. This would
retain the config file without truncating it.

Also, there are bunch of places which wasn't handling
'MetadataMgrException' because of this. Fixed those.

Fixes: https://tracker.ceph.com/issues/55976
Signed-off-by: Kotresh HR <khiremat@redhat.com>
src/pybind/mgr/volumes/fs/async_cloner.py
src/pybind/mgr/volumes/fs/operations/versions/metadata_manager.py
src/pybind/mgr/volumes/fs/operations/versions/subvolume_base.py
src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py
src/pybind/mgr/volumes/fs/operations/versions/subvolume_v2.py

index 5a1fcbfdf5d006919211fe24e0d22e800b85a324..95f7d64e1b36422995b5941032c9e4bf39656b18 100644 (file)
@@ -296,9 +296,10 @@ def start_clone_sm(fs_client, volspec, volname, index, groupname, subvolname, st
                                                                                          current_state, next_state))
                 set_clone_state(fs_client, volspec, volname, groupname, subvolname, next_state)
                 current_state = next_state
-    except VolumeException as ve:
-        log.error("clone failed for ({0}, {1}, {2}) (current_state: {3}, reason: {4})".format(volname, groupname,\
-                                                                                             subvolname, current_state, ve))
+    except (MetadataMgrException, VolumeException) as e:
+        log.error(f"clone failed for ({volname}, {groupname}, {subvolname}) "
+                  f"(current_state: {current_state}, reason: {e} {os.strerror(-e.args[0])})")
+        raise
 
 def clone(fs_client, volspec, volname, index, clone_path, state_table, should_cancel, snapshot_clone_delay):
     log.info("cloning to subvolume path: {0}".format(clone_path))
@@ -312,8 +313,8 @@ def clone(fs_client, volspec, volname, index, clone_path, state_table, should_ca
         log.info("starting clone: ({0}, {1}, {2})".format(volname, groupname, subvolname))
         start_clone_sm(fs_client, volspec, volname, index, groupname, subvolname, state_table, should_cancel, snapshot_clone_delay)
         log.info("finished clone: ({0}, {1}, {2})".format(volname, groupname, subvolname))
-    except VolumeException as ve:
-        log.error("clone failed for ({0}, {1}, {2}), reason: {3}".format(volname, groupname, subvolname, ve))
+    except (MetadataMgrException, VolumeException) as e:
+        log.error(f"clone failed for ({volname}, {groupname}, {subvolname}), reason: {e} {os.strerror(-e.args[0])}")
 
 class Cloner(AsyncJobs):
     """
index 3c536f8a9b4b53807dac8a8555bd4594ff9a9c14..764f4bc25751a876a1607e67737f44bd5aee52df 100644 (file)
@@ -97,14 +97,16 @@ class MetadataManager(object):
             if len(self.config.items(section)) == 0:
                 self.config.remove_section(section)
 
-
         try:
             with _lock:
-                fd = self.fs.open(self.config_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, self.mode)
+                tmp_config_path = self.config_path + b'.tmp'
+                fd = self.fs.open(tmp_config_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, self.mode)
                 with _ConfigWriter(self.fs, fd) as cfg_writer:
                     self.config.write(cfg_writer)
                     cfg_writer.fsync()
-            log.info("wrote {0} bytes to config {1}".format(cfg_writer.wrote, self.config_path))
+                self.fs.rename(tmp_config_path, self.config_path)
+            log.info(f"wrote {cfg_writer.wrote} bytes to config {tmp_config_path}")
+            log.info(f"Renamed {tmp_config_path} to config {self.config_path}")
         except cephfs.Error as e:
             raise MetadataMgrException(-e.args[0], e.args[1])
 
index 9e3ca9e25b2eda4faf70089f1088411fefe9bc53..906b55e7b83dfe5b62884ea915da0dfbae9b6a5c 100644 (file)
@@ -433,9 +433,14 @@ class SubvolumeBase(object):
                 'features': self.features, 'state': self.state.value}
 
     def set_user_metadata(self, keyname, value):
-        self.metadata_mgr.add_section(MetadataManager.USER_METADATA_SECTION)
-        self.metadata_mgr.update_section(MetadataManager.USER_METADATA_SECTION, keyname, str(value))
-        self.metadata_mgr.flush()
+        try:
+            self.metadata_mgr.add_section(MetadataManager.USER_METADATA_SECTION)
+            self.metadata_mgr.update_section(MetadataManager.USER_METADATA_SECTION, keyname, str(value))
+            self.metadata_mgr.flush()
+        except MetadataMgrException as me:
+            log.error(f"Failed to set user metadata key={keyname} value={value} on subvolume={self.subvol_name} "
+                      f"group={self.group_name} reason={me.args[1]}, errno:{-me.args[0]}, {os.strerror(-me.args[0])}")
+            raise VolumeException(-me.args[0], me.args[1])
 
     def get_user_metadata(self, keyname):
         try:
@@ -457,7 +462,9 @@ class SubvolumeBase(object):
             self.metadata_mgr.flush()
         except MetadataMgrException as me:
             if me.errno == -errno.ENOENT:
-                raise VolumeException(-errno.ENOENT, "subvolume metadata not does not exist")
+                raise VolumeException(-errno.ENOENT, "subvolume metadata does not exist")
+            log.error(f"Failed to remove user metadata key={keyname} on subvolume={self.subvol_name} "
+                      f"group={self.group_name} reason={me.args[1]}, errno:{-me.args[0]}, {os.strerror(-me.args[0])}")
             raise VolumeException(-me.args[0], me.args[1])
 
     def get_snap_section_name(self, snapname):
@@ -465,10 +472,16 @@ class SubvolumeBase(object):
         return section;
 
     def set_snapshot_metadata(self, snapname, keyname, value):
-        section = self.get_snap_section_name(snapname)
-        self.metadata_mgr.add_section(section)
-        self.metadata_mgr.update_section(section, keyname, str(value))
-        self.metadata_mgr.flush()
+        try:
+            section = self.get_snap_section_name(snapname)
+            self.metadata_mgr.add_section(section)
+            self.metadata_mgr.update_section(section, keyname, str(value))
+            self.metadata_mgr.flush()
+        except MetadataMgrException as me:
+            log.error(f"Failed to set snapshot metadata key={keyname} value={value} on snap={snapname} "
+                      f"subvolume={self.subvol_name} group={self.group_name} "
+                      f"reason={me.args[1]}, errno:{-me.args[0]}, {os.strerror(-me.args[0])}")
+            raise VolumeException(-me.args[0], me.args[1])
 
     def get_snapshot_metadata(self, snapname, keyname):
         try:
@@ -476,6 +489,9 @@ class SubvolumeBase(object):
         except MetadataMgrException as me:
             if me.errno == -errno.ENOENT:
                 raise VolumeException(-errno.ENOENT, "key '{0}' does not exist.".format(keyname))
+            log.error(f"Failed to get snapshot metadata key={keyname} on snap={snapname} "
+                      f"subvolume={self.subvol_name} group={self.group_name} "
+                      f"reason={me.args[1]}, errno:{-me.args[0]}, {os.strerror(-me.args[0])}")
             raise VolumeException(-me.args[0], me.args[1])
         return value
 
@@ -491,4 +507,7 @@ class SubvolumeBase(object):
         except MetadataMgrException as me:
             if me.errno == -errno.ENOENT:
                 raise VolumeException(-errno.ENOENT, "snapshot metadata not does not exist")
+            log.error(f"Failed to remove snapshot metadata key={keyname} on snap={snapname} "
+                      f"subvolume={self.subvol_name} group={self.group_name} "
+                      f"reason={me.args[1]}, errno:{-me.args[0]}, {os.strerror(-me.args[0])}")
             raise VolumeException(-me.args[0], me.args[1])
index 019054c51465cee599b01bf5a4fc80ba6cf100b8..e2857cec3d184a81fef15fbe9213235d197198c2 100644 (file)
@@ -120,7 +120,7 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate):
 
             if isinstance(e, MetadataMgrException):
                 log.error("metadata manager exception: {0}".format(e))
-                e = VolumeException(-errno.EINVAL, "exception in subvolume metadata")
+                e = VolumeException(-errno.EINVAL, f"exception in subvolume metadata: {os.strerror(-e.args[0])}")
             elif isinstance(e, cephfs.Error):
                 e = VolumeException(-e.args[0], e.args[1])
             raise e
@@ -141,12 +141,16 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate):
             self.metadata_mgr.flush()
 
     def add_clone_failure(self, errno, error_msg):
-        self.metadata_mgr.add_section(MetadataManager.CLONE_FAILURE_SECTION)
-        self.metadata_mgr.update_section(MetadataManager.CLONE_FAILURE_SECTION,
-                                         MetadataManager.CLONE_FAILURE_META_KEY_ERRNO, errno)
-        self.metadata_mgr.update_section(MetadataManager.CLONE_FAILURE_SECTION,
-                                         MetadataManager.CLONE_FAILURE_META_KEY_ERROR_MSG, error_msg)
-        self.metadata_mgr.flush()
+        try:
+            self.metadata_mgr.add_section(MetadataManager.CLONE_FAILURE_SECTION)
+            self.metadata_mgr.update_section(MetadataManager.CLONE_FAILURE_SECTION,
+                                             MetadataManager.CLONE_FAILURE_META_KEY_ERRNO, errno)
+            self.metadata_mgr.update_section(MetadataManager.CLONE_FAILURE_SECTION,
+                                             MetadataManager.CLONE_FAILURE_META_KEY_ERROR_MSG, error_msg)
+            self.metadata_mgr.flush()
+        except MetadataMgrException as me:
+            log.error(f"Failed to add clone failure status clone={self.subvol_name} group={self.group_name} "
+                      f"reason={me.args[1]}, errno:{-me.args[0]}, {os.strerror(-me.args[0])}")
 
     def create_clone(self, pool, source_volname, source_subvolume, snapname):
         subvolume_type = SubvolumeTypes.TYPE_CLONE
@@ -192,7 +196,7 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate):
 
             if isinstance(e, MetadataMgrException):
                 log.error("metadata manager exception: {0}".format(e))
-                e = VolumeException(-errno.EINVAL, "exception in subvolume metadata")
+                e = VolumeException(-errno.EINVAL, f"exception in subvolume metadata: {os.strerror(-e.args[0])}")
             elif isinstance(e, cephfs.Error):
                 e = VolumeException(-e.args[0], e.args[1])
             raise e
@@ -806,9 +810,14 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate):
         if self.has_pending_clones(snapname):
             raise VolumeException(-errno.EAGAIN, "snapshot '{0}' has pending clones".format(snapname))
         snappath = self.snapshot_path(snapname)
+        try:
+            self.metadata_mgr.remove_section(self.get_snap_section_name(snapname))
+            self.metadata_mgr.flush()
+        except MetadataMgrException as me:
+            log.error(f"Failed to remove snapshot metadata on snap={snapname} subvol={self.subvol_name} "
+                      f"group={self.group_name} reason={me.args[1]}, errno:{-me.args[0]}, {os.strerror(-me.args[0])}")
+            raise VolumeException(-errno.EAGAIN, f"failed to remove snapshot metadata on snap={snapname} reason={me.args[0]} {me.args[1]}")
         rmsnap(self.fs, snappath)
-        self.metadata_mgr.remove_section(self.get_snap_section_name(snapname))
-        self.metadata_mgr.flush()
 
     def snapshot_info(self, snapname):
         if is_inherited_snap(snapname):
index bfebd24e3bf225c71ed02d2598038cb9ee6b6e27..31d5f2443c501f49c8af1c6a23c9e9d5e819337a 100644 (file)
@@ -198,7 +198,7 @@ class SubvolumeV2(SubvolumeV1):
 
             if isinstance(e, MetadataMgrException):
                 log.error("metadata manager exception: {0}".format(e))
-                e = VolumeException(-errno.EINVAL, "exception in subvolume metadata")
+                e = VolumeException(-errno.EINVAL, f"exception in subvolume metadata: {os.strerror(-e.args[0])}")
             elif isinstance(e, cephfs.Error):
                 e = VolumeException(-e.args[0], e.args[1])
             raise e
@@ -252,7 +252,7 @@ class SubvolumeV2(SubvolumeV1):
 
             if isinstance(e, MetadataMgrException):
                 log.error("metadata manager exception: {0}".format(e))
-                e = VolumeException(-errno.EINVAL, "exception in subvolume metadata")
+                e = VolumeException(-errno.EINVAL, f"exception in subvolume metadata: {os.strerror(-e.args[0])}")
             elif isinstance(e, cephfs.Error):
                 e = VolumeException(-e.args[0], e.args[1])
             raise e