git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: Implement object map for tracking in-use objects
author: Jason Dillaman <dillaman@redhat.com>
Mon, 10 Nov 2014 15:25:13 +0000 (10:25 -0500)
committer: Jason Dillaman <dillaman@redhat.com>
Thu, 29 Jan 2015 02:12:52 +0000 (21:12 -0500)
The object map is stored in a new 'rbd_object_map.<id>'
object in the image's pool. The object map tracks which blocks
are in use, non-existent, or pending deletion.

Signed-off-by: Jason Dillaman <dillaman@redhat.com>
src/common/config_opts.h
src/include/rbd/features.h
src/include/rbd_types.h
src/librbd/ImageCtx.cc
src/librbd/ImageCtx.h
src/librbd/internal.cc
src/librbd/internal.h
src/rbd.cc

index 378df88157601e6c344a432fb91742ba520c366a..d7f53cebf24c737b56b1960a87d0b1e3630c4646 100644 (file)
@@ -878,7 +878,7 @@ OPTION(rbd_default_stripe_count, OPT_U64, 0) // changing requires stripingv2 fea
 OPTION(rbd_default_stripe_unit, OPT_U64, 0) // changing to non-object size requires stripingv2 feature
 OPTION(rbd_default_features, OPT_INT, 7) // only applies to format 2 images
                                         // +1 for layering, +2 for stripingv2,
-                                        // +4 for exclusive lock
+                                        // +4 for exclusive lock, +8 for object map
 
 OPTION(nss_db_path, OPT_STR, "") // path to nss db
 
index 7e35baac348e85113f8494e97ac5654dfba8f61d..1d170679d49ddff6952339cb5b124cb06e491287 100644 (file)
@@ -4,15 +4,18 @@
 #define RBD_FEATURE_LAYERING           (1<<0)
 #define RBD_FEATURE_STRIPINGV2         (1<<1)
 #define RBD_FEATURE_EXCLUSIVE_LOCK     (1<<2)
+#define RBD_FEATURE_OBJECT_MAP         (1<<3)
 
-#define RBD_FEATURES_INCOMPATIBLE      (RBD_FEATURE_LAYERING |      \
+#define RBD_FEATURES_INCOMPATIBLE      (RBD_FEATURE_LAYERING |       \
                                         RBD_FEATURE_STRIPINGV2)
 
-#define RBD_FEATURES_RW_INCOMPATIBLE   (RBD_FEATURES_INCOMPATIBLE | \
-                                        RBD_FEATURE_EXCLUSIVE_LOCK)
+#define RBD_FEATURES_RW_INCOMPATIBLE   (RBD_FEATURES_INCOMPATIBLE |  \
+                                        RBD_FEATURE_EXCLUSIVE_LOCK | \
+                                        RBD_FEATURE_OBJECT_MAP)
 
-#define RBD_FEATURES_ALL               (RBD_FEATURE_LAYERING |      \
-                                        RBD_FEATURE_STRIPINGV2 |    \
-                                        RBD_FEATURE_EXCLUSIVE_LOCK)
+#define RBD_FEATURES_ALL               (RBD_FEATURE_LAYERING |       \
+                                        RBD_FEATURE_STRIPINGV2 |     \
+                                        RBD_FEATURE_EXCLUSIVE_LOCK | \
+                                         RBD_FEATURE_OBJECT_MAP)
 
 #endif
index 558bbaada8b1978cd45444f20a72707c450d3a34..ad1c1b94502cff56f684bad43d242b4d1dcd0172 100644 (file)
 /* New-style rbd image 'foo' consists of objects
  *   rbd_id.foo              - id of image
  *   rbd_header.<id>         - image metadata
+ *   rbd_object_map.<id>     - optional image object map
  *   rbd_data.<id>.00000000
  *   rbd_data.<id>.00000001
  *   ...                     - data
  */
 
 #define RBD_HEADER_PREFIX      "rbd_header."
+#define RBD_OBJECT_MAP_PREFIX  "rbd_object_map."
 #define RBD_DATA_PREFIX        "rbd_data."
 #define RBD_ID_PREFIX          "rbd_id."
 
index ddc5cfe524fb1d63506aa7bf060400d5ee0f099f..53a37ae827cc20b28509c8ed67dfccb1328409ae 100644 (file)
@@ -46,6 +46,7 @@ namespace librbd {
       snap_lock("librbd::ImageCtx::snap_lock"),
       parent_lock("librbd::ImageCtx::parent_lock"),
       refresh_lock("librbd::ImageCtx::refresh_lock"),
+      object_map_lock("librbd::ImageCtx::object_map_lock"),
       aio_lock("librbd::ImageCtx::aio_lock"),
       copyup_list_lock("librbd::ImageCtx::copyup_list_lock"),
       copyup_list_cond(),
@@ -682,4 +683,118 @@ namespace librbd {
       copyup_list_cond.Wait(copyup_list_lock);
     }
   }
+
+  /**
+   * Reload the in-memory object map from its backing object.
+   *
+   * Does nothing when RBD_FEATURE_OBJECT_MAP is not set in 'features'.
+   * Otherwise loads the object named object_map_name(id) through data_ctx
+   * into 'object_map' and checks that the loaded map has one entry per
+   * object of the image at its current size.
+   *
+   * @returns 0 on success (or when the feature is disabled), the negative
+   *          error from the load on failure, or -EINVAL when the loaded
+   *          map's size does not match the expected object count
+   */
+  int ImageCtx::refresh_object_map()
+  {
+    if ((features & RBD_FEATURE_OBJECT_MAP) == 0) {
+      return 0;
+    }
+
+    int r = cls_client::object_map_load(&data_ctx, object_map_name(id),
+                                       &object_map);
+    if (r < 0) {
+      lderr(cct) << "error refreshing object map: " << cpp_strerror(r)
+                << dendl;
+      // TODO: flag object map as invalid
+      // drop whatever is held in memory so a failed load leaves an empty map
+      object_map.clear();
+      return r;
+    }
+
+    ldout(cct, 20) << "refreshed object map: " << object_map.size()
+                   << dendl;
+
+    // expected entry count: number of objects covering the current size
+    uint64_t num_objs = Striper::get_num_objects(layout, get_current_size());
+    if (object_map.size() != num_objs) {
+      // resize op might have been interrupted
+      lderr(cct) << "incorrect object map size: " << object_map.size()
+                << " != " << num_objs << dendl;
+      // TODO: flag object map as invalid
+      // note: the mismatched map is left loaded in 'object_map' here
+      return -EINVAL;
+    }
+    return 0;
+  }
+
+  /**
+   * Resize the stored and in-memory object maps to the image's current
+   * object count.
+   *
+   * Does nothing when RBD_FEATURE_OBJECT_MAP is not set in 'features'.
+   * The backing object (object_map_name(id)) is resized first; only if that
+   * succeeds is the in-memory 'object_map' resized, with any newly added
+   * entries set to default_object_state.
+   *
+   * @param default_object_state state assigned to entries added by a grow
+   * @returns always 0; a failure to resize the backing object is logged and
+   *          leaves the in-memory map unchanged (this looks like a
+   *          deliberate best-effort until the map can be flagged invalid)
+   */
+  int ImageCtx::resize_object_map(uint8_t default_object_state)
+  {
+    if ((features & RBD_FEATURE_OBJECT_MAP) == 0) {
+      return 0;
+    }
+
+    uint64_t num_objs = Striper::get_num_objects(layout, get_current_size());
+    ldout(cct, 20) << "resizing object map: " << num_objs << dendl;
+    librados::ObjectWriteOperation op;
+    cls_client::object_map_resize(&op, num_objs, default_object_state);
+    int r = data_ctx.operate(object_map_name(id), &op);
+    if (r < 0) {
+      // cast the state to an integer: streaming a uint8_t directly would
+      // emit it as a raw (usually unprintable) character
+      lderr(cct) << "error resizing object map: size=" << num_objs << ", "
+                 << "state=" << static_cast<uint32_t>(default_object_state)
+                 << ", " << "error=" << cpp_strerror(r) << dendl;
+      // TODO: flag object map as invalid
+      return 0;
+    }
+
+    // mirror the resize locally; only entries beyond the old size need
+    // to be initialized
+    size_t orig_object_map_size = object_map.size();
+    object_map.resize(num_objs);
+    for (uint64_t i = orig_object_map_size; i < object_map.size(); ++i) {
+      object_map[i] = default_object_state;
+    }
+    return 0;
+  }
+
+  /**
+   * Set the state of a single object in the object map.
+   *
+   * Forwards to the range overload for [object_no, object_no + 1) with an
+   * empty 'current_state', i.e. without any condition on the object's
+   * present state.
+   *
+   * @param object_no index of the object to update
+   * @param object_state new state for that object
+   * @returns the result of the range overload
+   */
+  int ImageCtx::update_object_map(uint64_t object_no, uint8_t object_state)
+  {
+    return update_object_map(object_no, object_no + 1, object_state,
+                            boost::optional<uint8_t>());
+  }
+
+  /**
+   * Set the state of the objects in [start_object_no, end_object_no).
+   *
+   * Does nothing when RBD_FEATURE_OBJECT_MAP is not set in 'features'.
+   * When 'current_state' holds a value, only entries currently equal to it
+   * are changed; when it is empty, every entry in the range is changed.
+   * The backing object is updated first and the in-memory 'object_map' is
+   * only modified if that update succeeds.
+   *
+   * @param start_object_no first object index (inclusive)
+   * @param end_object_no last object index (exclusive)
+   * @param new_state state to assign
+   * @param current_state optional filter on an entry's present state
+   * @returns 0 when nothing needed to change, otherwise the result of the
+   *          write to the backing object (negative on failure)
+   */
+  int ImageCtx::update_object_map(uint64_t start_object_no,
+                                  uint64_t end_object_no, uint8_t new_state,
+                                 const boost::optional<uint8_t> &current_state)
+  {
+    if ((features & RBD_FEATURE_OBJECT_MAP) == 0) {
+      return 0;
+    }
+
+    assert(start_object_no <= end_object_no);
+    assert(/* flagged as invalid || */ end_object_no <= object_map.size());
+    // same bound as the assert above; this path is only reachable when
+    // asserts are compiled out
+    if (end_object_no > object_map.size()) {
+      ldout(cct, 20) << "skipping update of invalid object map" << dendl;
+      return 0;
+    }
+
+    // scan for at least one entry that would actually change, so that no
+    // write is issued when the range is already in the requested state
+    bool update_required = false;
+    for (uint64_t object_no = start_object_no; object_no < end_object_no;
+        ++object_no) {
+      if ((!current_state || object_map[object_no] == *current_state) &&
+         object_map[object_no] != new_state) {
+       update_required = true;
+       break;
+      }
+    }
+
+    if (!update_required) {
+      return 0;
+    }
+
+    ldout(cct, 20) << "updating object map: [" << start_object_no << ","
+                  << end_object_no << ") = "
+                  << static_cast<uint32_t>(new_state) << dendl;
+
+    librados::ObjectWriteOperation op;
+    cls_client::object_map_update(&op, start_object_no, end_object_no,
+                                  new_state, current_state);
+    int r = data_ctx.operate(object_map_name(id), &op);
+    if (r < 0) {
+      lderr(cct) << "object map update failed: " << cpp_strerror(r) << dendl;
+      // TODO: disable object map
+    } else {
+      // apply the same (optionally filtered) change to the in-memory copy
+      for (uint64_t object_no = start_object_no; object_no < end_object_no;
+           ++object_no) {
+       if (!current_state || object_map[object_no] == *current_state) {
+         object_map[object_no] = new_state;
+        }
+      }
+    }
+    return r;
+  }
 }
index 41dfd33980051f48ad01ae22f4bdf0138bc44866..e6c741f14c80b16a11600ca6b0d3006432377f3a 100644 (file)
@@ -9,11 +9,13 @@
 #include <set>
 #include <string>
 #include <vector>
+#include <boost/optional.hpp>
 
 #include "common/Cond.h"
 #include "common/Mutex.h"
 #include "common/Readahead.h"
 #include "common/RWLock.h"
+#include "common/bit_vector.hpp"
 #include "common/snap_types.h"
 #include "include/buffer.h"
 #include "include/rbd/librbd.hpp"
@@ -65,7 +67,7 @@ namespace librbd {
     /**
      * Lock ordering:
      * owner_lock, md_lock, cache_lock, snap_lock, parent_lock, refresh_lock,
-     * aio_lock
+     * object_map_lock, aio_lock
      */
     RWLock owner_lock; // protects exclusive lock leadership updates
     RWLock md_lock; // protects access to the mutable image metadata that
@@ -75,6 +77,7 @@ namespace librbd {
     RWLock snap_lock; // protects snapshot-related member variables:
     RWLock parent_lock; // protects parent_md and parent
     Mutex refresh_lock; // protects refresh_seq and last_refresh
+    RWLock object_map_lock; // protects object map updates
     Mutex aio_lock; // protects pending_aio and pending_aio_cond
     Mutex copyup_list_lock; // protects copyup_waiting_list
 
@@ -109,6 +112,8 @@ namespace librbd {
     Cond pending_aio_cond;
     uint64_t pending_aio;
 
+    ceph::BitVector<2> object_map;
+
     /**
      * Either image_name or image_id must be set.
      * If id is not known, pass the empty std::string,
@@ -174,6 +179,13 @@ namespace librbd {
                                  uint64_t overlap);
     void wait_for_pending_aio();
     void wait_for_pending_copyup();
+
+    int refresh_object_map();
+    int resize_object_map(uint8_t default_object_state);
+    int update_object_map(uint64_t object_no, uint8_t object_state);
+    int update_object_map(uint64_t start_object_no, uint64_t end_object_no,
+                         uint8_t new_state,
+                         const boost::optional<uint8_t> &current_state);
   };
 }
 
index 7c51b5c36580a2a1cde59abde323cb8e708e2df0..50b99f26022f7121e42ae0ab4fb0317181821e99 100644 (file)
@@ -68,6 +68,11 @@ namespace librbd {
     return image_name + RBD_SUFFIX;
   }
 
+  // Build the name of an image's object map object:
+  // RBD_OBJECT_MAP_PREFIX followed by the image id.
+  const string object_map_name(const string &image_id)
+  {
+    return RBD_OBJECT_MAP_PREFIX + image_id;
+  }
+
   int detect_format(IoCtx &io_ctx, const string &name,
                    bool *old_format, uint64_t *size)
   {
index 1578f00ccbdc7bdccb19cbeb8ec99d169d4b6607..af0bbb5dbc35810024d931650bb5b40df9d7e662 100644 (file)
@@ -72,9 +72,14 @@ namespace librbd {
     }
   };
 
+  static const uint8_t OBJECT_NONEXISTENT = 0;
+  static const uint8_t OBJECT_EXISTS = 1;
+  static const uint8_t OBJECT_PENDING = 2;
+
   const std::string id_obj_name(const std::string &name);
   const std::string header_name(const std::string &image_id);
   const std::string old_header_name(const std::string &image_name);
+  const std::string object_map_name(const std::string &image_id);
 
   int detect_format(librados::IoCtx &io_ctx, const std::string &name,
                    bool *old_format, uint64_t *size);
index 40f158593a9a9d347bed2099a2a481811015687b..da35e8c2e7bdb4b50e3289b1afa5af774aeb4680 100644 (file)
@@ -174,6 +174,8 @@ static string feature_str(uint64_t feature)
     return "striping";
   case RBD_FEATURE_EXCLUSIVE_LOCK:
     return "exclusive";
+  case RBD_FEATURE_OBJECT_MAP:
+    return "object map";
   default:
     return "";
   }
@@ -183,7 +185,7 @@ static string features_str(uint64_t features)
 {
   string s = "";
 
-  for (uint64_t feature = 1; feature <= RBD_FEATURE_EXCLUSIVE_LOCK;
+  for (uint64_t feature = 1; feature <= RBD_FEATURE_OBJECT_MAP;
        feature <<= 1) {
     if (feature & features) {
       if (s.size())
@@ -197,7 +199,7 @@ static string features_str(uint64_t features)
 static void format_features(Formatter *f, uint64_t features)
 {
   f->open_array_section("features");
-  for (uint64_t feature = 1; feature <= RBD_FEATURE_EXCLUSIVE_LOCK;
+  for (uint64_t feature = 1; feature <= RBD_FEATURE_OBJECT_MAP;
        feature <<= 1) {
     f->dump_string("feature", feature_str(feature));
   }
@@ -437,7 +439,8 @@ static int do_create(librbd::RBD &rbd, librados::IoCtx& io_ctx,
     r = rbd.create(io_ctx, imgname, size, order);
   } else {
     if (features == 0) {
-      features = RBD_FEATURE_LAYERING | RBD_FEATURE_EXCLUSIVE_LOCK;
+      features = RBD_FEATURE_LAYERING | RBD_FEATURE_EXCLUSIVE_LOCK |
+                RBD_FEATURE_OBJECT_MAP;
     }
     if ((stripe_unit || stripe_count) &&
        (stripe_unit != (1ull << *order) && stripe_count != 1)) {
@@ -2469,7 +2472,8 @@ int main(int argc, const char **argv)
   bool format_specified = false,
     output_format_specified = false;
   int format = 1;
-  uint64_t features = RBD_FEATURE_LAYERING | RBD_FEATURE_EXCLUSIVE_LOCK;
+  uint64_t features = RBD_FEATURE_LAYERING | RBD_FEATURE_EXCLUSIVE_LOCK |
+                     RBD_FEATURE_OBJECT_MAP;
   const char *imgname = NULL, *snapname = NULL, *destname = NULL,
     *dest_poolname = NULL, *dest_snapname = NULL, *path = NULL,
     *devpath = NULL, *lock_cookie = NULL, *lock_client = NULL,
@@ -2573,7 +2577,7 @@ int main(int argc, const char **argv)
     } else if (ceph_argparse_flag(args, i , "--allow-shrink", (char *)NULL)) {
       resize_allow_shrink = true;
     } else if (ceph_argparse_flag(args, i, "--image-shared", (char *)NULL)) {
-      features &= ~RBD_FEATURE_EXCLUSIVE_LOCK;
+      features &= ~(RBD_FEATURE_EXCLUSIVE_LOCK | RBD_FEATURE_OBJECT_MAP);
     } else if (ceph_argparse_witharg(args, i, &val, "--format", (char *) NULL)) {
       long long ret = strict_strtoll(val.c_str(), 10, &parse_err);
       if (parse_err.empty()) {