]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/librados: add op to list clones/snaps for an object
authorDavid Zafman <david.zafman@inktank.com>
Fri, 22 Feb 2013 23:15:06 +0000 (15:15 -0800)
committerDavid Zafman <david.zafman@inktank.com>
Tue, 5 Mar 2013 07:16:43 +0000 (23:16 -0800)
Returning snap_set_t with clone info
and snapshots in ascending order
Add clones with snapshots to obj_list_snap_response_t
New rados_types.hpp with snap_set_t/clone_info_t
Move snap_t to rados_types.hpp
Add generate_test_instances() and TYPE() to encoding/types.h

Feature: #4207

Signed-off-by: David Zafman <david.zafman@inktank.com>
src/Makefile.am
src/include/rados.h
src/include/rados/librados.hpp
src/include/rados/rados_types.hpp [new file with mode: 0644]
src/librados/librados.cc
src/osd/ReplicatedPG.cc
src/osd/osd_types.cc
src/osd/osd_types.h
src/osdc/Objecter.h
src/test/encoding/types.h

index 4a1d7633a38b109431bc7bc7c5ac487f1c056b52..af4a259efbc1ace2b960e2352e52948e3304e19d 100644 (file)
@@ -1178,6 +1178,7 @@ rados_includedir = $(includedir)/rados
 rados_include_DATA = \
        $(srcdir)/include/rados/librados.h \
        $(srcdir)/include/rados/rados_types.h \
+       $(srcdir)/include/rados/rados_types.hpp \
        $(srcdir)/include/rados/librados.hpp \
        $(srcdir)/include/buffer.h \
        $(srcdir)/include/page.h \
@@ -1642,6 +1643,7 @@ noinst_HEADERS = \
         include/xlist.h\
        include/rados/librados.h\
        include/rados/rados_types.h\
+       include/rados/rados_types.hpp\
        include/rados/librados.hpp\
        include/rados/librgw.h\
        include/rados/page.h\
index 093a04baf86dd0613bce4fa4e984eaad241f2c8b..f4f120a8f15079ce4c64d575cbc61b15049e459e 100644 (file)
@@ -179,6 +179,8 @@ enum {
 
        CEPH_OSD_OP_LIST_WATCHERS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 9,
 
+       CEPH_OSD_OP_LIST_SNAPS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 10,
+
        /* write */
        CEPH_OSD_OP_WRITE     = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 1,
        CEPH_OSD_OP_WRITEFULL = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 2,
index 5bc1495e1c7321197c7bb93db901cce494db4681..1463a34996d875040492b77a06f419461586c512 100644 (file)
@@ -12,6 +12,7 @@
 #include "buffer.h"
 
 #include "librados.h"
+#include "include/rados/rados_types.hpp"
 
 namespace librados
 {
@@ -26,7 +27,6 @@ namespace librados
   class RadosClient;
 
   typedef void *list_ctx_t;
-  typedef uint64_t snap_t;
   typedef uint64_t auid_t;
   typedef void *config_t;
 
@@ -328,6 +328,7 @@ namespace librados
      * @param prval [out] place error code in prval upon completion
      */
     void list_watchers(std::list<obj_watch_t> *out_watchers, int *prval);
+    void list_snaps(snap_set_t *out_snaps, int *prval);
 
   };
 
@@ -501,6 +502,7 @@ namespace librados
     int unwatch(const std::string& o, uint64_t handle);
     int notify(const std::string& o, uint64_t ver, bufferlist& bl);
     int list_watchers(const std::string& o, std::list<obj_watch_t> *out_watchers);
+    int list_snaps(const std::string& o, snap_set_t *out_snaps);
     void set_notify_timeout(uint32_t timeout);
 
     // assert version for next sync operations
diff --git a/src/include/rados/rados_types.hpp b/src/include/rados/rados_types.hpp
new file mode 100644 (file)
index 0000000..eb28d4f
--- /dev/null
@@ -0,0 +1,25 @@
+#ifndef CEPH_RADOS_TYPES_HPP
+#define CEPH_RADOS_TYPES_HPP
+
+#include <utility>
+#include <vector>
+#include "include/inttypes.h"
+
+namespace librados {
+
+typedef uint64_t snap_t;
+
+/**
+ * Public description of a single clone of an object.
+ *
+ * cloneid is the clone's id, or HEAD when the entry describes the head
+ * object.
+ */
+struct clone_info_t {
+  static const snap_t HEAD = ((snap_t)-1);
+  snap_t cloneid;
+  std::vector<snap_t> snaps;          // ascending
+  std::vector< std::pair<uint64_t,uint64_t> > overlap;
+  uint64_t size;
+
+  // Give the scalar members a defined value so a default-constructed
+  // clone_info_t is never read uninitialized (the vectors start out empty).
+  clone_info_t() : cloneid(0), size(0) {}
+};
+
+// Container for the per-clone information of one object: a vector of
+// clone_info_t entries.
+struct snap_set_t {
+  std::vector<clone_info_t> clones;   // ascending
+};
+
+}
+#endif
index c03a20f8d12645dce50de9f7703f053f7e7204fe..59ff0d1c3e869137f3d081eb22769893dcd0fc8f 100644 (file)
@@ -219,6 +219,14 @@ void librados::ObjectReadOperation::list_watchers(
   o->list_watchers(out_watchers, prval);
 }
 
+// Forward the list-snaps request to the underlying ::ObjectOperation
+// stored in impl.
+void librados::ObjectReadOperation::list_snaps(
+  snap_set_t *out_snaps,
+  int *prval)
+{
+  ((::ObjectOperation *)impl)->list_snaps(out_snaps, prval);
+}
+
 int librados::IoCtx::omap_get_vals(const std::string& oid,
                                    const std::string& start_after,
                                    const std::string& filter_prefix,
@@ -1040,6 +1048,20 @@ int librados::IoCtx::list_watchers(const std::string& oid,
   return r;
 }
 
+// List the clones/snaps of object oid into out_snaps.
+// Returns the (negative) result of operate() if it failed, otherwise the
+// return code reported for the list-snaps op itself.
+int librados::IoCtx::list_snaps(const std::string& oid,
+                                   snap_set_t *out_snaps)
+{
+  int op_rval;
+  ObjectReadOperation read_op;
+  read_op.list_snaps(out_snaps, &op_rval);
+
+  bufferlist unused_bl;
+  const int rc = operate(oid, &read_op, &unused_bl);
+  return (rc < 0) ? rc : op_rval;
+}
+
 void librados::IoCtx::set_notify_timeout(uint32_t timeout)
 {
   io_ctx_impl->set_notify_timeout(timeout);
index b1f60d4f0dc027cd8a4b2898313334f97841f128..ea71f2b81e8af952fe3f5eb77e557ac35943c911 100644 (file)
@@ -2252,6 +2252,94 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
         break;
       }
 
+    case CEPH_OSD_OP_LIST_SNAPS:
+      {
+        obj_list_snap_response_t resp;
+
+        if (!ssc) {
+            ssc = ctx->obc->ssc = get_snapset_context(soid.oid,
+                soid.get_key(), soid.hash, false);
+        }
+
+        assert(ssc);
+
+        vector<snapid_t>::reverse_iterator snap_iter =
+            ssc->snapset.snaps.rbegin();
+
+        int clonecount = ssc->snapset.clones.size();
+        if (ssc->snapset.head_exists)
+          clonecount++;
+        resp.clones.reserve(clonecount);
+        for (vector<snapid_t>::const_iterator clone_iter = ssc->snapset.clones.begin();
+               clone_iter != ssc->snapset.clones.end(); ++clone_iter) {
+          clone_info ci;
+
+          dout(20) << "List clones id=" << *clone_iter << dendl;
+
+          ci.cloneid = *clone_iter;
+
+          for (;snap_iter != ssc->snapset.snaps.rend()
+               && (*snap_iter <= ci.cloneid); snap_iter++) {
+
+            dout(20) << "List snaps id=" << *snap_iter << dendl;
+
+            assert(*snap_iter != CEPH_NOSNAP);
+            assert(*snap_iter != CEPH_SNAPDIR);
+
+            ci.snaps.push_back(*snap_iter);
+          }
+
+          map<snapid_t, interval_set<uint64_t> >::const_iterator coi;
+          coi = ssc->snapset.clone_overlap.find(ci.cloneid);
+          if (coi == ssc->snapset.clone_overlap.end()) {
+            osd->clog.error() << "osd." << osd->whoami << ": inconsistent clone_overlap found for oid "
+                    << soid << " clone " << *clone_iter;
+            result = EINVAL;
+            break;
+          }
+          const interval_set<uint64_t> &o = coi->second;
+          ci.overlap.reserve(o.num_intervals());
+          for (interval_set<uint64_t>::const_iterator r = o.begin();
+               r != o.end(); ++r) {
+            ci.overlap.push_back(pair<uint64_t,uint64_t>(r.get_start(), r.get_len()));
+          }
+
+          map<snapid_t, uint64_t>::const_iterator si;
+          si = ssc->snapset.clone_size.find(ci.cloneid);
+          if (si == ssc->snapset.clone_size.end()) {
+            osd->clog.error() << "osd." << osd->whoami << ": inconsistent clone_size found for oid "
+                    << soid << " clone " << *clone_iter;
+            result = EINVAL;
+            break;
+          }
+          ci.size = si->second;
+
+          resp.clones.push_back(ci);
+        }
+        if (ssc->snapset.head_exists) {
+          clone_info ci;
+
+          assert(obs.exists);
+
+          ci.cloneid = clone_info::HEAD;
+
+          //Put remaining snapshots into head clone
+          for (;snap_iter != ssc->snapset.snaps.rend(); snap_iter++)
+            ci.snaps.push_back(*snap_iter);
+
+          //Size for HEAD is oi.size
+          ci.size = oi.size;
+
+          resp.clones.push_back(ci);
+        }
+
+        resp.encode(osd_op.outdata);
+        result = 0;
+
+        ctx->delta_stats.num_rd++;
+        break;
+      }
+
     case CEPH_OSD_OP_ASSERT_SRC_VERSION:
       {
        uint64_t ver = op.watch.ver;
index 219c1bfdec8678468a6caeb149de652835db1bfb..9c890397e8d19603fdd237f7f8ba9bb378cee092 100644 (file)
@@ -20,6 +20,8 @@ extern "C" {
 #include "PG.h"
 #include "OSDMap.h"
 
+// Definition of the clone id sentinel used for the head object: (uint64_t)-1.
+const snapid_t clone_info::HEAD((uint64_t)-1);
+
 // -- osd_reqid_t --
 void osd_reqid_t::encode(bufferlist &bl) const
 {
index fbdc51d816400649dbc594f70812ae01f241fd15..3f464852398509bf9fe55cb3283b961f3788aef6 100644 (file)
@@ -2093,4 +2093,113 @@ struct obj_list_watch_response_t {
 
 WRITE_CLASS_ENCODER(obj_list_watch_response_t)
 
+struct clone_info {
+  static const snapid_t HEAD;
+
+  snapid_t cloneid;
+  vector<snapid_t> snaps;  // ascending
+  vector< pair<uint64_t,uint64_t> > overlap;
+  uint64_t size;
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    ::encode(cloneid, bl);
+    ::encode(snaps, bl);
+    ::encode(overlap, bl);
+    ::encode(size, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::iterator& bl) {
+    DECODE_START(1, bl);
+    ::decode(cloneid, bl);
+    ::decode(snaps, bl);
+    ::decode(overlap, bl);
+    ::decode(size, bl);
+    DECODE_FINISH(bl);
+  }
+  void dump(Formatter *f) const {
+    if (cloneid == HEAD)
+      f->dump_string("cloneid", "HEAD");
+    else
+      f->dump_unsigned("cloneid", cloneid.val);
+    f->open_array_section("snapshots");
+    for (vector<snapid_t>::const_iterator p = snaps.begin(); p != snaps.end(); ++p) {
+      f->open_object_section("snap");
+      f->dump_unsigned("id", p->val);
+      f->close_section();
+    }
+    f->close_section();
+    f->open_array_section("overlaps");
+    for (vector< pair<uint64_t,uint64_t> >::const_iterator q = overlap.begin();
+          q != overlap.end(); ++q) {
+      f->open_object_section("overlap");
+      f->dump_unsigned("offset", q->first);
+      f->dump_unsigned("length", q->second);
+      f->close_section();
+    }
+    f->close_section();
+    f->dump_unsigned("size", size);
+  }
+  static void generate_test_instances(list<clone_info*>& o) {
+    o.push_back(new clone_info);
+    o.push_back(new clone_info);
+    o.back()->cloneid = 1;
+    o.back()->snaps.push_back(1);
+    o.back()->overlap.push_back(pair<uint64_t,uint64_t>(0,4096));
+    o.back()->overlap.push_back(pair<uint64_t,uint64_t>(8192,4096));
+    o.back()->size = 16384;
+    o.push_back(new clone_info);
+    o.back()->cloneid = HEAD;
+    o.back()->size = 32768;
+  }
+};
+
+WRITE_CLASS_ENCODER(clone_info)
+
+/**
+ * obj list snaps response format
+ *
+ */
+struct obj_list_snap_response_t {
+  vector<clone_info> clones;   // ascending
+
+  void encode(bufferlist& bl) const {
+    ENCODE_START(1, 1, bl);
+    ::encode(clones, bl);
+    ENCODE_FINISH(bl);
+  }
+  void decode(bufferlist::iterator& bl) {
+    DECODE_START(1, bl);
+    ::decode(clones, bl);
+    DECODE_FINISH(bl);
+  }
+  void dump(Formatter *f) const {
+    f->open_array_section("clones");
+    for (vector<clone_info>::const_iterator p = clones.begin(); p != clones.end(); ++p) {
+      f->open_object_section("clone");
+      p->dump(f);
+      f->close_section();
+    }
+    f->close_section();
+  }
+  static void generate_test_instances(list<obj_list_snap_response_t*>& o) {
+    o.push_back(new obj_list_snap_response_t);
+    o.push_back(new obj_list_snap_response_t);
+    clone_info cl;
+    cl.cloneid = 1;
+    cl.snaps.push_back(1);
+    cl.overlap.push_back(pair<uint64_t,uint64_t>(0,4096));
+    cl.overlap.push_back(pair<uint64_t,uint64_t>(8192,4096));
+    cl.size = 16384;
+    o.back()->clones.push_back(cl);
+    cl.cloneid = clone_info::HEAD;
+    cl.snaps.clear();
+    cl.overlap.clear();
+    cl.size = 32768;
+    o.back()->clones.push_back(cl);
+  }
+};
+
+WRITE_CLASS_ENCODER(obj_list_snap_response_t)
+
 #endif
index 6cc3cb88426acc115d87e697eb1a90630a2ad7d5..f9583400c6ecdbb0a01443191f10bcdff6bfd057 100644 (file)
@@ -25,6 +25,7 @@
 #include "common/admin_socket.h"
 #include "common/Timer.h"
 #include "include/rados/rados_types.h"
+#include "include/rados/rados_types.hpp"
 
 #include <list>
 #include <map>
@@ -344,6 +345,43 @@ struct ObjectOperation {
       }        
     }
   };
+  struct C_ObjectOperation_decodesnaps : public Context {
+    bufferlist bl;
+    librados::snap_set_t *psnaps;
+    int *prval;
+    C_ObjectOperation_decodesnaps(librados::snap_set_t *ps, int *pr)
+      : psnaps(ps), prval(pr) {}
+    void finish(int r) {
+      if (r >= 0) {
+       bufferlist::iterator p = bl.begin();
+       try {
+          obj_list_snap_response_t resp;
+         ::decode(resp, p);
+         if (psnaps) {
+
+            psnaps->clones.clear();
+            vector<clone_info>::iterator ci;
+            for (ci = resp.clones.begin(); ci != resp.clones.end(); ci++) {
+              librados::clone_info_t clone;
+
+              clone.cloneid = ci->cloneid;
+              clone.snaps.reserve(ci->snaps.size());
+              clone.snaps.insert(clone.snaps.end(), ci->snaps.begin(), ci->snaps.end());
+              clone.overlap = ci->overlap;
+              clone.size = ci->size;
+
+              psnaps->clones.push_back(clone);
+            }
+          }
+          *prval = 0;
+       }
+       catch (buffer::error& e) {
+         if (prval)
+           *prval = -EIO;
+       }
+      }
+    }
+  };
   void getxattrs(std::map<std::string,bufferlist> *pattrs, int *prval) {
     add_op(CEPH_OSD_OP_GETXATTRS);
     if (pattrs || prval) {
@@ -539,6 +577,18 @@ struct ObjectOperation {
     }
   }
 
+  // Append a CEPH_OSD_OP_LIST_SNAPS op.  If the caller supplied an output
+  // snap set or a return-code pointer, attach a decodesnaps handler for it.
+  void list_snaps(librados::snap_set_t *out, int *prval) {
+    (void)add_op(CEPH_OSD_OP_LIST_SNAPS);
+    if (!prval && !out)
+      return;
+
+    // The op just added is the last one in ops.
+    unsigned idx = ops.size() - 1;
+    C_ObjectOperation_decodesnaps *handler =
+      new C_ObjectOperation_decodesnaps(out, prval);
+    out_handler[idx] = handler;
+    out_bl[idx] = &handler->bl;
+    out_rval[idx] = prval;
+  }
+
   void assert_version(uint64_t ver) {
     bufferlist bl;
     add_watch(CEPH_OSD_OP_ASSERT_VER, 0, ver, 0, bl);
index b69bb9fd07c51a27d410c3cb10ca8bb050e4857a..a554d7e114ea97e2cde9aab956f38575d8a5f69a 100644 (file)
@@ -63,6 +63,8 @@ TYPE(ObjectRecoveryProgress)
 TYPE(ScrubMap::object)
 TYPE(ScrubMap)
 TYPE(osd_peer_stat_t)
+TYPE(clone_info)
+TYPE(obj_list_snap_response_t)
 
 #include "os/ObjectStore.h"
 TYPE(ObjectStore::Transaction)