]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
OSD,PG: add upgrade procedure for snap_mapper
author Samuel Just <sam.just@inktank.com>
Mon, 4 Mar 2013 19:16:05 +0000 (11:16 -0800)
committer Samuel Just <sam.just@inktank.com>
Thu, 14 Mar 2013 02:45:12 +0000 (19:45 -0700)
Also, sub_op_modify transactions currently carry the operations
for creating snap links in the shipped transaction.  To handle
ops shipped by unenlightened osds, transactions can now be
tagged with a tolerate_collection_add_enoent flag.

Signed-off-by: Samuel Just <sam.just@inktank.com>
src/include/ceph_features.h
src/os/FileStore.cc
src/os/ObjectStore.h
src/osd/OSD.cc
src/osd/PG.cc
src/osd/PG.h
src/osd/ReplicatedPG.cc
src/osd/osd_types.h

index 111e924055f6df1c2efbe1bcf0c9ac3f952237b5..b99faa9c86143d28f42355e7085c7539c8aa279c 100644 (file)
@@ -36,6 +36,7 @@
 #define CEPH_FEATURE_MDSENC         (1<<29)
 #define CEPH_FEATURE_OSDHASHPSPOOL  (1<<30)
 #define CEPH_FEATURE_MON_SINGLE_PAXOS (1<<31)
+/*
+ * First feature bit above bit 31.  The constant must be a 64-bit literal:
+ * shifting a plain long by 32 is undefined where long is 32 bits wide.
+ */
+#define CEPH_FEATURE_OSD_SNAPMAPPER (1ULL<<32)
 
 /*
  * Features supported.  Should be everything above.
@@ -72,7 +73,8 @@
         CEPH_FEATURE_OSD_HBMSGS |              \
         CEPH_FEATURE_MDSENC |                  \
         CEPH_FEATURE_OSDHASHPSPOOL |       \
-        CEPH_FEATURE_MON_SINGLE_PAXOS)
+        CEPH_FEATURE_MON_SINGLE_PAXOS |    \
+   CEPH_FEATURE_OSD_SNAPMAPPER)
 
 #define CEPH_FEATURES_SUPPORTED_DEFAULT  CEPH_FEATURES_ALL
 
index 7ebbe4bb707a16d74e6f0d100a97427a8c449d1b..e61d489e89fd69b8f2dd8fac6ba55aff941364a5 100644 (file)
@@ -2621,6 +2621,10 @@ unsigned FileStore::_do_transaction(Transaction& t, uint64_t op_seq, int trans_n
        // -ENOENT is normally okay
        // ...including on a replayed OP_RMCOLL with !stable_commits
        ok = true;
+      if (r == -ENOENT && (
+         op == Transaction::OP_COLL_ADD &&
+         i.tolerate_collection_add_enoent()))
+       ok = true; // Hack for upgrade from snapcolls to snapmapper
       if (r == -ENODATA)
        ok = true;
 
index 23265041f29a4ca1c6df8e4567a2443ab0493606..ebcf058dce223ed8cacfc5892cadc3860c15b1c2 100644 (file)
@@ -166,12 +166,16 @@ public:
     int64_t pool_override;
     bool use_pool_override;
     bool replica;
+    bool tolerate_collection_add_enoent;
 
     list<Context *> on_applied;
     list<Context *> on_commit;
     list<Context *> on_applied_sync;
 
   public:
+    /// Turn on the tolerate_collection_add_enoent flag for this transaction.
+    /// There is no way to clear it again through this method.
+    void set_tolerate_collection_add_enoent() { tolerate_collection_add_enoent = true; }
     void register_on_applied(Context *c) {
       on_applied.push_back(c);
     }
@@ -294,17 +298,23 @@ public:
       int64_t pool_override;
       bool use_pool_override;
       bool replica;
+      bool _tolerate_collection_add_enoent;
 
       iterator(Transaction *t)
        : p(t->tbl.begin()),
          sobject_encoding(t->sobject_encoding),
          pool_override(t->pool_override),
          use_pool_override(t->use_pool_override),
-         replica(t->replica) {}
+         replica(t->replica),
+         _tolerate_collection_add_enoent(
+           t->tolerate_collection_add_enoent) {}
 
       friend class Transaction;
 
     public:
+      /// Value of the owning Transaction's tolerate_collection_add_enoent
+      /// flag as copied when this iterator was constructed.
+      bool tolerate_collection_add_enoent() const { return _tolerate_collection_add_enoent; }
       bool have_op() {
        return !p.end();
       }
@@ -628,31 +638,40 @@ public:
     // etc.
     Transaction() :
       ops(0), pad_unused_bytes(0), largest_data_len(0), largest_data_off(0), largest_data_off_in_tbl(0),
-      sobject_encoding(false), pool_override(-1), use_pool_override(false), replica(false) {}
+      sobject_encoding(false), pool_override(-1), use_pool_override(false),
+      replica(false),
+      tolerate_collection_add_enoent(false) {}
+
     Transaction(bufferlist::iterator &dp) :
       ops(0), pad_unused_bytes(0), largest_data_len(0), largest_data_off(0), largest_data_off_in_tbl(0),
-      sobject_encoding(false), pool_override(-1), use_pool_override(false), replica(false) {
+      sobject_encoding(false), pool_override(-1), use_pool_override(false),
+      replica(false),
+      tolerate_collection_add_enoent(false) {
       decode(dp);
     }
+
     Transaction(bufferlist &nbl) :
       ops(0), pad_unused_bytes(0), largest_data_len(0), largest_data_off(0), largest_data_off_in_tbl(0),
-      sobject_encoding(false), pool_override(-1), use_pool_override(false), replica(false) {
+      sobject_encoding(false), pool_override(-1), use_pool_override(false),
+      replica(false),
+      tolerate_collection_add_enoent(false) {
       bufferlist::iterator dp = nbl.begin();
       decode(dp); 
     }
 
     void encode(bufferlist& bl) const {
-      ENCODE_START(6, 5, bl);
+      ENCODE_START(7, 5, bl);
       ::encode(ops, bl);
       ::encode(pad_unused_bytes, bl);
       ::encode(largest_data_len, bl);
       ::encode(largest_data_off, bl);
       ::encode(largest_data_off_in_tbl, bl);
       ::encode(tbl, bl);
+      ::encode(tolerate_collection_add_enoent, bl);
       ENCODE_FINISH(bl);
     }
     void decode(bufferlist::iterator &bl) {
-      DECODE_START_LEGACY_COMPAT_LEN(6, 5, 5, bl);
+      DECODE_START_LEGACY_COMPAT_LEN(7, 5, 5, bl);
       DECODE_OLDEST(2);
       if (struct_v < 4)
        sobject_encoding = true;
@@ -666,10 +685,13 @@ public:
        ::decode(largest_data_off_in_tbl, bl);
       }
       ::decode(tbl, bl);
-      DECODE_FINISH(bl);
       if (struct_v < 6) {
        use_pool_override = true;
       }
+      if (struct_v >= 7) {
+       ::decode(tolerate_collection_add_enoent, bl);
+      }
+      DECODE_FINISH(bl);
     }
 
     void dump(ceph::Formatter *f);
index fd23dd569838c8633f6f0c09e6d62dda61e7eea9..f2cf321d9971f6ade231c95009b8960ef87f9449 100644 (file)
@@ -141,6 +141,7 @@ static CompatSet get_osd_compat_set() {
   ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BIGINFO);
   ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBINFO);
   ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBLOG);
+  ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SNAPMAPPER);
   return CompatSet(ceph_osd_feature_compat, ceph_osd_feature_ro_compat,
                   ceph_osd_feature_incompat);
 }
@@ -1616,6 +1617,14 @@ void OSD::load_pgs()
     // read pg state, log
     pg->read_state(store, bl);
 
+    if (pg->must_upgrade()) {
+      derr << "PG " << pg->info.pgid
+          << " must upgrade..." << dendl;
+      pg->upgrade(store, i->second);
+    } else {
+      assert(i->second.empty());
+    }
+
     set<pg_t> split_pgs;
     if (osdmap->have_pg_pool(pg->info.pgid.pool()) &&
        pg->info.pgid.is_split(pg->get_osdmap()->get_pg_num(pg->info.pgid.pool()),
index 49e522b3123abf219b0d83b9430b5e8a4c3032aa..b4b298144aa17beb76f28395b14983b9a7d12412 100644 (file)
@@ -2352,10 +2352,149 @@ void PG::init(int role, vector<int>& newup, vector<int>& newacting, pg_history_t
   write_if_dirty(*t);
 }
 
+void PG::upgrade(
+  ObjectStore *store,
+  const interval_set<snapid_t> &snapcolls) {
+  unsigned removed = 0;
+  for (interval_set<snapid_t>::const_iterator i = snapcolls.begin();
+       i != snapcolls.end();
+       ++i) {
+    for (snapid_t next_dir = i.get_start();
+        next_dir != i.get_start() + i.get_len();
+        ++next_dir) {
+      ++removed;
+      coll_t cid(info.pgid, next_dir);
+      dout(1) << "Removing collection " << cid
+             << " (" << removed << "/" << snapcolls.size()
+             << ")" << dendl;
+
+      hobject_t cur;
+      vector<hobject_t> objects;
+      while (1) {
+       int r = store->collection_list_partial(
+         cid,
+         cur,
+         store->get_ideal_list_min(),
+         store->get_ideal_list_max(),
+         0,
+         &objects,
+         &cur);
+       if (r != 0) {
+         derr << __func__ << ": collection_list_partial returned "
+              << cpp_strerror(r) << dendl;
+         assert(0);
+       }
+       if (objects.empty()) {
+         assert(cur.is_max());
+         break;
+       }
+       ObjectStore::Transaction t;
+       for (vector<hobject_t>::iterator j = objects.begin();
+            j != objects.end();
+            ++j) {
+         t.remove(cid, *j);
+       }
+       r = store->apply_transaction(t);
+       if (r != 0) {
+         derr << __func__ << ": apply_transaction returned "
+              << cpp_strerror(r) << dendl;
+         assert(0);
+       }
+       objects.clear();
+      }
+      ObjectStore::Transaction t;
+      t.remove_collection(cid);
+      int r = store->apply_transaction(t);
+      if (r != 0) {
+       derr << __func__ << ": apply_transaction returned "
+            << cpp_strerror(r) << dendl;
+       assert(0);
+      }
+    }
+  }
+
+  hobject_t cur;
+  coll_t cid(info.pgid);
+  unsigned done = 0;
+  vector<hobject_t> objects;
+  while (1) {
+    dout(1) << "Updating snap_mapper from main collection, "
+           << done << " objects done" << dendl;
+    int r = store->collection_list_partial(
+      cid,
+      cur,
+      store->get_ideal_list_min(),
+      store->get_ideal_list_max(),
+      0,
+      &objects,
+      &cur);
+    if (r != 0) {
+      derr << __func__ << ": collection_list_partial returned "
+          << cpp_strerror(r) << dendl;
+      assert(0);
+    }
+    if (objects.empty()) {
+      assert(cur.is_max());
+      break;
+    }
+    done += objects.size();
+    ObjectStore::Transaction t;
+    for (vector<hobject_t>::iterator j = objects.begin();
+        j != objects.end();
+        ++j) {
+      if (j->snap < CEPH_MAXSNAP) {
+       OSDriver::OSTransaction _t(osdriver.get_transaction(&t));
+       bufferptr bp;
+       r = store->getattr(
+         cid,
+         *j,
+         OI_ATTR,
+         bp);
+       if (r < 0) {
+         derr << __func__ << ": getattr returned "
+              << cpp_strerror(r) << dendl;
+         assert(0);
+       }
+       bufferlist bl;
+       bl.push_back(bp);
+       object_info_t oi(bl);
+       set<snapid_t> oi_snaps(oi.snaps.begin(), oi.snaps.end());
+       set<snapid_t> cur_snaps;
+       r = snap_mapper.get_snaps(*j, &cur_snaps);
+       if (r == 0) {
+         assert(cur_snaps == oi_snaps);
+       } else if (r == -ENOENT) {
+         snap_mapper.add_oid(*j, oi_snaps, &_t);
+       } else {
+         derr << __func__ << ": get_snaps returned "
+              << cpp_strerror(r) << dendl;
+         assert(0);
+       }
+      }
+    }
+    r = store->apply_transaction(t);
+    if (r != 0) {
+      derr << __func__ << ": apply_transaction returned "
+          << cpp_strerror(r) << dendl;
+      assert(0);
+    }
+    objects.clear();
+  }
+  ObjectStore::Transaction t;
+  dirty_info = true;
+  write_if_dirty(t);
+  int r = store->apply_transaction(t);
+  if (r != 0) {
+    derr << __func__ << ": apply_transaction returned "
+        << cpp_strerror(r) << dendl;
+    assert(0);
+  }
+  assert(r == 0);
+}
+
 void PG::write_info(ObjectStore::Transaction& t)
 {
   // pg state
-  __u8 cur_struct_v = 6;
 
   assert(info_struct_v <= cur_struct_v);
 
index fdfe25d61d862ceaa423dd23e4ed46e0c3cfe5e2..0488a7ebaa98b75d096842bf30fe65956d101f02 100644 (file)
@@ -440,6 +440,14 @@ public:
   // pg state
   pg_info_t        info;
   __u8 info_struct_v;
+  static const __u8 cur_struct_v = 7;
+  /// True while the loaded info_struct_v is older than version 7, in
+  /// which case OSD::load_pgs() runs upgrade() on this PG.
+  bool must_upgrade() { return info_struct_v < 7; }
+  void upgrade(
+    ObjectStore *store,
+    const interval_set<snapid_t> &snapcolls);
+
   const coll_t coll;
   IndexedLog  log;
   static string get_info_key(pg_t pgid) {
index 9821c4fd4085b435f33898ac9e7b3a8a48ada15a..23d71f11e5ed97c53387e716b36249e463128465 100644 (file)
@@ -4558,6 +4558,8 @@ void ReplicatedPG::sub_op_modify(OpRequestRef op)
       bufferlist::iterator p = m->get_data().begin();
 
       ::decode(rm->opt, p);
+      if (!(m->get_connection()->get_features() & CEPH_FEATURE_OSD_SNAPMAPPER))
+       rm->opt.set_tolerate_collection_add_enoent();
       p = m->logbl.begin();
       ::decode(log, p);
       if (m->hobject_incorrect_pool) {
index c84000d696809d82207487a78f01622b0c0a2901..d20e842859cd57e4d2b2d6d17f8bcc192d2eca25 100644 (file)
@@ -40,6 +40,7 @@
 #define CEPH_OSD_FEATURE_INCOMPAT_BIGINFO CompatSet::Feature(7, "biginfo")
 #define CEPH_OSD_FEATURE_INCOMPAT_LEVELDBINFO CompatSet::Feature(8, "leveldbinfo")
 #define CEPH_OSD_FEATURE_INCOMPAT_LEVELDBLOG CompatSet::Feature(9, "leveldblog")
+#define CEPH_OSD_FEATURE_INCOMPAT_SNAPMAPPER CompatSet::Feature(10, "snapmapper")
 
 
 typedef hobject_t collection_list_handle_t;