git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: move pg log and info to a per-pg object
author: Sage Weil <sage@redhat.com>
Thu, 20 Nov 2014 22:43:37 +0000 (14:43 -0800)
committer: Sage Weil <sage@redhat.com>
Wed, 17 Dec 2014 01:07:57 +0000 (17:07 -0800)
Move all PG metadata (info, biginfo, epoch, and log) to a single per-PG
object.  This should make it easier for ObjectStore implementations to
avoid lock contention in the write path.

Signed-off-by: Sage Weil <sage@redhat.com>
src/osd/OSD.cc
src/osd/PG.cc
src/osd/PG.h
src/osd/PGLog.cc
src/osd/PGLog.h
src/tools/ceph_objectstore_tool.cc

diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index e0dcf008f746ec01f25c804203e3024e7a50ab92..016760c99a2d6a2b08cbf5418476499d441525d5 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -2784,7 +2784,7 @@ void OSD::load_pgs()
 
     dout(10) << "pgid " << pgid << " coll " << coll_t(pgid) << dendl;
     bufferlist bl;
-    epoch_t map_epoch = PG::peek_map_epoch(store, coll_t(pgid), service.infos_oid, &bl);
+    epoch_t map_epoch = PG::peek_map_epoch(store, pgid, service.infos_oid, &bl);
 
     PG *pg = _open_lock_pg(map_epoch == 0 ? osdmap : service.get_map(map_epoch), pgid);
     // there can be no waiters here, so we don't call wake_pg_waiters
@@ -4147,11 +4147,7 @@ void OSD::RemoveWQ::_process(
     return;
 
   ObjectStore::Transaction *t = new ObjectStore::Transaction;
-  PGLog::clear_info_log(
-    pg->info.pgid,
-    OSD::make_infos_oid(),
-    pg->log_oid,
-    t);
+  PGLog::clear_info_log(pg->info.pgid, t);
 
   for (list<coll_t>::iterator i = colls_to_remove.begin();
        i != colls_to_remove.end();
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 134efc71d3dfb190fc829b5c402b02b85490f07e..614c148d21130d1cdd1bd13d67738afd80d7d4d7 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
 
 static coll_t META_COLL("meta");
 
+// prefix pgmeta_oid keys with _ so that PGLog::read_log() can
+// easily skip them
+const string infover_key("_infover");
+const string info_key("_info");
+const string biginfo_key("_biginfo");
+const string epoch_key("_epoch");
+
+
 template <class T>
 static ostream& _prefix(std::ostream *_dout, T *t)
 {
@@ -182,7 +190,6 @@ PG::PG(OSDService *o, OSDMapRef curmap,
   info_struct_v(0),
   coll(p), pg_log(cct),
   pgmeta_oid(p.make_pgmeta_oid()),
-  log_oid(OSD::make_pg_log_oid(p)),
   missing_loc(this),
   recovery_item(this), scrub_item(this), scrub_finalize_item(this), snap_trim_item(this), stat_queue_item(this),
   recovery_ops_active(0),
@@ -2462,6 +2469,42 @@ void PG::init(
 }
 
 void PG::upgrade(ObjectStore *store, const interval_set<snapid_t> &snapcolls)
+{
+  assert(info_struct_v <= 8);
+  ObjectStore::Transaction t;
+
+  if (info_struct_v < 7) {
+    _upgrade_v7(store, snapcolls);
+  }
+
+  // 7 -> 8
+  pg_log.mark_log_for_rewrite();
+  hobject_t log_oid(OSD::make_pg_log_oid(pg_id));
+  hobject_t biginfo_oid(OSD::make_pg_biginfo_oid(pg_id));
+  t.remove(META_COLL, log_oid);
+  t.remove(META_COLL, biginfo_oid);
+  t.collection_rmattr(coll, "info");
+
+  t.touch(coll, pgmeta_oid);
+  map<string,bufferlist> v;
+  __u8 ver = cur_struct_v;
+  ::encode(ver, v[infover_key]);
+  t.omap_setkeys(coll, pgmeta_oid, v);
+
+  dirty_info = true;
+  dirty_big_info = true;
+  write_if_dirty(t);
+
+  int r = store->apply_transaction(t);
+  if (r != 0) {
+    derr << __func__ << ": apply_transaction returned "
+        << cpp_strerror(r) << dendl;
+    assert(0);
+  }
+  assert(r == 0);
+}
+
+void PG::_upgrade_v7(ObjectStore *store, const interval_set<snapid_t> &snapcolls)
 {
   unsigned removed = 0;
   for (interval_set<snapid_t>::const_iterator i = snapcolls.begin();
@@ -2583,58 +2626,36 @@ void PG::upgrade(ObjectStore *store, const interval_set<snapid_t> &snapcolls)
     }
     objects.clear();
   }
-  ObjectStore::Transaction t;
   snap_collections.clear();
-  dirty_info = true;
-  write_if_dirty(t);
-  int r = store->apply_transaction(t);
-  if (r != 0) {
-    derr << __func__ << ": apply_transaction returned "
-        << cpp_strerror(r) << dendl;
-    assert(0);
-  }
-  assert(r == 0);
 }
 
 int PG::_write_info(ObjectStore::Transaction& t, epoch_t epoch,
-    pg_info_t &info, coll_t coll,
-    map<epoch_t,pg_interval_t> &past_intervals,
-    interval_set<snapid_t> &snap_collections,
-    hobject_t &infos_oid,
-    __u8 info_struct_v, bool dirty_big_info, bool force_ver)
+                   pg_info_t &info, coll_t coll,
+                   map<epoch_t,pg_interval_t> &past_intervals,
+                   interval_set<snapid_t> &snap_collections,
+                   ghobject_t &pgmeta_oid,
+                   bool dirty_big_info)
 {
   // pg state
-
-  if (info_struct_v > cur_struct_v)
-    return -EINVAL;
-
-  // Only need to write struct_v to attr when upgrading
-  if (force_ver || info_struct_v < cur_struct_v) {
-    bufferlist attrbl;
-    info_struct_v = cur_struct_v;
-    ::encode(info_struct_v, attrbl);
-    t.collection_setattr(coll, "info", attrbl);
-    dirty_big_info = true;
-  }
+  map<string,bufferlist> v;
 
   // info.  store purged_snaps separately.
   interval_set<snapid_t> purged_snaps;
-  map<string,bufferlist> v;
-  ::encode(epoch, v[get_epoch_key(info.pgid)]);
+  ::encode(epoch, v[epoch_key]);
   purged_snaps.swap(info.purged_snaps);
-  ::encode(info, v[get_info_key(info.pgid)]);
+  ::encode(info, v[info_key]);
   purged_snaps.swap(info.purged_snaps);
 
   if (dirty_big_info) {
     // potentially big stuff
-    bufferlist& bigbl = v[get_biginfo_key(info.pgid)];
+    bufferlist& bigbl = v[biginfo_key];
     ::encode(past_intervals, bigbl);
     ::encode(snap_collections, bigbl);
     ::encode(info.purged_snaps, bigbl);
     //dout(20) << "write_info bigbl " << bigbl.length() << dendl;
   }
 
-  t.omap_setkeys(META_COLL, infos_oid, v);
+  t.omap_setkeys(coll, pgmeta_oid, v);
 
   return 0;
 }
@@ -2659,6 +2680,13 @@ void PG::_init(ObjectStore::Transaction& t, spg_t pgid, const pg_pool_t *pool)
     uint32_t hint_type = ObjectStore::Transaction::COLL_HINT_EXPECTED_NUM_OBJECTS;
     t.collection_hint(coll, hint_type, hint);
   }
+
+  ghobject_t pgmeta_oid(pgid.make_pgmeta_oid());
+  t.touch(coll, pgmeta_oid);
+  map<string,bufferlist> values;
+  __u8 struct_v = cur_struct_v;
+  ::encode(struct_v, values[infover_key]);
+  t.omap_setkeys(coll, pgmeta_oid, values);
 }
 
 void PG::write_info(ObjectStore::Transaction& t)
@@ -2667,8 +2695,8 @@ void PG::write_info(ObjectStore::Transaction& t)
   unstable_stats.clear();
 
   int ret = _write_info(t, get_osdmap()->get_epoch(), info, coll,
-     past_intervals, snap_collections, osd->infos_oid,
-     info_struct_v, dirty_big_info);
+                       past_intervals, snap_collections, pgmeta_oid,
+                       dirty_big_info);
   assert(ret == 0);
   last_persisted_osdmap_ref = osdmap_ref;
 
@@ -2676,35 +2704,67 @@ void PG::write_info(ObjectStore::Transaction& t)
   dirty_big_info = false;
 }
 
-epoch_t PG::peek_map_epoch(ObjectStore *store, coll_t coll, hobject_t &infos_oid, bufferlist *bl)
+epoch_t PG::peek_map_epoch(ObjectStore *store,
+                          spg_t pgid,
+                          hobject_t &legacy_infos_oid,
+                          bufferlist *bl)
 {
-  assert(bl);
-  spg_t pgid;
-  snapid_t snap;
-  bool ok = coll.is_pg(pgid, snap);
-  assert(ok);
-  int r = store->collection_getattr(coll, "info", *bl);
-  assert(r > 0);
-  bufferlist::iterator bp = bl->begin();
-  __u8 struct_v = 0;
-  ::decode(struct_v, bp);
-  if (struct_v < 5)
-    return 0;
+  coll_t coll(pgid);
+  ghobject_t pgmeta_oid(pgid.make_pgmeta_oid());
   epoch_t cur_epoch = 0;
-  if (struct_v < 6) {
+
+  assert(bl);
+  {
+    // validate collection name
+    spg_t pgid_temp;
+    snapid_t snap;
+    bool ok = coll.is_pg(pgid_temp, snap);
+    assert(ok);
+  }
+
+  // try for v8
+  set<string> keys;
+  keys.insert(infover_key);
+  keys.insert(epoch_key);
+  map<string,bufferlist> values;
+  int r = store->omap_get_values(coll, pgmeta_oid, keys, &values);
+  if (r == 0) {
+    assert(values.size() == 2);
+
+    // sanity check version
+    bufferlist::iterator bp = values[infover_key].begin();
+    __u8 struct_v = 0;
+    ::decode(struct_v, bp);
+    assert(struct_v >= 8);
+
+    // get epoch
+    bp = values[epoch_key].begin();
     ::decode(cur_epoch, bp);
-  } else {
+  } else if (r == -ENOENT) {
+    // legacy: try v7 or older
+    r = store->collection_getattr(coll, "info", *bl);
+    assert(r > 0);
+    bufferlist::iterator bp = bl->begin();
+    __u8 struct_v = 0;
+    ::decode(struct_v, bp);
+    if (struct_v < 5)
+      return 0;
+    if (struct_v < 6) {
+      ::decode(cur_epoch, bp);
+      return cur_epoch;
+    }
+
     // get epoch out of leveldb
-    bufferlist tmpbl;
     string ek = get_epoch_key(pgid);
-    set<string> keys;
-    keys.insert(get_epoch_key(pgid));
-    map<string,bufferlist> values;
-    store->omap_get_values(META_COLL, infos_oid, keys, &values);
+    keys.clear();
+    values.clear();
+    keys.insert(ek);
+    store->omap_get_values(META_COLL, legacy_infos_oid, keys, &values);
     assert(values.size() == 1);
-    tmpbl = values[ek];
-    bufferlist::iterator p = tmpbl.begin();
+    bufferlist::iterator p = values[ek].begin();
     ::decode(cur_epoch, p);
+  } else {
+    assert(0 == "unable to open pg metadata");
   }
   return cur_epoch;
 }
@@ -2713,7 +2773,7 @@ void PG::write_if_dirty(ObjectStore::Transaction& t)
 {
   if (dirty_big_info || dirty_info)
     write_info(t);
-  pg_log.write_log(t, log_oid);
+  pg_log.write_log(t, coll, pgmeta_oid);
 }
 
 void PG::trim_peers()
@@ -2806,7 +2866,7 @@ void PG::append_log(
   }
 
   dout(10) << "append_log  adding " << keys.size() << " keys" << dendl;
-  t.omap_setkeys(META_COLL, log_oid, keys);
+  t.omap_setkeys(coll, pgmeta_oid, keys);
 
   pg_log.trim(&handler, trim_to, info);
 
@@ -2844,11 +2904,37 @@ std::string PG::get_corrupt_pg_log_name() const
 }
 
 int PG::read_info(
-  ObjectStore *store, const coll_t &coll, bufferlist &bl,
+  ObjectStore *store, spg_t pgid, const coll_t &coll, bufferlist &bl,
   pg_info_t &info, map<epoch_t,pg_interval_t> &past_intervals,
-  hobject_t &biginfo_oid, hobject_t &infos_oid,
+  hobject_t &infos_oid,
   interval_set<snapid_t>  &snap_collections, __u8 &struct_v)
 {
+  // try for v8 or later
+  set<string> keys;
+  keys.insert(infover_key);
+  keys.insert(info_key);
+  keys.insert(biginfo_key);
+  ghobject_t pgmeta_oid(pgid.make_pgmeta_oid());
+  map<string,bufferlist> values;
+  int r = store->omap_get_values(coll, pgmeta_oid, keys, &values);
+  if (r == 0) {
+    assert(values.size() == 3);
+
+    bufferlist::iterator p = values[infover_key].begin();
+    ::decode(struct_v, p);
+    assert(struct_v >= 8);
+
+    p = values[info_key].begin();
+    ::decode(info, p);
+
+    p = values[biginfo_key].begin();
+    ::decode(past_intervals, p);
+    ::decode(snap_collections, p);
+    ::decode(info.purged_snaps, p);
+    return 0;
+  }
+
+  // legacy (ver < 8)
   bufferlist::iterator p = bl.begin();
   bufferlist lbl;
 
@@ -2865,6 +2951,7 @@ int PG::read_info(
     ::decode(struct_v, p);
   } else {
     if (struct_v < 6) {
+      hobject_t biginfo_oid(OSD::make_pg_biginfo_oid(pgid));
       int r = store->read(META_COLL, biginfo_oid, 0, 0, lbl);
       if (r < 0)
         return r;
@@ -2911,23 +2998,23 @@ int PG::read_info(
 
 void PG::read_state(ObjectStore *store, bufferlist &bl)
 {
-  hobject_t biginfo_oid(OSD::make_pg_biginfo_oid(pg_id));
-
-  int r = read_info(store, coll, bl, info, past_intervals, biginfo_oid,
-    osd->infos_oid, snap_collections, info_struct_v);
+  int r = read_info(store, pg_id, coll, bl, info, past_intervals,
+                   osd->infos_oid, snap_collections, info_struct_v);
   assert(r >= 0);
 
   ostringstream oss;
-  if (pg_log.read_log(
-      store, coll, META_COLL, log_oid, info,
-      oss)) {
+  if (pg_log.read_log(store,
+                     coll,
+                     info_struct_v < 8 ? META_COLL : coll,
+                     info_struct_v < 8 ? OSD::make_pg_log_oid(pg_id) : pgmeta_oid,
+                     info, oss)) {
     /* We don't want to leave the old format around in case the next log
      * write happens to be an append_log()
      */
     pg_log.mark_log_for_rewrite();
     ObjectStore::Transaction t;
-    t.remove(coll_t(), log_oid); // remove old version
-    pg_log.write_log(t, log_oid);
+    t.remove(META_COLL, log_oid); // remove old version
+    pg_log.write_log(t, coll, pgmeta_oid);
     int r = osd->store->apply_transaction(t);
     assert(!r);
   }
diff --git a/src/osd/PG.h b/src/osd/PG.h
index d9f97dd23b399f2052754ab42361218b7c36c975..4a804c80bcc3580783bad129ae9dbacba68562d1 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -280,13 +280,14 @@ public:
   // pg state
   pg_info_t        info;
   __u8 info_struct_v;
-  static const __u8 cur_struct_v = 7;
+  static const __u8 cur_struct_v = 8;
   bool must_upgrade() {
-    return info_struct_v < 7;
+    return info_struct_v < 8;
   }
   void upgrade(
     ObjectStore *store,
     const interval_set<snapid_t> &snapcolls);
+  void _upgrade_v7(ObjectStore *store, const interval_set<snapid_t> &snapcolls);
 
   const coll_t coll;
   PGLog  pg_log;
@@ -300,7 +301,6 @@ public:
     return stringify(pgid) + "_epoch";
   }
   ghobject_t    pgmeta_oid;
-  hobject_t    log_oid;
 
   class MissingLoc {
     map<hobject_t, pg_missing_t::item> needs_recovery_map;
@@ -2100,8 +2100,8 @@ public:
     pg_info_t &info, coll_t coll,
     map<epoch_t,pg_interval_t> &past_intervals,
     interval_set<snapid_t> &snap_collections,
-    hobject_t &infos_oid,
-    __u8 info_struct_v, bool dirty_big_info, bool force_ver = false);
+    ghobject_t &pgmeta_oid,
+    bool dirty_big_info);
   void write_if_dirty(ObjectStore::Transaction& t);
 
   eversion_t get_next_version() const {
@@ -2124,13 +2124,14 @@ public:
 
   std::string get_corrupt_pg_log_name() const;
   static int read_info(
-    ObjectStore *store, const coll_t &coll,
+    ObjectStore *store, spg_t pgid, const coll_t &coll,
     bufferlist &bl, pg_info_t &info, map<epoch_t,pg_interval_t> &past_intervals,
-    hobject_t &biginfo_oid, hobject_t &infos_oid,
+    hobject_t &infos_oid,
     interval_set<snapid_t>  &snap_collections, __u8 &);
   void read_state(ObjectStore *store, bufferlist &bl);
-  static epoch_t peek_map_epoch(ObjectStore *store, coll_t coll,
-                               hobject_t &infos_oid, bufferlist *bl);
+  static epoch_t peek_map_epoch(ObjectStore *store, spg_t pgid,
+                               hobject_t &legacy_infos_oid,
+                               bufferlist *bl);
   void update_snap_map(
     vector<pg_log_entry_t> &log_entries,
     ObjectStore::Transaction& t);
diff --git a/src/osd/PGLog.cc b/src/osd/PGLog.cc
index 6ea2d7d606a3e148454a8d9d88c1d0a8ea0af743..bdb507c0dfb1212f11c70c6200d7da9dc219899a 100644
--- a/src/osd/PGLog.cc
+++ b/src/osd/PGLog.cc
@@ -146,17 +146,9 @@ void PGLog::clear() {
 
 void PGLog::clear_info_log(
   spg_t pgid,
-  const hobject_t &infos_oid,
-  const hobject_t &log_oid,
   ObjectStore::Transaction *t) {
-
-  set<string> keys_to_remove;
-  keys_to_remove.insert(PG::get_epoch_key(pgid));
-  keys_to_remove.insert(PG::get_biginfo_key(pgid));
-  keys_to_remove.insert(PG::get_info_key(pgid));
-
-  t->remove(META_COLL, log_oid);
-  t->omap_rmkeys(META_COLL, infos_oid, keys_to_remove);
+  coll_t coll(pgid);
+  t->remove(coll, pgid.make_pgmeta_oid());
 }
 
 void PGLog::trim(
@@ -690,7 +682,7 @@ void PGLog::merge_log(ObjectStore::Transaction& t,
 }
 
 void PGLog::write_log(
-  ObjectStore::Transaction& t, const ghobject_t &log_oid)
+  ObjectStore::Transaction& t, const coll_t& coll, const ghobject_t &log_oid)
 {
   if (is_dirty()) {
     dout(10) << "write_log with: "
@@ -701,7 +693,7 @@ void PGLog::write_log(
             << ", trimmed: " << trimmed
             << dendl;
     _write_log(
-      t, log, log_oid, divergent_priors,
+      t, log, coll, log_oid, divergent_priors,
       dirty_to,
       dirty_from,
       writeout_from,
@@ -716,10 +708,11 @@ void PGLog::write_log(
 }
 
 void PGLog::write_log(ObjectStore::Transaction& t, pg_log_t &log,
-    const ghobject_t &log_oid, map<eversion_t, hobject_t> &divergent_priors)
+    const coll_t& coll, const ghobject_t &log_oid,
+    map<eversion_t, hobject_t> &divergent_priors)
 {
   _write_log(
-    t, log, log_oid,
+    t, log, coll, log_oid,
     divergent_priors, eversion_t::max(), eversion_t(), eversion_t(),
     set<eversion_t>(),
     true, true, 0);
@@ -727,7 +720,8 @@ void PGLog::write_log(ObjectStore::Transaction& t, pg_log_t &log,
 
 void PGLog::_write_log(
   ObjectStore::Transaction& t, pg_log_t &log,
-  const ghobject_t &log_oid, map<eversion_t, hobject_t> &divergent_priors,
+  const coll_t& coll, const ghobject_t &log_oid,
+  map<eversion_t, hobject_t> &divergent_priors,
   eversion_t dirty_to,
   eversion_t dirty_from,
   eversion_t writeout_from,
@@ -750,17 +744,17 @@ void PGLog::_write_log(
 
 //dout(10) << "write_log, clearing up to " << dirty_to << dendl;
   if (touch_log)
-    t.touch(coll_t(), log_oid);
+    t.touch(coll, log_oid);
   if (dirty_to != eversion_t()) {
     t.omap_rmkeyrange(
-      coll_t(), log_oid,
+      coll, log_oid,
       eversion_t().get_key_name(), dirty_to.get_key_name());
     clear_up_to(log_keys_debug, dirty_to.get_key_name());
   }
   if (dirty_to != eversion_t::max() && dirty_from != eversion_t::max()) {
     //   dout(10) << "write_log, clearing from " << dirty_from << dendl;
     t.omap_rmkeyrange(
-      coll_t(), log_oid,
+      coll, log_oid,
       dirty_from.get_key_name(), eversion_t::max().get_key_name());
     clear_after(log_keys_debug, dirty_from.get_key_name());
   }
@@ -801,8 +795,8 @@ void PGLog::_write_log(
   ::encode(log.rollback_info_trimmed_to, keys["rollback_info_trimmed_to"]);
 
   if (!to_remove.empty())
-    t.omap_rmkeys(META_COLL, log_oid, to_remove);
-  t.omap_setkeys(META_COLL, log_oid, keys);
+    t.omap_rmkeys(coll, log_oid, to_remove);
+  t.omap_setkeys(coll, log_oid, keys);
 }
 
 bool PGLog::read_log(ObjectStore *store, coll_t pg_coll,
@@ -832,6 +826,9 @@ bool PGLog::read_log(ObjectStore *store, coll_t pg_coll,
 
     ObjectMap::ObjectMapIterator p = store->get_omap_iterator(log_coll, log_oid);
     if (p) for (p->seek_to_first(); p->valid() ; p->next()) {
+      // non-log pgmeta_oid keys are prefixed with _; skip those
+      if (p->key()[0] == '_')
+       continue;
       bufferlist bl = p->value();//Copy bufferlist before creating iterator
       bufferlist::iterator bp = bl.begin();
       if (p->key() == "divergent_priors") {
diff --git a/src/osd/PGLog.h b/src/osd/PGLog.h
index 407abbc31557ec44abe2abd5b08b765a2edee3ea..5ac3a7fc38615371cb185c80cd0a30d081f7beb8 100644
--- a/src/osd/PGLog.h
+++ b/src/osd/PGLog.h
@@ -358,8 +358,6 @@ public:
 
   static void clear_info_log(
     spg_t pgid,
-    const hobject_t &infos_oid,
-    const hobject_t &log_oid,
     ObjectStore::Transaction *t);
 
   void trim(
@@ -543,14 +541,17 @@ public:
                 pg_info_t &info, LogEntryHandler *rollbacker,
                 bool &dirty_info, bool &dirty_big_info);
 
-  void write_log(ObjectStore::Transaction& t, const ghobject_t &log_oid);
+  void write_log(ObjectStore::Transaction& t, const coll_t& coll,
+                const ghobject_t &log_oid);
 
   static void write_log(ObjectStore::Transaction& t, pg_log_t &log,
+    const coll_t& coll,
     const ghobject_t &log_oid, map<eversion_t, hobject_t> &divergent_priors);
 
   static void _write_log(
     ObjectStore::Transaction& t, pg_log_t &log,
-    const ghobject_t &log_oid, map<eversion_t, hobject_t> &divergent_priors,
+    const coll_t& coll, const ghobject_t &log_oid,
+    map<eversion_t, hobject_t> &divergent_priors,
     eversion_t dirty_to,
     eversion_t dirty_from,
     eversion_t writeout_from,
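diff --git a/src/tools/ceph_objectstore_tool.cc b/src/tools/ceph_objectstore_tool.cc
index fb3bb45bbcbd79a47756adaf720b03179638f804..69abc6677c48eef4b473bb8f7fb8224b0b199ce8 100644
--- a/src/tools/ceph_objectstore_tool.cc
+++ b/src/tools/ceph_objectstore_tool.cc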
@@ -515,7 +515,8 @@ struct lookup_ghobject : public action_on_object_t {
 };
 
 hobject_t infos_oid = OSD::make_infos_oid();
-hobject_t biginfo_oid, log_oid;
+ghobject_t log_oid;
+hobject_t biginfo_oid;
 
 int file_fd = fd_none;
 bool debug = false;
@@ -597,13 +598,18 @@ static void invalid_filestore_path(string &path)
   exit(1);
 }
 
-int get_log(ObjectStore *fs, coll_t coll, spg_t pgid, const pg_info_t &info,
+int get_log(ObjectStore *fs, __u8 struct_ver,
+   coll_t coll, spg_t pgid, const pg_info_t &info,
    PGLog::IndexedLog &log, pg_missing_t &missing)
 {
   map<eversion_t, hobject_t> divergent_priors;
   try {
     ostringstream oss;
-    PGLog::read_log(fs, coll, META_COLL, log_oid, info, divergent_priors, log, missing, oss);
+    assert(struct_ver > 0);
+    PGLog::read_log(fs, coll,
+                   struct_ver >= 8 ? coll : META_COLL,
+                   struct_ver >= 8 ? pgid.make_pgmeta_oid() : log_oid,
+                   info, divergent_priors, log, missing, oss);
     if (debug && oss.str().size())
       cerr << oss.str() << std::endl;
   }
@@ -714,7 +720,7 @@ int initiate_new_remove_pg(ObjectStore *store, spg_t r_pgid)
     return ENOENT;
   }
 
-  cout << "remove " << META_COLL << " " << log_oid.oid << std::endl;
+  cout << "remove " << META_COLL << " " << log_oid.hobj.oid << std::endl;
   rmt->remove(META_COLL, log_oid);
   cout << "remove " << META_COLL << " " << biginfo_oid.oid << std::endl;
   rmt->remove(META_COLL, biginfo_oid);
@@ -769,31 +775,27 @@ int write_info(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info,
   //Empty for this
   interval_set<snapid_t> snap_collections; // obsolete
   coll_t coll(info.pgid);
-
+  ghobject_t pgmeta_oid(info.pgid.make_pgmeta_oid());
   int ret = PG::_write_info(t, epoch,
     info, coll,
     past_intervals,
     snap_collections,
-    infos_oid,
-    struct_ver,
-    true, true);
+    pgmeta_oid,
+    true);
   if (ret < 0) ret = -ret;
   if (ret) cerr << "Failed to write info" << std::endl;
   return ret;
 }
 
-void write_log(ObjectStore::Transaction &t, pg_log_t &log)
-{
-  map<eversion_t, hobject_t> divergent_priors;
-  PGLog::write_log(t, log, log_oid, divergent_priors);
-}
-
 int write_pg(ObjectStore::Transaction &t, epoch_t epoch, pg_info_t &info,
     pg_log_t &log, __u8 struct_ver, map<epoch_t,pg_interval_t> &past_intervals)
 {
   int ret = write_info(t, epoch, info, struct_ver, past_intervals);
-  if (ret) return ret;
-  write_log(t, log);
+  if (ret)
+    return ret;
+  map<eversion_t, hobject_t> divergent_priors;
+  coll_t coll(info.pgid);
+  PGLog::write_log(t, log, coll, info.pgid.make_pgmeta_oid(), divergent_priors);
   return 0;
 }
 
@@ -979,7 +981,7 @@ int do_export(ObjectStore *fs, coll_t coll, spg_t pgid, pg_info_t &info,
 
   cerr << "Exporting " << pgid << std::endl;
 
-  int ret = get_log(fs, coll, pgid, info, log, missing);
+  int ret = get_log(fs, struct_ver, coll, pgid, info, log, missing);
   if (ret > 0)
       return ret;
 
@@ -1569,6 +1571,7 @@ int do_import(ObjectStore *store, OSDSuperblock& sb)
     return 1;
   }
 
+  ghobject_t pgmeta_oid = pgid.make_pgmeta_oid();
   log_oid = OSD::make_pg_log_oid(pgid);
   biginfo_oid = OSD::make_pg_biginfo_oid(pgid);
 
@@ -2777,17 +2780,15 @@ int main(int argc, char **argv)
     }
 
     bufferlist bl;
-    map_epoch = PG::peek_map_epoch(fs, coll, infos_oid, &bl);
+    map_epoch = PG::peek_map_epoch(fs, pgid, infos_oid, &bl);
     if (debug)
       cerr << "map_epoch " << map_epoch << std::endl;
 
     pg_info_t info(pgid);
     map<epoch_t,pg_interval_t> past_intervals;
-    hobject_t biginfo_oid = OSD::make_pg_biginfo_oid(pgid);
     interval_set<snapid_t> snap_collections;
-
     __u8 struct_ver;
-    r = PG::read_info(fs, coll, bl, info, past_intervals, biginfo_oid,
+    r = PG::read_info(fs, pgid, coll, bl, info, past_intervals,
       infos_oid, snap_collections, struct_ver);
     if (r < 0) {
       cerr << "read_info error " << cpp_strerror(-r) << std::endl;
@@ -2810,7 +2811,7 @@ int main(int argc, char **argv)
     } else if (op == "log") {
       PGLog::IndexedLog log;
       pg_missing_t missing;
-      ret = get_log(fs, coll, pgid, info, log, missing);
+      ret = get_log(fs, struct_ver, coll, pgid, info, log, missing);
       if (ret > 0)
           goto out;