From: Samuel Just Date: Mon, 4 Mar 2013 19:16:05 +0000 (-0800) Subject: OSD,PG: add upgrade procedure for snap_mapper X-Git-Tag: v0.60~78^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=86658392516d5175b2756659ef7ffaaf95b0f8ad;p=ceph.git OSD,PG: add upgrade procedure for snap_mapper Also, sub_op_modify transactions currently carry the operations for creating snap links in the shipped transaction. To handle ops shipped by unenlightened osds, transactions can now be tagged with a tolerate_collection_add_enoent flag. Signed-off-by: Samuel Just --- diff --git a/src/include/ceph_features.h b/src/include/ceph_features.h index 111e924055f6..b99faa9c8614 100644 --- a/src/include/ceph_features.h +++ b/src/include/ceph_features.h @@ -36,6 +36,7 @@ #define CEPH_FEATURE_MDSENC (1<<29) #define CEPH_FEATURE_OSDHASHPSPOOL (1<<30) #define CEPH_FEATURE_MON_SINGLE_PAXOS (1<<31) +#define CEPH_FEATURE_OSD_SNAPMAPPER (1ULL<<32) /* * Features supported. Should be everything above. 
@@ -72,7 +73,8 @@ CEPH_FEATURE_OSD_HBMSGS | \ CEPH_FEATURE_MDSENC | \ CEPH_FEATURE_OSDHASHPSPOOL | \ - CEPH_FEATURE_MON_SINGLE_PAXOS) + CEPH_FEATURE_MON_SINGLE_PAXOS | \ + CEPH_FEATURE_OSD_SNAPMAPPER) #define CEPH_FEATURES_SUPPORTED_DEFAULT CEPH_FEATURES_ALL diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc index 7ebbe4bb707a..e61d489e89fd 100644 --- a/src/os/FileStore.cc +++ b/src/os/FileStore.cc @@ -2621,6 +2621,10 @@ unsigned FileStore::_do_transaction(Transaction& t, uint64_t op_seq, int trans_n // -ENOENT is normally okay // ...including on a replayed OP_RMCOLL with !stable_commits ok = true; + if (r == -ENOENT && ( + op == Transaction::OP_COLL_ADD && + i.tolerate_collection_add_enoent())) + ok = true; // Hack for upgrade from snapcolls to snapmapper if (r == -ENODATA) ok = true; diff --git a/src/os/ObjectStore.h b/src/os/ObjectStore.h index 23265041f29a..ebcf058dce22 100644 --- a/src/os/ObjectStore.h +++ b/src/os/ObjectStore.h @@ -166,12 +166,16 @@ public: int64_t pool_override; bool use_pool_override; bool replica; + bool tolerate_collection_add_enoent; list on_applied; list on_commit; list on_applied_sync; public: + void set_tolerate_collection_add_enoent() { + tolerate_collection_add_enoent = true; + } void register_on_applied(Context *c) { on_applied.push_back(c); } @@ -294,17 +298,23 @@ public: int64_t pool_override; bool use_pool_override; bool replica; + bool _tolerate_collection_add_enoent; iterator(Transaction *t) : p(t->tbl.begin()), sobject_encoding(t->sobject_encoding), pool_override(t->pool_override), use_pool_override(t->use_pool_override), - replica(t->replica) {} + replica(t->replica), + _tolerate_collection_add_enoent( + t->tolerate_collection_add_enoent) {} friend class Transaction; public: + bool tolerate_collection_add_enoent() const { + return _tolerate_collection_add_enoent; + } bool have_op() { return !p.end(); } @@ -628,31 +638,40 @@ public: // etc. 
Transaction() : ops(0), pad_unused_bytes(0), largest_data_len(0), largest_data_off(0), largest_data_off_in_tbl(0), - sobject_encoding(false), pool_override(-1), use_pool_override(false), replica(false) {} + sobject_encoding(false), pool_override(-1), use_pool_override(false), + replica(false), + tolerate_collection_add_enoent(false) {} + Transaction(bufferlist::iterator &dp) : ops(0), pad_unused_bytes(0), largest_data_len(0), largest_data_off(0), largest_data_off_in_tbl(0), - sobject_encoding(false), pool_override(-1), use_pool_override(false), replica(false) { + sobject_encoding(false), pool_override(-1), use_pool_override(false), + replica(false), + tolerate_collection_add_enoent(false) { decode(dp); } + Transaction(bufferlist &nbl) : ops(0), pad_unused_bytes(0), largest_data_len(0), largest_data_off(0), largest_data_off_in_tbl(0), - sobject_encoding(false), pool_override(-1), use_pool_override(false), replica(false) { + sobject_encoding(false), pool_override(-1), use_pool_override(false), + replica(false), + tolerate_collection_add_enoent(false) { bufferlist::iterator dp = nbl.begin(); decode(dp); } void encode(bufferlist& bl) const { - ENCODE_START(6, 5, bl); + ENCODE_START(7, 5, bl); ::encode(ops, bl); ::encode(pad_unused_bytes, bl); ::encode(largest_data_len, bl); ::encode(largest_data_off, bl); ::encode(largest_data_off_in_tbl, bl); ::encode(tbl, bl); + ::encode(tolerate_collection_add_enoent, bl); ENCODE_FINISH(bl); } void decode(bufferlist::iterator &bl) { - DECODE_START_LEGACY_COMPAT_LEN(6, 5, 5, bl); + DECODE_START_LEGACY_COMPAT_LEN(7, 5, 5, bl); DECODE_OLDEST(2); if (struct_v < 4) sobject_encoding = true; @@ -666,10 +685,13 @@ public: ::decode(largest_data_off_in_tbl, bl); } ::decode(tbl, bl); - DECODE_FINISH(bl); if (struct_v < 6) { use_pool_override = true; } + if (struct_v >= 7) { + ::decode(tolerate_collection_add_enoent, bl); + } + DECODE_FINISH(bl); } void dump(ceph::Formatter *f); diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 
fd23dd569838..f2cf321d9971 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -141,6 +141,7 @@ static CompatSet get_osd_compat_set() { ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BIGINFO); ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBINFO); ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBLOG); + ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SNAPMAPPER); return CompatSet(ceph_osd_feature_compat, ceph_osd_feature_ro_compat, ceph_osd_feature_incompat); } @@ -1616,6 +1617,14 @@ void OSD::load_pgs() // read pg state, log pg->read_state(store, bl); + if (pg->must_upgrade()) { + derr << "PG " << pg->info.pgid + << " must upgrade..." << dendl; + pg->upgrade(store, i->second); + } else { + assert(i->second.empty()); + } + set split_pgs; if (osdmap->have_pg_pool(pg->info.pgid.pool()) && pg->info.pgid.is_split(pg->get_osdmap()->get_pg_num(pg->info.pgid.pool()), diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 49e522b3123a..b4b298144aa1 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -2352,10 +2352,149 @@ void PG::init(int role, vector& newup, vector& newacting, pg_history_t write_if_dirty(*t); } +void PG::upgrade( + ObjectStore *store, + const interval_set &snapcolls) { + unsigned removed = 0; + for (interval_set::const_iterator i = snapcolls.begin(); + i != snapcolls.end(); + ++i) { + for (snapid_t next_dir = i.get_start(); + next_dir != i.get_start() + i.get_len(); + ++next_dir) { + ++removed; + coll_t cid(info.pgid, next_dir); + dout(1) << "Removing collection " << cid + << " (" << removed << "/" << snapcolls.size() + << ")" << dendl; + + hobject_t cur; + vector objects; + while (1) { + int r = store->collection_list_partial( + cid, + cur, + store->get_ideal_list_min(), + store->get_ideal_list_max(), + 0, + &objects, + &cur); + if (r != 0) { + derr << __func__ << ": collection_list_partial returned " + << cpp_strerror(r) << dendl; + assert(0); + } + if (objects.empty()) { + assert(cur.is_max()); + break; 
+ } + ObjectStore::Transaction t; + for (vector::iterator j = objects.begin(); + j != objects.end(); + ++j) { + t.remove(cid, *j); + } + r = store->apply_transaction(t); + if (r != 0) { + derr << __func__ << ": apply_transaction returned " + << cpp_strerror(r) << dendl; + assert(0); + } + objects.clear(); + } + ObjectStore::Transaction t; + t.remove_collection(cid); + int r = store->apply_transaction(t); + if (r != 0) { + derr << __func__ << ": apply_transaction returned " + << cpp_strerror(r) << dendl; + assert(0); + } + } + } + + hobject_t cur; + coll_t cid(info.pgid); + unsigned done = 0; + vector objects; + while (1) { + dout(1) << "Updating snap_mapper from main collection, " + << done << " objects done" << dendl; + int r = store->collection_list_partial( + cid, + cur, + store->get_ideal_list_min(), + store->get_ideal_list_max(), + 0, + &objects, + &cur); + if (r != 0) { + derr << __func__ << ": collection_list_partial returned " + << cpp_strerror(r) << dendl; + assert(0); + } + if (objects.empty()) { + assert(cur.is_max()); + break; + } + done += objects.size(); + ObjectStore::Transaction t; + for (vector::iterator j = objects.begin(); + j != objects.end(); + ++j) { + if (j->snap < CEPH_MAXSNAP) { + OSDriver::OSTransaction _t(osdriver.get_transaction(&t)); + bufferptr bp; + r = store->getattr( + cid, + *j, + OI_ATTR, + bp); + if (r < 0) { + derr << __func__ << ": getattr returned " + << cpp_strerror(r) << dendl; + assert(0); + } + bufferlist bl; + bl.push_back(bp); + object_info_t oi(bl); + set oi_snaps(oi.snaps.begin(), oi.snaps.end()); + set cur_snaps; + r = snap_mapper.get_snaps(*j, &cur_snaps); + if (r == 0) { + assert(cur_snaps == oi_snaps); + } else if (r == -ENOENT) { + snap_mapper.add_oid(*j, oi_snaps, &_t); + } else { + derr << __func__ << ": get_snaps returned " + << cpp_strerror(r) << dendl; + assert(0); + } + } + } + r = store->apply_transaction(t); + if (r != 0) { + derr << __func__ << ": apply_transaction returned " + << cpp_strerror(r) << 
dendl; + assert(0); + } + objects.clear(); + } + ObjectStore::Transaction t; + dirty_info = true; + write_if_dirty(t); + int r = store->apply_transaction(t); + if (r != 0) { + derr << __func__ << ": apply_transaction returned " + << cpp_strerror(r) << dendl; + assert(0); + } + assert(r == 0); +} + void PG::write_info(ObjectStore::Transaction& t) { // pg state - __u8 cur_struct_v = 6; assert(info_struct_v <= cur_struct_v); diff --git a/src/osd/PG.h b/src/osd/PG.h index fdfe25d61d86..0488a7ebaa98 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -440,6 +440,14 @@ public: // pg state pg_info_t info; __u8 info_struct_v; + static const __u8 cur_struct_v = 7; + bool must_upgrade() { + return info_struct_v < 7; + } + void upgrade( + ObjectStore *store, + const interval_set &snapcolls); + const coll_t coll; IndexedLog log; static string get_info_key(pg_t pgid) { diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 9821c4fd4085..23d71f11e5ed 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -4558,6 +4558,8 @@ void ReplicatedPG::sub_op_modify(OpRequestRef op) bufferlist::iterator p = m->get_data().begin(); ::decode(rm->opt, p); + if (!(m->get_connection()->get_features() & CEPH_FEATURE_OSD_SNAPMAPPER)) + rm->opt.set_tolerate_collection_add_enoent(); p = m->logbl.begin(); ::decode(log, p); if (m->hobject_incorrect_pool) { diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index c84000d69680..d20e842859cd 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -40,6 +40,7 @@ #define CEPH_OSD_FEATURE_INCOMPAT_BIGINFO CompatSet::Feature(7, "biginfo") #define CEPH_OSD_FEATURE_INCOMPAT_LEVELDBINFO CompatSet::Feature(8, "leveldbinfo") #define CEPH_OSD_FEATURE_INCOMPAT_LEVELDBLOG CompatSet::Feature(9, "leveldblog") +#define CEPH_OSD_FEATURE_INCOMPAT_SNAPMAPPER CompatSet::Feature(10, "snapmapper") typedef hobject_t collection_list_handle_t;