#define CEPH_FEATURE_MDSENC (1<<29)
#define CEPH_FEATURE_OSDHASHPSPOOL (1<<30)
#define CEPH_FEATURE_MON_SINGLE_PAXOS (1<<31)
+#define CEPH_FEATURE_OSD_SNAPMAPPER (1L<<32)
/*
* Features supported. Should be everything above.
CEPH_FEATURE_OSD_HBMSGS | \
CEPH_FEATURE_MDSENC | \
CEPH_FEATURE_OSDHASHPSPOOL | \
- CEPH_FEATURE_MON_SINGLE_PAXOS)
+ CEPH_FEATURE_MON_SINGLE_PAXOS | \
+ CEPH_FEATURE_OSD_SNAPMAPPER)
#define CEPH_FEATURES_SUPPORTED_DEFAULT CEPH_FEATURES_ALL
// -ENOENT is normally okay
// ...including on a replayed OP_RMCOLL with !stable_commits
ok = true;
+ if (r == -ENOENT && (
+ op == Transaction::OP_COLL_ADD &&
+ i.tolerate_collection_add_enoent()))
+ ok = true; // Hack for upgrade from snapcolls to snapmapper
if (r == -ENODATA)
ok = true;
int64_t pool_override;
bool use_pool_override;
bool replica;
+ bool tolerate_collection_add_enoent;
list<Context *> on_applied;
list<Context *> on_commit;
list<Context *> on_applied_sync;
public:
+ void set_tolerate_collection_add_enoent() { // opt in: apply code treats -ENOENT from OP_COLL_ADD as ok when this flag is set
+ tolerate_collection_add_enoent = true;
+ }
void register_on_applied(Context *c) {
on_applied.push_back(c);
}
int64_t pool_override;
bool use_pool_override;
bool replica;
+ bool _tolerate_collection_add_enoent;
iterator(Transaction *t)
: p(t->tbl.begin()),
sobject_encoding(t->sobject_encoding),
pool_override(t->pool_override),
use_pool_override(t->use_pool_override),
- replica(t->replica) {}
+ replica(t->replica),
+ _tolerate_collection_add_enoent(
+ t->tolerate_collection_add_enoent) {}
friend class Transaction;
public:
+ bool tolerate_collection_add_enoent() const { // returns the flag value copied from the Transaction when this iterator was constructed
+ return _tolerate_collection_add_enoent;
+ }
bool have_op() {
return !p.end();
}
// etc.
Transaction() :
ops(0), pad_unused_bytes(0), largest_data_len(0), largest_data_off(0), largest_data_off_in_tbl(0),
- sobject_encoding(false), pool_override(-1), use_pool_override(false), replica(false) {}
+ sobject_encoding(false), pool_override(-1), use_pool_override(false),
+ replica(false),
+ tolerate_collection_add_enoent(false) {} // flag starts cleared; set_tolerate_collection_add_enoent() turns it on
+
Transaction(bufferlist::iterator &dp) :
ops(0), pad_unused_bytes(0), largest_data_len(0), largest_data_off(0), largest_data_off_in_tbl(0),
- sobject_encoding(false), pool_override(-1), use_pool_override(false), replica(false) {
+ sobject_encoding(false), pool_override(-1), use_pool_override(false),
+ replica(false),
+ tolerate_collection_add_enoent(false) { // stays false unless decode() reads it (only for struct_v >= 7)
decode(dp);
}
+
Transaction(bufferlist &nbl) :
ops(0), pad_unused_bytes(0), largest_data_len(0), largest_data_off(0), largest_data_off_in_tbl(0),
- sobject_encoding(false), pool_override(-1), use_pool_override(false), replica(false) {
+ sobject_encoding(false), pool_override(-1), use_pool_override(false),
+ replica(false),
+ tolerate_collection_add_enoent(false) { // stays false unless decode() reads it (only for struct_v >= 7)
bufferlist::iterator dp = nbl.begin();
decode(dp);
}
void encode(bufferlist& bl) const {
- ENCODE_START(6, 5, bl);
+ ENCODE_START(7, 5, bl); // first argument bumped 6 -> 7 for the new trailing field; second argument (presumably the compat version) stays 5
::encode(ops, bl);
::encode(pad_unused_bytes, bl);
::encode(largest_data_len, bl);
::encode(largest_data_off, bl);
::encode(largest_data_off_in_tbl, bl);
::encode(tbl, bl);
+ ::encode(tolerate_collection_add_enoent, bl); // appended after tbl; decode() reads it back only when struct_v >= 7
ENCODE_FINISH(bl);
}
void decode(bufferlist::iterator &bl) {
- DECODE_START_LEGACY_COMPAT_LEN(6, 5, 5, bl);
+ DECODE_START_LEGACY_COMPAT_LEN(7, 5, 5, bl);
DECODE_OLDEST(2);
if (struct_v < 4)
sobject_encoding = true;
::decode(largest_data_off_in_tbl, bl);
}
::decode(tbl, bl);
- DECODE_FINISH(bl);
if (struct_v < 6) {
use_pool_override = true;
}
+ if (struct_v >= 7) {
+ ::decode(tolerate_collection_add_enoent, bl);
+ }
+ DECODE_FINISH(bl);
}
void dump(ceph::Formatter *f);
ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_BIGINFO);
ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBINFO);
ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_LEVELDBLOG);
+ ceph_osd_feature_incompat.insert(CEPH_OSD_FEATURE_INCOMPAT_SNAPMAPPER);
return CompatSet(ceph_osd_feature_compat, ceph_osd_feature_ro_compat,
ceph_osd_feature_incompat);
}
// read pg state, log
pg->read_state(store, bl);
+ if (pg->must_upgrade()) {
+ derr << "PG " << pg->info.pgid
+ << " must upgrade..." << dendl;
+ pg->upgrade(store, i->second);
+ } else {
+ assert(i->second.empty());
+ }
+
set<pg_t> split_pgs;
if (osdmap->have_pg_pool(pg->info.pgid.pool()) &&
pg->info.pgid.is_split(pg->get_osdmap()->get_pg_num(pg->info.pgid.pool()),
write_if_dirty(*t);
}
+/**
+ * Upgrade this PG's on-disk state from per-snapshot collections to
+ * snap_mapper.
+ *
+ * Works in three steps, each applied through store->apply_transaction():
+ *  1. For every snapid in @p snapcolls, delete all objects in the
+ *     collection coll_t(info.pgid, snapid) in batches, then remove the
+ *     collection itself.
+ *  2. Walk the main collection coll_t(info.pgid); for each object with
+ *     snap < CEPH_MAXSNAP, decode its OI_ATTR object_info_t and make sure
+ *     snap_mapper holds the same snap set, adding the mapping if missing.
+ *  3. Mark the info dirty and rewrite it via write_if_dirty().
+ *
+ * Every store or snap_mapper failure is logged with derr and is fatal
+ * (assert(0)).
+ *
+ * @param store     object store holding this PG's collections
+ * @param snapcolls snapids whose snap collections are to be removed
+ */
+void PG::upgrade(
+  ObjectStore *store,
+  const interval_set<snapid_t> &snapcolls) {
+  // Step 1: empty and remove every snap collection.
+  unsigned removed = 0;
+  for (interval_set<snapid_t>::const_iterator i = snapcolls.begin();
+       i != snapcolls.end();
+       ++i) {
+    // Each interval covers snapids [start, start + len).
+    for (snapid_t next_dir = i.get_start();
+         next_dir != i.get_start() + i.get_len();
+         ++next_dir) {
+      ++removed;
+      coll_t cid(info.pgid, next_dir);
+      dout(1) << "Removing collection " << cid
+              << " (" << removed << "/" << snapcolls.size()
+              << ")" << dendl;
+
+      // List the collection a batch at a time; cur is both the start
+      // position and the returned continuation point.
+      hobject_t cur;
+      vector<hobject_t> objects;
+      while (1) {
+        int r = store->collection_list_partial(
+          cid,
+          cur,
+          store->get_ideal_list_min(),
+          store->get_ideal_list_max(),
+          0,
+          &objects,
+          &cur);
+        if (r != 0) {
+          derr << __func__ << ": collection_list_partial returned "
+               << cpp_strerror(r) << dendl;
+          assert(0);
+        }
+        if (objects.empty()) {
+          // An empty batch must mean the listing reached the end.
+          assert(cur.is_max());
+          break;
+        }
+        // Remove this batch in its own transaction.
+        ObjectStore::Transaction t;
+        for (vector<hobject_t>::iterator j = objects.begin();
+             j != objects.end();
+             ++j) {
+          t.remove(cid, *j);
+        }
+        r = store->apply_transaction(t);
+        if (r != 0) {
+          derr << __func__ << ": apply_transaction returned "
+               << cpp_strerror(r) << dendl;
+          assert(0);
+        }
+        objects.clear();
+      }
+      // The collection is empty now; remove it.
+      ObjectStore::Transaction t;
+      t.remove_collection(cid);
+      int r = store->apply_transaction(t);
+      if (r != 0) {
+        derr << __func__ << ": apply_transaction returned "
+             << cpp_strerror(r) << dendl;
+        assert(0);
+      }
+    }
+  }
+
+  // Step 2: populate snap_mapper from the main collection.
+  hobject_t cur;
+  coll_t cid(info.pgid);
+  unsigned done = 0;
+  vector<hobject_t> objects;
+  while (1) {
+    dout(1) << "Updating snap_mapper from main collection, "
+            << done << " objects done" << dendl;
+    int r = store->collection_list_partial(
+      cid,
+      cur,
+      store->get_ideal_list_min(),
+      store->get_ideal_list_max(),
+      0,
+      &objects,
+      &cur);
+    if (r != 0) {
+      derr << __func__ << ": collection_list_partial returned "
+           << cpp_strerror(r) << dendl;
+      assert(0);
+    }
+    if (objects.empty()) {
+      assert(cur.is_max());
+      break;
+    }
+    done += objects.size();
+    ObjectStore::Transaction t;
+    for (vector<hobject_t>::iterator j = objects.begin();
+         j != objects.end();
+         ++j) {
+      // NOTE(review): presumably this skips head/snapdir objects, whose
+      // snap is not below CEPH_MAXSNAP -- confirm.
+      if (j->snap < CEPH_MAXSNAP) {
+        OSDriver::OSTransaction _t(osdriver.get_transaction(&t));
+        bufferptr bp;
+        r = store->getattr(
+          cid,
+          *j,
+          OI_ATTR,
+          bp);
+        if (r < 0) {
+          derr << __func__ << ": getattr returned "
+               << cpp_strerror(r) << dendl;
+          assert(0);
+        }
+        bufferlist bl;
+        bl.push_back(bp);
+        object_info_t oi(bl);
+        set<snapid_t> oi_snaps(oi.snaps.begin(), oi.snaps.end());
+        set<snapid_t> cur_snaps;
+        r = snap_mapper.get_snaps(*j, &cur_snaps);
+        if (r == 0) {
+          // Already mapped: the mapping must agree with the object info.
+          assert(cur_snaps == oi_snaps);
+        } else if (r == -ENOENT) {
+          // Not mapped yet: queue the mapping in this batch's transaction.
+          snap_mapper.add_oid(*j, oi_snaps, &_t);
+        } else {
+          derr << __func__ << ": get_snaps returned "
+               << cpp_strerror(r) << dendl;
+          assert(0);
+        }
+      }
+    }
+    r = store->apply_transaction(t);
+    if (r != 0) {
+      derr << __func__ << ": apply_transaction returned "
+           << cpp_strerror(r) << dendl;
+      assert(0);
+    }
+    objects.clear();
+  }
+
+  // Step 3: force the PG info to be rewritten.
+  ObjectStore::Transaction t;
+  dirty_info = true;
+  write_if_dirty(t);
+  int r = store->apply_transaction(t);
+  if (r != 0) {
+    derr << __func__ << ": apply_transaction returned "
+         << cpp_strerror(r) << dendl;
+    assert(0);
+  }
+}
+
void PG::write_info(ObjectStore::Transaction& t)
{
// pg state
- __u8 cur_struct_v = 6;
assert(info_struct_v <= cur_struct_v);
// pg state
pg_info_t info;
__u8 info_struct_v;
+ static const __u8 cur_struct_v = 7;
+ /// True when the loaded info_struct_v is older than cur_struct_v, i.e.
+ /// upgrade() has to run before this PG is used.
+ bool must_upgrade() const {
+ return info_struct_v < cur_struct_v;
+ }
+ void upgrade(
+ ObjectStore *store,
+ const interval_set<snapid_t> &snapcolls);
+
const coll_t coll;
IndexedLog log;
static string get_info_key(pg_t pgid) {
bufferlist::iterator p = m->get_data().begin();
::decode(rm->opt, p);
+ if (!(m->get_connection()->get_features() & CEPH_FEATURE_OSD_SNAPMAPPER))
+ rm->opt.set_tolerate_collection_add_enoent();
p = m->logbl.begin();
::decode(log, p);
if (m->hobject_incorrect_pool) {
#define CEPH_OSD_FEATURE_INCOMPAT_BIGINFO CompatSet::Feature(7, "biginfo")
#define CEPH_OSD_FEATURE_INCOMPAT_LEVELDBINFO CompatSet::Feature(8, "leveldbinfo")
#define CEPH_OSD_FEATURE_INCOMPAT_LEVELDBLOG CompatSet::Feature(9, "leveldblog")
+#define CEPH_OSD_FEATURE_INCOMPAT_SNAPMAPPER CompatSet::Feature(10, "snapmapper")
typedef hobject_t collection_list_handle_t;