From 47ba928ba928f4f4ae0202b7a509748a6bb06c95 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 14 May 2010 14:18:21 -0700 Subject: [PATCH] osd: include snapdir objects in pg log for proper replication, recovery --- src/osd/OSD.cc | 10 +++++++-- src/osd/PG.h | 2 +- src/osd/ReplicatedPG.cc | 46 ++++++++++++++++++++++++++++++++++------- src/osd/ReplicatedPG.h | 3 ++- 4 files changed, 49 insertions(+), 12 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index fbe64ef2de122..2f222a3a9a711 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -4207,8 +4207,14 @@ void OSD::handle_op(MOSDOp *op) // snap read. hrm. // are we missing a revision that we might need? // let's get them all. - for (unsigned i=0; i<op->get_snaps().size(); i++) { - sobject_t soid(op->get_oid(), op->get_snaps()[i]); + sobject_t soid(op->get_oid(), CEPH_NOSNAP); + for (int i=-2; i<(int)op->get_snaps().size(); i++) { + if (i >= 0) + soid.snap = op->get_snaps()[i]; + else if (i == -1) + soid.snap = CEPH_NOSNAP; + else + soid.snap = CEPH_SNAPDIR; if (pg->is_missing_object(soid)) { dout(10) << "handle_op _may_ need missing rev " << soid << ", pulling" << dendl; pg->wait_for_missing_object(soid, op); diff --git a/src/osd/PG.h b/src/osd/PG.h index 29fe0be3bf94f..f9a953aa8d39a 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -255,7 +255,7 @@ public: bool is_backlog() const { return op == BACKLOG; } bool is_update() const { return is_clone() || is_modify() || is_backlog(); } - bool reqid_is_indexed() const { return op != BACKLOG && op != CLONE; } + bool reqid_is_indexed() const { return op != BACKLOG && op != CLONE && soid.snap != CEPH_SNAPDIR; } void encode(bufferlist &bl) const { __u8 struct_v = 1; diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 5af6072566403..11a231584d9a8 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -1770,6 +1770,13 @@ int ReplicatedPG::prepare_transaction(OpContext *ctx) ::encode(ctx->obs->ssc->snapset, bss); assert(ctx->obs->exists == 
ctx->obs->ssc->snapset.head_exists); + // append to log + int logopcode = Log::Entry::MODIFY; + if (!ctx->obs->exists) + logopcode = Log::Entry::DELETE; + ctx->log.push_back(Log::Entry(logopcode, soid, ctx->at_version, old_version, + ctx->reqid, ctx->mtime)); + if (ctx->obs->exists) { poi->version = ctx->at_version; poi->prior_version = old_version; @@ -1791,24 +1798,43 @@ int ReplicatedPG::prepare_transaction(OpContext *ctx) if (!head_existed) { // if we logically recreated the head, remove old _snapdir object sobject_t snapoid(soid.oid, CEPH_SNAPDIR); - ctx->op_t.remove(coll_t::build_pg_coll(info.pgid), snapoid); - dout(10) << " removing old " << snapoid << dendl; + + ctx->snapset_obc = get_object_context(snapoid, false); + if (ctx->snapset_obc && ctx->snapset_obc->obs.exists) { + ctx->op_t.remove(coll_t::build_pg_coll(info.pgid), snapoid); + dout(10) << " removing old " << snapoid << dendl; + + ctx->at_version.version++; + ctx->log.push_back(Log::Entry(Log::Entry::DELETE, snapoid, ctx->at_version, old_version, + ctx->reqid, ctx->mtime)); + + ctx->snapset_obc->obs.exists = false; + register_object_context(ctx->snapset_obc); + } } } else if (ctx->obs->ssc->snapset.clones.size()) { // save snapset on _snap sobject_t snapoid(soid.oid, CEPH_SNAPDIR); dout(10) << " final snapset " << ctx->obs->ssc->snapset << " in " << snapoid << dendl; + ctx->at_version.version++; + ctx->log.push_back(Log::Entry(Log::Entry::MODIFY, snapoid, ctx->at_version, old_version, + ctx->reqid, ctx->mtime)); + + ctx->snapset_obc = get_object_context(snapoid, true); + ctx->snapset_obc->obs.exists = true; + ctx->snapset_obc->obs.oi.version = ctx->at_version; + ctx->snapset_obc->obs.oi.last_reqid = ctx->reqid; + ctx->snapset_obc->obs.oi.mtime = ctx->mtime; + register_object_context(ctx->snapset_obc); + + bufferlist bv(sizeof(*poi)); + ::encode(ctx->snapset_obc->obs.oi, bv); ctx->op_t.touch(coll_t::build_pg_coll(info.pgid), snapoid); + ctx->op_t.setattr(coll_t::build_pg_coll(info.pgid), snapoid, 
OI_ATTR, bv); ctx->op_t.setattr(coll_t::build_pg_coll(info.pgid), snapoid, SS_ATTR, bss); } - // append to log - int logopcode = Log::Entry::MODIFY; - if (!ctx->obs->exists) - logopcode = Log::Entry::DELETE; - ctx->log.push_back(Log::Entry(logopcode, soid, ctx->at_version, old_version, - ctx->reqid, ctx->mtime)); return result; } @@ -1915,6 +1941,10 @@ void ReplicatedPG::op_applied(RepGather *repop) put_object_context(repop->ctx->clone_obc); repop->ctx->clone_obc = 0; } + if (repop->ctx->snapset_obc) { + put_object_context(repop->ctx->snapset_obc); + repop->ctx->snapset_obc = 0; + } dout(10) << "op_applied mode was " << mode << dendl; mode.write_applied(); diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index df7479d437d32..df8935d32c958 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -295,6 +295,7 @@ public: vector log; ObjectContext *clone_obc; // if we created a clone + ObjectContext *snapset_obc; // if we created/deleted a snapdir int data_off; // FIXME: we may want to kill this msgr hint off at some point! @@ -303,7 +304,7 @@ public: OpContext(Message *_op, osd_reqid_t _reqid, vector& _ops, bufferlist& _data, ObjectState *_obs, ReplicatedPG *_pg) : op(_op), reqid(_reqid), ops(_ops), indata(_data), obs(_obs), - clone_obc(0), data_off(0), pg(_pg) {} + clone_obc(0), snapset_obc(0), data_off(0), pg(_pg) {} ~OpContext() { assert(!clone_obc); } -- 2.39.5