From: Sage Weil Date: Tue, 4 Sep 2012 22:25:20 +0000 (-0700) Subject: osd: fill in user log entry last after snapdir tran X-Git-Tag: v0.53~174 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=03136d057f0048e9cd840a6e83efedfc20969247;p=ceph.git osd: fill in user log entry last after snapdir tran Reorder the snapdir logic and ctx->at_version adjustments prior to filling in the object_info_t and user_versions and all that stuff. Adjust at_version after appending the log entry (so that it points to the next position/version we will write at, culminating in the actual user event). The user log entry contains the request id, which will be used by replay ops to put themselves in the correct place in the waiting_for_commit/ack maps. Thus, the repop needs to be tagged with the same version as the log entry with the request id. Consequently, the request-id-bearing log entry should be the last in the log entry vector. This should fix #3072, wherein a replay which should wait on the repop tagged as version '36 will instead wait on '35. Signed-off-by: Sage Weil Reviewed-by: Samuel Just --- diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index e3326cae7ea8..f35309090284 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -3309,50 +3309,15 @@ int ReplicatedPG::prepare_transaction(OpContext *ctx) } - // there was a modification! + // clone, if necessary make_writeable(ctx); - if (ctx->user_modify) { - /* update the user_version for any modify ops, except for the watch op */ - ctx->new_obs.oi.user_version = ctx->at_version; - } - - ctx->reply_version = ctx->new_obs.oi.user_version; - - ctx->bytes_written = ctx->op_t.get_encoded_bytes(); - - // finish and log the op. 
- ctx->new_obs.oi.version = ctx->at_version; - + // snapset bufferlist bss; ::encode(ctx->new_snapset, bss); assert(ctx->new_obs.exists == ctx->new_snapset.head_exists); - // append to log - int logopcode = pg_log_entry_t::MODIFY; - if (!ctx->new_obs.exists) - logopcode = pg_log_entry_t::DELETE; - ctx->log.push_back(pg_log_entry_t(logopcode, soid, ctx->at_version, old_version, - ctx->reqid, ctx->mtime)); - if (ctx->new_obs.exists) { - ctx->new_obs.oi.version = ctx->at_version; - ctx->new_obs.oi.prior_version = old_version; - ctx->new_obs.oi.last_reqid = ctx->reqid; - if (ctx->mtime != utime_t()) { - ctx->new_obs.oi.mtime = ctx->mtime; - dout(10) << " set mtime to " << ctx->new_obs.oi.mtime << dendl; - } else { - dout(10) << " mtime unchanged at " << ctx->new_obs.oi.mtime << dendl; - } - - bufferlist bv(sizeof(ctx->new_obs.oi)); - ::encode(ctx->new_obs.oi, bv); - ctx->op_t.setattr(coll, soid, OI_ATTR, bv); - - dout(10) << " final snapset " << ctx->new_snapset - << " in " << soid << dendl; - ctx->op_t.setattr(coll, soid, SS_ATTR, bss); if (!head_existed) { // if we logically recreated the head, remove old _snapdir object hobject_t snapoid(soid.oid, soid.get_key(), CEPH_SNAPDIR, soid.hash, @@ -3363,9 +3328,9 @@ int ReplicatedPG::prepare_transaction(OpContext *ctx) ctx->op_t.remove(coll, snapoid); dout(10) << " removing old " << snapoid << dendl; - ctx->at_version.version++; ctx->log.push_back(pg_log_entry_t(pg_log_entry_t::DELETE, snapoid, ctx->at_version, old_version, osd_reqid_t(), ctx->mtime)); + ctx->at_version.version++; ctx->snapset_obc->obs.exists = false; assert(ctx->snapset_obc->registered); @@ -3377,7 +3342,6 @@ int ReplicatedPG::prepare_transaction(OpContext *ctx) info.pgid.pool()); dout(10) << " final snapset " << ctx->new_snapset << " in " << snapoid << dendl; - ctx->at_version.version++; ctx->log.push_back(pg_log_entry_t(pg_log_entry_t::MODIFY, snapoid, ctx->at_version, old_version, osd_reqid_t(), ctx->mtime)); @@ -3393,7 +3357,45 @@ int 
ReplicatedPG::prepare_transaction(OpContext *ctx) ctx->op_t.touch(coll, snapoid); ctx->op_t.setattr(coll, snapoid, OI_ATTR, bv); ctx->op_t.setattr(coll, snapoid, SS_ATTR, bss); + ctx->at_version.version++; + } + + // finish and log the op. + if (ctx->user_modify) { + /* update the user_version for any modify ops, except for the watch op */ + ctx->new_obs.oi.user_version = ctx->at_version; } + ctx->reply_version = ctx->new_obs.oi.user_version; + ctx->bytes_written = ctx->op_t.get_encoded_bytes(); + ctx->new_obs.oi.version = ctx->at_version; + + if (ctx->new_obs.exists) { + // on the head object + ctx->new_obs.oi.version = ctx->at_version; + ctx->new_obs.oi.prior_version = old_version; + ctx->new_obs.oi.last_reqid = ctx->reqid; + if (ctx->mtime != utime_t()) { + ctx->new_obs.oi.mtime = ctx->mtime; + dout(10) << " set mtime to " << ctx->new_obs.oi.mtime << dendl; + } else { + dout(10) << " mtime unchanged at " << ctx->new_obs.oi.mtime << dendl; + } + + bufferlist bv(sizeof(ctx->new_obs.oi)); + ::encode(ctx->new_obs.oi, bv); + ctx->op_t.setattr(coll, soid, OI_ATTR, bv); + + dout(10) << " final snapset " << ctx->new_snapset + << " in " << soid << dendl; + ctx->op_t.setattr(coll, soid, SS_ATTR, bss); + } + + // append to log + int logopcode = pg_log_entry_t::MODIFY; + if (!ctx->new_obs.exists) + logopcode = pg_log_entry_t::DELETE; + ctx->log.push_back(pg_log_entry_t(logopcode, soid, ctx->at_version, old_version, + ctx->reqid, ctx->mtime)); // apply new object state. ctx->obc->obs = ctx->new_obs;