From: Josh Durgin Date: Tue, 10 May 2016 21:04:25 +0000 (-0700) Subject: osd_types: add an error pg log entry type X-Git-Tag: ses5-milestone5~413^2~11 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=2b8062ebc171d58e2d785d7b995b18aa743bf90c;p=ceph.git osd_types: add an error pg log entry type This will store write error codes for use in dup op detection. A few places use checks assuming is_update() or is_delete() are opposites - fix those to ignore or consider errors, as appropriate. Refs: http://tracker.ceph.com/issues/14468 Signed-off-by: Josh Durgin --- diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 7365a8d1a707..b26759294fd8 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -3455,7 +3455,8 @@ ReplicatedPG::OpContextUPtr ReplicatedPG::trim_object(const hobject_t &coid) ctx->obs->oi.version, 0, osd_reqid_t(), - ctx->mtime) + ctx->mtime, + 0) ); if (pool.info.require_rollback()) { set snaps( @@ -3492,7 +3493,8 @@ ReplicatedPG::OpContextUPtr ReplicatedPG::trim_object(const hobject_t &coid) coi.prior_version, 0, osd_reqid_t(), - ctx->mtime) + ctx->mtime, + 0) ); if (pool.info.require_rollback()) { set changing; @@ -3523,7 +3525,8 @@ ReplicatedPG::OpContextUPtr ReplicatedPG::trim_object(const hobject_t &coid) ctx->snapset_obc->obs.oi.version, 0, osd_reqid_t(), - ctx->mtime) + ctx->mtime, + 0) ); ctx->snapset_obc->obs.exists = false; @@ -3551,7 +3554,8 @@ ReplicatedPG::OpContextUPtr ReplicatedPG::trim_object(const hobject_t &coid) ctx->snapset_obc->obs.oi.version, 0, osd_reqid_t(), - ctx->mtime) + ctx->mtime, + 0) ); ctx->snapset_obc->obs.oi.prior_version = @@ -6395,7 +6399,7 @@ void ReplicatedPG::make_writeable(OpContext *ctx) ctx->log.push_back(pg_log_entry_t(pg_log_entry_t::CLONE, coid, ctx->at_version, ctx->obs->oi.version, ctx->obs->oi.user_version, - osd_reqid_t(), ctx->new_obs.oi.mtime)); + osd_reqid_t(), ctx->new_obs.oi.mtime, 0)); ::encode(snaps, ctx->log.back().snaps); ctx->log.back().mod_desc.create(); @@ -6657,7 +6661,7 @@ void ReplicatedPG::finish_ctx(OpContext *ctx, int log_op_type, bool maintain_ssc ctx->log.push_back(pg_log_entry_t(pg_log_entry_t::DELETE, snapoid, ctx->at_version, ctx->snapset_obc->obs.oi.version, - 0, osd_reqid_t(), ctx->mtime)); + 0, osd_reqid_t(), ctx->mtime, 0)); if (pool.info.require_rollback()) { if (ctx->log.back().mod_desc.rmobject(ctx->at_version.version)) { ctx->op_t->stash(snapoid, ctx->at_version.version); @@ -6686,7 +6690,7 @@ void ReplicatedPG::finish_ctx(OpContext *ctx, int log_op_type, bool maintain_ssc ctx->log.push_back(pg_log_entry_t(pg_log_entry_t::MODIFY, snapoid, ctx->at_version, eversion_t(), - 0, osd_reqid_t(), ctx->mtime)); + 0, osd_reqid_t(), ctx->mtime, 0)); if (!ctx->snapset_obc) ctx->snapset_obc = get_object_context(snapoid, true); @@ -6795,7 +6799,7 @@ void ReplicatedPG::finish_ctx(OpContext *ctx, int log_op_type, bool maintain_ssc ctx->log.push_back(pg_log_entry_t(log_op_type, soid, ctx->at_version, ctx->obs->oi.version, ctx->user_at_version, ctx->reqid, - ctx->mtime)); + ctx->mtime, 0)); if (soid.snap < CEPH_NOSNAP) { switch (log_op_type) { case pg_log_entry_t::MODIFY: @@ -8862,7 +8866,7 @@ void ReplicatedPG::handle_watch_timeout(WatchRef watch) ctx->at_version, oi.version, 0, - osd_reqid_t(), ctx->mtime)); + osd_reqid_t(), ctx->mtime, 0)); oi.prior_version = obc->obs.oi.version; oi.version = ctx->at_version; @@ -9762,7 +9766,7 @@ void ReplicatedPG::mark_all_unfound_lost( ++v.version; pg_log_entry_t e( pg_log_entry_t::LOST_REVERT, oid, v, - m->second.need, 0, osd_reqid_t(), mtime); + m->second.need, 0, osd_reqid_t(), mtime, 0); e.reverting_to = prev; e.mod_desc.mark_unrollbackable(); log_entries.push_back(e); @@ -9777,7 +9781,7 @@ void ReplicatedPG::mark_all_unfound_lost( { ++v.version; pg_log_entry_t e(pg_log_entry_t::LOST_DELETE, oid, v, m->second.need, - 0, osd_reqid_t(), mtime); + 0, osd_reqid_t(), mtime, 0); if (get_osdmap()->test_flag(CEPH_OSDMAP_REQUIRE_JEWEL)) { if (pool.info.require_rollback()) { e.mod_desc.try_rmobject(v.version); @@ -11655,7 +11659,8 @@ void ReplicatedPG::hit_set_persist() eversion_t(), 0, osd_reqid_t(), - ctx->mtime) + ctx->mtime, + 0) ); if (pool.info.require_rollback()) { ctx->log.back().mod_desc.create(); @@ -11690,7 +11695,8 @@ void ReplicatedPG::hit_set_trim(OpContextUPtr &ctx, unsigned max) p->version, 0, osd_reqid_t(), - ctx->mtime)); + ctx->mtime, + 0)); if (pool.info.require_rollback()) { if (ctx->log.back().mod_desc.rmobject( ctx->at_version.version)) { diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 5805fbddc32d..02ce45b289a4 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -3433,7 +3433,7 @@ void pg_log_entry_t::decode_with_checksum(bufferlist::iterator& p) void pg_log_entry_t::encode(bufferlist &bl) const { - ENCODE_START(10, 4, bl); + ENCODE_START(11, 4, bl); ::encode(op, bl); ::encode(soid, bl); ::encode(version, bl); @@ -3458,12 +3458,14 @@ void pg_log_entry_t::encode(bufferlist &bl) const ::encode(user_version, bl); ::encode(mod_desc, bl); ::encode(extra_reqids, bl); + if (op == ERROR) + ::encode(return_code, bl); ENCODE_FINISH(bl); } void pg_log_entry_t::decode(bufferlist::iterator &bl) { - DECODE_START_LEGACY_COMPAT_LEN(10, 4, 4, bl); + DECODE_START_LEGACY_COMPAT_LEN(11, 4, 4, bl); ::decode(op, bl); if (struct_v < 2) { sobject_t old_soid; @@ -3512,7 +3514,8 @@ void pg_log_entry_t::decode(bufferlist::iterator &bl) mod_desc.mark_unrollbackable(); if (struct_v >= 10) ::decode(extra_reqids, bl); - + if (struct_v >= 11 && op == ERROR) + ::decode(return_code, bl); DECODE_FINISH(bl); } @@ -3535,6 +3538,7 @@ void pg_log_entry_t::dump(Formatter *f) const } f->close_section(); f->dump_stream("mtime") << mtime; + f->dump_int("return_code", return_code); if (snaps.length() > 0) { vector v; bufferlist c = snaps; @@ -3562,13 +3566,17 @@ void pg_log_entry_t::generate_test_instances(list& o) hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); o.push_back(new pg_log_entry_t(MODIFY, oid, eversion_t(1,2), eversion_t(3,4), 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999), - utime_t(8,9))); + utime_t(8,9), 0)); + o.push_back(new pg_log_entry_t(ERROR, oid, eversion_t(1,2), eversion_t(3,4), + 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999), + utime_t(8,9), -ENOENT)); } ostream& operator<<(ostream& out, const pg_log_entry_t& e) { out << e.version << " (" << e.prior_version << ") " - << e.get_op_name() << ' ' << e.soid << " by " << e.reqid << " " << e.mtime; + << e.get_op_name() << ' ' << e.soid << " by " << e.reqid << " " << e.mtime + << " " << e.return_code; if (e.snaps.length()) { vector snaps; bufferlist c = e.snaps; diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 04e040d43fb1..67eb6840fecc 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -2539,6 +2539,7 @@ struct pg_log_entry_t { LOST_MARK = 7, // lost new version, now EIO PROMOTE = 8, // promoted object from another tier CLEAN = 9, // mark an object clean + ERROR = 10, // write that returned an error }; static const char *get_op_name(int op) { switch (op) { @@ -2560,6 +2561,8 @@ struct pg_log_entry_t { return "l_mark "; case CLEAN: return "clean "; + case ERROR: + return "error "; default: return "unknown "; } @@ -2577,20 +2580,23 @@ struct pg_log_entry_t { eversion_t version, prior_version, reverting_to; version_t user_version; // the user version for this entry utime_t mtime; // this is the _user_ mtime, mind you + int32_t return_code; // only stored for ERRORs for dup detection __s32 op; bool invalid_hash; // only when decoding sobject_t based entries bool invalid_pool; // only when decoding pool-less hobject based entries pg_log_entry_t() - : user_version(0), op(0), + : user_version(0), return_code(0), op(0), invalid_hash(false), invalid_pool(false) {} pg_log_entry_t(int _op, const hobject_t& _soid, const eversion_t& v, const eversion_t& pv, version_t uv, - const osd_reqid_t& rid, const utime_t& mt) + const osd_reqid_t& rid, const utime_t& mt, + int return_code) : soid(_soid), reqid(rid), version(v), prior_version(pv), user_version(uv), - mtime(mt), op(_op), invalid_hash(false), invalid_pool(false) + mtime(mt), return_code(return_code), op(_op), + invalid_hash(false), invalid_pool(false) {} bool is_clone() const { return op == CLONE; } @@ -2601,6 +2607,7 @@ struct pg_log_entry_t { bool is_lost_revert() const { return op == LOST_REVERT; } bool is_lost_delete() const { return op == LOST_DELETE; } bool is_lost_mark() const { return op == LOST_MARK; } + bool is_error() const { return op == ERROR; } bool is_update() const { return @@ -2612,7 +2619,8 @@ struct pg_log_entry_t { } bool reqid_is_indexed() const { - return reqid != osd_reqid_t() && (op == MODIFY || op == DELETE); + return reqid != osd_reqid_t() && + (op == MODIFY || op == DELETE || op == ERROR); } string get_key_name() const; diff --git a/src/test/osd/TestPGLog.cc b/src/test/osd/TestPGLog.cc index 34d13133a0c7..cc0738639c25 100644 --- a/src/test/osd/TestPGLog.cc +++ b/src/test/osd/TestPGLog.cc @@ -2047,6 +2047,46 @@ TEST_F(PGLogTest, filter_log_1) { } } +TEST_F(PGLogTest, get_request) { + clear(); + + // make sure writes, deletes, and errors are found + vector entries; + hobject_t oid(object_t("objname"), "key", 123, 456, 0, ""); + entries.push_back( + pg_log_entry_t(pg_log_entry_t::ERROR, oid, eversion_t(6,2), eversion_t(3,4), + 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 1), + utime_t(0,1), -ENOENT)); + entries.push_back( + pg_log_entry_t(pg_log_entry_t::MODIFY, oid, eversion_t(6,3), eversion_t(3,4), + 2, osd_reqid_t(entity_name_t::CLIENT(777), 8, 2), + utime_t(1,2), 0)); + entries.push_back( + pg_log_entry_t(pg_log_entry_t::DELETE, oid, eversion_t(7,4), eversion_t(7,4), + 3, osd_reqid_t(entity_name_t::CLIENT(777), 8, 3), + utime_t(10,2), 0)); + entries.push_back( + pg_log_entry_t(pg_log_entry_t::ERROR, oid, eversion_t(7,5), eversion_t(7,4), + 3, osd_reqid_t(entity_name_t::CLIENT(777), 8, 4), + utime_t(20,1), -ENOENT)); + + for (auto &entry : entries) { + log.add(entry); + } + + for (auto &entry : entries) { + eversion_t replay_version; + version_t user_version; + int return_code = 0; + bool got = log.get_request( + entry.reqid, &replay_version, &user_version, &return_code); + EXPECT_TRUE(got); + EXPECT_EQ(entry.return_code, return_code); + EXPECT_EQ(entry.version, replay_version); + EXPECT_EQ(entry.user_version, user_version); + } +} + int main(int argc, char **argv) { vector args; argv_to_vec(argc, (const char **)argv, args); diff --git a/src/test/osd/types.cc b/src/test/osd/types.cc index 907d30681ed4..be39e57c8c60 100644 --- a/src/test/osd/types.cc +++ b/src/test/osd/types.cc @@ -809,7 +809,7 @@ TEST(pg_missing_t, add_next_event) eversion_t prior_version(3,4); pg_log_entry_t sample_e(pg_log_entry_t::DELETE, oid, version, prior_version, 0, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999), - utime_t(8,9)); + utime_t(8,9), 0); // new object (MODIFY) { @@ -926,6 +926,20 @@ TEST(pg_missing_t, add_next_event) missing.add_next_event(e); EXPECT_FALSE(missing.have_missing()); } + + // ERROR op should only be used for dup detection + { + pg_missing_t missing; + pg_log_entry_t e = sample_e; + + e.op = pg_log_entry_t::ERROR; + e.return_code = -ENOENT; + EXPECT_FALSE(e.is_update()); + EXPECT_FALSE(missing.is_missing(oid)); + missing.add_next_event(e); + EXPECT_FALSE(missing.is_missing(oid)); + EXPECT_TRUE(e.reqid_is_indexed()); + } } TEST(pg_missing_t, revise_need)