]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd_types: add an error pg log entry type
authorJosh Durgin <jdurgin@redhat.com>
Tue, 10 May 2016 21:04:25 +0000 (14:04 -0700)
committerJosh Durgin <jdurgin@redhat.com>
Sat, 9 Jul 2016 01:33:13 +0000 (18:33 -0700)
This will store write error codes for use in dup op detection.
A few places use checks assuming is_update() or is_delete() are
opposites - fix those to ignore or consider errors, as appropriate.

Refs: http://tracker.ceph.com/issues/14468
Signed-off-by: Josh Durgin <jdurgin@redhat.com>
src/osd/ReplicatedPG.cc
src/osd/osd_types.cc
src/osd/osd_types.h
src/test/osd/TestPGLog.cc
src/test/osd/types.cc

index 7365a8d1a70744f6b9b7690079f21367f5006aa5..b26759294fd8cb2e9f82e9b4a7b7087389ae01d9 100644 (file)
@@ -3455,7 +3455,8 @@ ReplicatedPG::OpContextUPtr ReplicatedPG::trim_object(const hobject_t &coid)
        ctx->obs->oi.version,
        0,
        osd_reqid_t(),
-       ctx->mtime)
+       ctx->mtime,
+       0)
       );
     if (pool.info.require_rollback()) {
       set<snapid_t> snaps(
@@ -3492,7 +3493,8 @@ ReplicatedPG::OpContextUPtr ReplicatedPG::trim_object(const hobject_t &coid)
        coi.prior_version,
        0,
        osd_reqid_t(),
-       ctx->mtime)
+       ctx->mtime,
+       0)
       );
     if (pool.info.require_rollback()) {
       set<string> changing;
@@ -3523,7 +3525,8 @@ ReplicatedPG::OpContextUPtr ReplicatedPG::trim_object(const hobject_t &coid)
        ctx->snapset_obc->obs.oi.version,
        0,
        osd_reqid_t(),
-       ctx->mtime)
+       ctx->mtime,
+       0)
       );
 
     ctx->snapset_obc->obs.exists = false;
@@ -3551,7 +3554,8 @@ ReplicatedPG::OpContextUPtr ReplicatedPG::trim_object(const hobject_t &coid)
        ctx->snapset_obc->obs.oi.version,
        0,
        osd_reqid_t(),
-       ctx->mtime)
+       ctx->mtime,
+       0)
       );
 
     ctx->snapset_obc->obs.oi.prior_version =
@@ -6395,7 +6399,7 @@ void ReplicatedPG::make_writeable(OpContext *ctx)
     ctx->log.push_back(pg_log_entry_t(pg_log_entry_t::CLONE, coid, ctx->at_version,
                                      ctx->obs->oi.version,
                                      ctx->obs->oi.user_version,
-                                     osd_reqid_t(), ctx->new_obs.oi.mtime));
+                                     osd_reqid_t(), ctx->new_obs.oi.mtime, 0));
     ::encode(snaps, ctx->log.back().snaps);
     ctx->log.back().mod_desc.create();
 
@@ -6657,7 +6661,7 @@ void ReplicatedPG::finish_ctx(OpContext *ctx, int log_op_type, bool maintain_ssc
          ctx->log.push_back(pg_log_entry_t(pg_log_entry_t::DELETE, snapoid,
              ctx->at_version,
              ctx->snapset_obc->obs.oi.version,
-             0, osd_reqid_t(), ctx->mtime));
+             0, osd_reqid_t(), ctx->mtime, 0));
          if (pool.info.require_rollback()) {
            if (ctx->log.back().mod_desc.rmobject(ctx->at_version.version)) {
              ctx->op_t->stash(snapoid, ctx->at_version.version);
@@ -6686,7 +6690,7 @@ void ReplicatedPG::finish_ctx(OpContext *ctx, int log_op_type, bool maintain_ssc
       ctx->log.push_back(pg_log_entry_t(pg_log_entry_t::MODIFY, snapoid,
                                        ctx->at_version,
                                        eversion_t(),
-                                       0, osd_reqid_t(), ctx->mtime));
+                                       0, osd_reqid_t(), ctx->mtime, 0));
 
       if (!ctx->snapset_obc)
        ctx->snapset_obc = get_object_context(snapoid, true);
@@ -6795,7 +6799,7 @@ void ReplicatedPG::finish_ctx(OpContext *ctx, int log_op_type, bool maintain_ssc
   ctx->log.push_back(pg_log_entry_t(log_op_type, soid, ctx->at_version,
                                    ctx->obs->oi.version,
                                    ctx->user_at_version, ctx->reqid,
-                                   ctx->mtime));
+                                   ctx->mtime, 0));
   if (soid.snap < CEPH_NOSNAP) {
     switch (log_op_type) {
     case pg_log_entry_t::MODIFY:
@@ -8862,7 +8866,7 @@ void ReplicatedPG::handle_watch_timeout(WatchRef watch)
                                    ctx->at_version,
                                    oi.version,
                                    0,
-                                   osd_reqid_t(), ctx->mtime));
+                                   osd_reqid_t(), ctx->mtime, 0));
 
   oi.prior_version = obc->obs.oi.version;
   oi.version = ctx->at_version;
@@ -9762,7 +9766,7 @@ void ReplicatedPG::mark_all_unfound_lost(
        ++v.version;
        pg_log_entry_t e(
          pg_log_entry_t::LOST_REVERT, oid, v,
-         m->second.need, 0, osd_reqid_t(), mtime);
+         m->second.need, 0, osd_reqid_t(), mtime, 0);
        e.reverting_to = prev;
        e.mod_desc.mark_unrollbackable();
        log_entries.push_back(e);
@@ -9777,7 +9781,7 @@ void ReplicatedPG::mark_all_unfound_lost(
       {
        ++v.version;
        pg_log_entry_t e(pg_log_entry_t::LOST_DELETE, oid, v, m->second.need,
-                    0, osd_reqid_t(), mtime);
+                        0, osd_reqid_t(), mtime, 0);
        if (get_osdmap()->test_flag(CEPH_OSDMAP_REQUIRE_JEWEL)) {
          if (pool.info.require_rollback()) {
            e.mod_desc.try_rmobject(v.version);
@@ -11655,7 +11659,8 @@ void ReplicatedPG::hit_set_persist()
       eversion_t(),
       0,
       osd_reqid_t(),
-      ctx->mtime)
+      ctx->mtime,
+      0)
     );
   if (pool.info.require_rollback()) {
     ctx->log.back().mod_desc.create();
@@ -11690,7 +11695,8 @@ void ReplicatedPG::hit_set_trim(OpContextUPtr &ctx, unsigned max)
                       p->version,
                       0,
                       osd_reqid_t(),
-                      ctx->mtime));
+                      ctx->mtime,
+                      0));
     if (pool.info.require_rollback()) {
       if (ctx->log.back().mod_desc.rmobject(
          ctx->at_version.version)) {
index 5805fbddc32d7ae598161828f33cf4f72942f467..02ce45b289a4923aab1d4ad9f1327e6f533a8d97 100644 (file)
@@ -3433,7 +3433,7 @@ void pg_log_entry_t::decode_with_checksum(bufferlist::iterator& p)
 
 void pg_log_entry_t::encode(bufferlist &bl) const
 {
-  ENCODE_START(10, 4, bl);
+  ENCODE_START(11, 4, bl);
   ::encode(op, bl);
   ::encode(soid, bl);
   ::encode(version, bl);
@@ -3458,12 +3458,14 @@ void pg_log_entry_t::encode(bufferlist &bl) const
   ::encode(user_version, bl);
   ::encode(mod_desc, bl);
   ::encode(extra_reqids, bl);
+  if (op == ERROR)
+    ::encode(return_code, bl);
   ENCODE_FINISH(bl);
 }
 
 void pg_log_entry_t::decode(bufferlist::iterator &bl)
 {
-  DECODE_START_LEGACY_COMPAT_LEN(10, 4, 4, bl);
+  DECODE_START_LEGACY_COMPAT_LEN(11, 4, 4, bl);
   ::decode(op, bl);
   if (struct_v < 2) {
     sobject_t old_soid;
@@ -3512,7 +3514,8 @@ void pg_log_entry_t::decode(bufferlist::iterator &bl)
     mod_desc.mark_unrollbackable();
   if (struct_v >= 10)
     ::decode(extra_reqids, bl);
-
+  if (struct_v >= 11 && op == ERROR)
+    ::decode(return_code, bl);
   DECODE_FINISH(bl);
 }
 
@@ -3535,6 +3538,7 @@ void pg_log_entry_t::dump(Formatter *f) const
   }
   f->close_section();
   f->dump_stream("mtime") << mtime;
+  f->dump_int("return_code", return_code);
   if (snaps.length() > 0) {
     vector<snapid_t> v;
     bufferlist c = snaps;
@@ -3562,13 +3566,17 @@ void pg_log_entry_t::generate_test_instances(list<pg_log_entry_t*>& o)
   hobject_t oid(object_t("objname"), "key", 123, 456, 0, "");
   o.push_back(new pg_log_entry_t(MODIFY, oid, eversion_t(1,2), eversion_t(3,4),
                                 1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
-                                utime_t(8,9)));
+                                utime_t(8,9), 0));
+  o.push_back(new pg_log_entry_t(ERROR, oid, eversion_t(1,2), eversion_t(3,4),
+                                1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
+                                utime_t(8,9), -ENOENT));
 }
 
 ostream& operator<<(ostream& out, const pg_log_entry_t& e)
 {
   out << e.version << " (" << e.prior_version << ") "
-      << e.get_op_name() << ' ' << e.soid << " by " << e.reqid << " " << e.mtime;
+      << e.get_op_name() << ' ' << e.soid << " by " << e.reqid << " " << e.mtime
+      << " " << e.return_code;
   if (e.snaps.length()) {
     vector<snapid_t> snaps;
     bufferlist c = e.snaps;
index 04e040d43fb1471831d1ba3c47cc5d66e4375c8a..67eb6840fecc317627da6facb1bc05894d8a863b 100644 (file)
@@ -2539,6 +2539,7 @@ struct pg_log_entry_t {
     LOST_MARK = 7,   // lost new version, now EIO
     PROMOTE = 8,     // promoted object from another tier
     CLEAN = 9,       // mark an object clean
+    ERROR = 10,      // write that returned an error
   };
   static const char *get_op_name(int op) {
     switch (op) {
@@ -2560,6 +2561,8 @@ struct pg_log_entry_t {
       return "l_mark  ";
     case CLEAN:
       return "clean   ";
+    case ERROR:
+      return "error   ";
     default:
       return "unknown ";
     }
@@ -2577,20 +2580,23 @@ struct pg_log_entry_t {
   eversion_t version, prior_version, reverting_to;
   version_t user_version; // the user version for this entry
   utime_t     mtime;  // this is the _user_ mtime, mind you
+  int32_t return_code; // only stored for ERRORs for dup detection
 
   __s32      op;
   bool invalid_hash; // only when decoding sobject_t based entries
   bool invalid_pool; // only when decoding pool-less hobject based entries
 
   pg_log_entry_t()
-   : user_version(0), op(0),
+   : user_version(0), return_code(0), op(0),
      invalid_hash(false), invalid_pool(false) {}
   pg_log_entry_t(int _op, const hobject_t& _soid,
                 const eversion_t& v, const eversion_t& pv,
                 version_t uv,
-                const osd_reqid_t& rid, const utime_t& mt)
+                const osd_reqid_t& rid, const utime_t& mt,
+                int return_code)
    : soid(_soid), reqid(rid), version(v), prior_version(pv), user_version(uv),
-     mtime(mt), op(_op), invalid_hash(false), invalid_pool(false)
+     mtime(mt), return_code(return_code), op(_op),
+     invalid_hash(false), invalid_pool(false)
      {}
       
   bool is_clone() const { return op == CLONE; }
@@ -2601,6 +2607,7 @@ struct pg_log_entry_t {
   bool is_lost_revert() const { return op == LOST_REVERT; }
   bool is_lost_delete() const { return op == LOST_DELETE; }
   bool is_lost_mark() const { return op == LOST_MARK; }
+  bool is_error() const { return op == ERROR; }
 
   bool is_update() const {
     return
@@ -2612,7 +2619,8 @@ struct pg_log_entry_t {
   }
       
   bool reqid_is_indexed() const {
-    return reqid != osd_reqid_t() && (op == MODIFY || op == DELETE);
+    return reqid != osd_reqid_t() &&
+      (op == MODIFY || op == DELETE || op == ERROR);
   }
 
   string get_key_name() const;
index 34d13133a0c7e001991acda98c1313987f289a15..cc0738639c25748050c1702db73c71e56b5ad70d 100644 (file)
@@ -2047,6 +2047,46 @@ TEST_F(PGLogTest, filter_log_1) {
   }
 }
 
+TEST_F(PGLogTest, get_request) {
+  clear();
+
+  // make sure writes, deletes, and errors are found
+  vector<pg_log_entry_t> entries;
+  hobject_t oid(object_t("objname"), "key", 123, 456, 0, "");
+  entries.push_back(
+    pg_log_entry_t(pg_log_entry_t::ERROR, oid, eversion_t(6,2), eversion_t(3,4),
+                  1, osd_reqid_t(entity_name_t::CLIENT(777), 8, 1),
+                  utime_t(0,1), -ENOENT));
+  entries.push_back(
+    pg_log_entry_t(pg_log_entry_t::MODIFY, oid, eversion_t(6,3), eversion_t(3,4),
+                  2, osd_reqid_t(entity_name_t::CLIENT(777), 8, 2),
+                  utime_t(1,2), 0));
+  entries.push_back(
+    pg_log_entry_t(pg_log_entry_t::DELETE, oid, eversion_t(7,4), eversion_t(7,4),
+                  3, osd_reqid_t(entity_name_t::CLIENT(777), 8, 3),
+                  utime_t(10,2), 0));
+  entries.push_back(
+    pg_log_entry_t(pg_log_entry_t::ERROR, oid, eversion_t(7,5), eversion_t(7,4),
+                  3, osd_reqid_t(entity_name_t::CLIENT(777), 8, 4),
+                  utime_t(20,1), -ENOENT));
+
+  for (auto &entry : entries) {
+    log.add(entry);
+  }
+
+  for (auto &entry : entries) {
+    eversion_t replay_version;
+    version_t user_version;
+    int return_code = 0;
+    bool got = log.get_request(
+      entry.reqid, &replay_version, &user_version, &return_code);
+    EXPECT_TRUE(got);
+    EXPECT_EQ(entry.return_code, return_code);
+    EXPECT_EQ(entry.version, replay_version);
+    EXPECT_EQ(entry.user_version, user_version);
+  }
+}
+
 int main(int argc, char **argv) {
   vector<const char*> args;
   argv_to_vec(argc, (const char **)argv, args);
index 907d30681ed4042aade509ba06eb15e44ae2b9e7..be39e57c8c6000605d1345a033871ecfec189f0c 100644 (file)
@@ -809,7 +809,7 @@ TEST(pg_missing_t, add_next_event)
   eversion_t prior_version(3,4);
   pg_log_entry_t sample_e(pg_log_entry_t::DELETE, oid, version, prior_version,
                          0, osd_reqid_t(entity_name_t::CLIENT(777), 8, 999),
-                         utime_t(8,9));
+                         utime_t(8,9), 0);
 
   // new object (MODIFY)
   {
@@ -926,6 +926,20 @@ TEST(pg_missing_t, add_next_event)
     missing.add_next_event(e);
     EXPECT_FALSE(missing.have_missing());
   }
+
+  // ERROR op should only be used for dup detection
+  {
+    pg_missing_t missing;
+    pg_log_entry_t e = sample_e;
+
+    e.op = pg_log_entry_t::ERROR;
+    e.return_code = -ENOENT;
+    EXPECT_FALSE(e.is_update());
+    EXPECT_FALSE(missing.is_missing(oid));
+    missing.add_next_event(e);
+    EXPECT_FALSE(missing.is_missing(oid));
+    EXPECT_TRUE(e.reqid_is_indexed());
+  }
 }
 
 TEST(pg_missing_t, revise_need)