]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: pass and verify data+omap digest on copyfrom
author Sage Weil <sage@redhat.com>
Sun, 14 Dec 2014 05:05:46 +0000 (21:05 -0800)
committer Sage Weil <sage@redhat.com>
Sat, 20 Dec 2014 15:30:04 +0000 (07:30 -0800)
Two things here:

 1- Pass the original data and omap digests from the source across the
    wire, if they are present.
 2- Calculate new data and omap digests as we receive the object
    contents, and record them on the destination object.

If a received digest does not match the one supplied by the source, we
log an error (locally and to the cluster log) and complete the copy
with -EIO rather than crashing.

Signed-off-by: Sage Weil <sage@redhat.com>
src/osd/ReplicatedPG.cc
src/osd/ReplicatedPG.h
src/osd/osd_types.cc
src/osd/osd_types.h
src/osdc/Objecter.h
src/test/librados/misc.cc

index ffc7cb58b782b4233ee4c9644126b9d966242117..5d5b98f452a65bb2392399b1b2bb08f818569d83 100644 (file)
@@ -5889,6 +5889,14 @@ int ReplicatedPG::fill_in_copy_get(
     assert(obc->ssc);
     reply_obj.snap_seq = obc->ssc->snapset.seq;
   }
+  if (oi.is_data_digest()) {
+    reply_obj.flags |= object_copy_data_t::FLAG_DATA_DIGEST;
+    reply_obj.data_digest = oi.data_digest;
+  }
+  if (oi.is_omap_digest()) {
+    reply_obj.flags |= object_copy_data_t::FLAG_OMAP_DIGEST;
+    reply_obj.omap_digest = oi.omap_digest;
+  }
 
   // attrs
   map<string,bufferlist>& out_attrs = reply_obj.attrs;
@@ -6058,6 +6066,9 @@ void ReplicatedPG::_copy_some(ObjectContextRef obc, CopyOpRef cop)
              &cop->results.object_size, &cop->results.mtime,
              &cop->attrs, &cop->data, &cop->omap_header, &cop->omap,
              &cop->results.snaps, &cop->results.snap_seq,
+             &cop->results.flags,
+             &cop->results.source_data_digest,
+             &cop->results.source_omap_digest,
              &cop->rval);
 
   C_Copyfrom *fin = new C_Copyfrom(this, obc->obs.oi.soid,
@@ -6152,10 +6163,48 @@ void ReplicatedPG::process_copy_chunk(hobject_t oid, ceph_tid_t tid, int r)
     return;
   }
 
-  dout(20) << __func__ << " success; committing" << dendl;
   cop->results.final_tx = pgbackend->get_transaction();
   _build_finish_copy_transaction(cop, cop->results.final_tx);
 
+  // verify digests?
+  dout(20) << __func__ << std::hex
+          << " got digest: rx data 0x" << cop->results.data_digest
+          << " omap 0x" << cop->results.omap_digest
+          << ", source: data 0x" << cop->results.source_data_digest
+          << " omap 0x" <<  cop->results.source_omap_digest
+          << std::dec
+          << " flags " << cop->results.flags
+          << dendl;
+  if (cop->results.is_data_digest() &&
+      cop->results.data_digest != cop->results.source_data_digest) {
+    derr << __func__ << std::hex << " data digest 0x" << cop->results.data_digest
+        << " != source 0x" << cop->results.source_data_digest << std::dec
+        << dendl;
+    osd->clog->error() << info.pgid << " copy from " << cop->src
+                      << " to " << cop->obc->obs.oi.soid << std::hex
+                      << " data digest 0x" << cop->results.data_digest
+                      << " != source 0x" << cop->results.source_data_digest
+                      << std::dec;
+    r = -EIO;
+    goto out;
+  }
+  if (cop->results.is_omap_digest() &&
+      cop->results.omap_digest != cop->results.source_omap_digest) {
+    derr << __func__ << std::hex
+        << " omap digest 0x" << cop->results.omap_digest
+        << " != source 0x" << cop->results.source_omap_digest
+        << std::dec << dendl;
+    osd->clog->error() << info.pgid << " copy from " << cop->src
+                      << " to " << cop->obc->obs.oi.soid << std::hex
+                      << " omap digest 0x" << cop->results.omap_digest
+                      << " != source 0x" << cop->results.source_omap_digest
+                      << std::dec;
+    r = -EIO;
+    goto out;
+  }
+
+  dout(20) << __func__ << " success; committing" << dendl;
+
  out:
   dout(20) << __func__ << " complete r = " << cpp_strerror(r) << dendl;
   CopyCallbackResults results(r, &cop->results);
@@ -6207,6 +6256,7 @@ void ReplicatedPG::_write_copy_chunk(CopyOpRef cop, PGBackend::PGTransaction *t)
               cop->cursor.data_offset);
       }
     }
+    cop->results.data_digest = cop->data.crc32c(cop->results.data_digest);
     t->append(
       cop->results.temp_oid,
       cop->temp_cursor.data_offset,
@@ -6218,11 +6268,23 @@ void ReplicatedPG::_write_copy_chunk(CopyOpRef cop, PGBackend::PGTransaction *t)
   if (!pool.info.require_rollback()) {
     if (!cop->temp_cursor.omap_complete) {
       if (cop->omap_header.length()) {
+       cop->results.omap_digest =
+         cop->omap_header.crc32c(cop->results.omap_digest);
        t->omap_setheader(
          cop->results.temp_oid,
          cop->omap_header);
        cop->omap_header.clear();
       }
+      if (cop->omap.size()) {
+       for (map<string,bufferlist>::iterator p = cop->omap.begin();
+            p != cop->omap.end(); ++p) {
+         cop->results.omap_digest = ceph_crc32c(
+           cop->results.omap_digest,
+           (const unsigned char *)p->first.data(),
+           p->first.length());
+         cop->results.omap_digest = p->second.crc32c(cop->results.omap_digest);
+       }
+      }
       t->omap_setkeys(cop->results.temp_oid, cop->omap);
       cop->omap.clear();
     }
@@ -6285,6 +6347,9 @@ void ReplicatedPG::finish_copyfrom(OpContext *ctx)
   // CopyFromCallback fills this in for us
   obs.oi.user_version = ctx->user_at_version;
 
+  obs.oi.set_data_digest(cb->results->data_digest);
+  obs.oi.set_omap_digest(cb->results->omap_digest);
+
   // cache: clear whiteout?
   if (obs.oi.is_whiteout()) {
     dout(10) << __func__ << " clearing whiteout on " << obs.oi.soid << dendl;
index 9fb64d9594f8d5f5ef5c97d501864646df421a5d..57a5220cac6113e5f9c66c4d5a977f63adfa29a0 100644 (file)
@@ -123,10 +123,24 @@ public:
     bool mirror_snapset;
     map<string, bufferlist> attrs; ///< src user attrs
     bool has_omap;
-    CopyResults() : object_size(0), started_temp_obj(false),
-                   final_tx(NULL), user_version(0), 
-                   should_requeue(false), mirror_snapset(false),
-                   has_omap(false) {}
+    uint32_t flags;    // object_copy_data_t::FLAG_*
+    uint32_t source_data_digest, source_omap_digest;
+    uint32_t data_digest, omap_digest;
+    bool is_data_digest() {
+      return flags & object_copy_data_t::FLAG_DATA_DIGEST;
+    }
+    bool is_omap_digest() {
+      return flags & object_copy_data_t::FLAG_OMAP_DIGEST;
+    }
+    CopyResults()
+      : object_size(0), started_temp_obj(false),
+       final_tx(NULL), user_version(0),
+       should_requeue(false), mirror_snapset(false),
+       has_omap(false),
+       flags(0),
+       source_data_digest(-1), source_omap_digest(-1),
+       data_digest(-1), omap_digest(-1)
+    {}
   };
 
   struct CopyOp {
index a821a147b4a4f9f3168fd1657a3511bf03252f37..ac6e893abfdbcf5b794ecc6f11c13451e639e34c 100644 (file)
@@ -3330,11 +3330,13 @@ void object_copy_data_t::decode_classic(bufferlist::iterator& bl)
   ::decode(data, bl);
   ::decode(omap, bl);
   ::decode(cursor, bl);
+  flags = 0;
+  data_digest = omap_digest = 0;
 }
 
 void object_copy_data_t::encode(bufferlist& bl) const
 {
-  ENCODE_START(3, 1, bl);
+  ENCODE_START(4, 1, bl);
   ::encode(size, bl);
   ::encode(mtime, bl);
   ::encode((__u32)0, bl);  // was category; no longer used
@@ -3345,12 +3347,15 @@ void object_copy_data_t::encode(bufferlist& bl) const
   ::encode(omap_header, bl);
   ::encode(snaps, bl);
   ::encode(snap_seq, bl);
+  ::encode(flags, bl);
+  ::encode(data_digest, bl);
+  ::encode(omap_digest, bl);
   ENCODE_FINISH(bl);
 }
 
 void object_copy_data_t::decode(bufferlist::iterator& bl)
 {
-  DECODE_START(2, bl);
+  DECODE_START(4, bl);
   ::decode(size, bl);
   ::decode(mtime, bl);
   {
@@ -3370,6 +3375,11 @@ void object_copy_data_t::decode(bufferlist::iterator& bl)
     snaps.clear();
     snap_seq = 0;
   }
+  if (struct_v >= 4) {
+    ::decode(flags, bl);
+    ::decode(data_digest, bl);
+    ::decode(omap_digest, bl);
+  }
   DECODE_FINISH(bl);
 }
 
@@ -3412,6 +3422,9 @@ void object_copy_data_t::dump(Formatter *f) const
   /* we should really print out the attrs here, but bufferlist
      const-correctness prevents that */
   f->dump_int("attrs_size", attrs.size());
+  f->dump_int("flags", flags);
+  f->dump_unsigned("data_digest", data_digest);
+  f->dump_unsigned("omap_digest", omap_digest);
   f->dump_int("omap_size", omap.size());
   f->dump_int("omap_header_length", omap_header.length());
   f->dump_int("data_length", data.length());
index 804b5c97a9ef505e127c87753c32f492fdf12216..ad4d5d292de5f66fd84171f4bceb1a035d38622e 100644 (file)
@@ -2490,9 +2490,15 @@ WRITE_CLASS_ENCODER(object_copy_cursor_t)
  * based on the contents of the cursor.
  */
 struct object_copy_data_t {
+  enum {
+    FLAG_DATA_DIGEST = 1<<0,
+    FLAG_OMAP_DIGEST = 1<<1,
+  };
   object_copy_cursor_t cursor;
   uint64_t size;
   utime_t mtime;
+  uint32_t data_digest, omap_digest;
+  uint32_t flags;
   map<string, bufferlist> attrs;
   bufferlist data;
   bufferlist omap_header;
@@ -2503,7 +2509,8 @@ struct object_copy_data_t {
   ///< latest snap seq for the object (if head)
   snapid_t snap_seq;
 public:
-  object_copy_data_t() : size((uint64_t)-1) {}
+  object_copy_data_t() : size((uint64_t)-1), data_digest(-1),
+                        omap_digest(-1), flags(0) {}
 
   static void generate_test_instances(list<object_copy_data_t*>& o);
   void encode_classic(bufferlist& bl) const;
index d17b1ed92b94801890829bece5bfe74f883855f6..2d7527668728d2bd0da88da733dd88c5ed47ce75 100644 (file)
@@ -623,6 +623,9 @@ struct ObjectOperation {
     std::map<std::string,bufferlist> *out_omap;
     vector<snapid_t> *out_snaps;
     snapid_t *out_snap_seq;
+    uint32_t *out_flags;
+    uint32_t *out_data_digest;
+    uint32_t *out_omap_digest;
     int *prval;
     C_ObjectOperation_copyget(object_copy_cursor_t *c,
                              uint64_t *s,
@@ -632,11 +635,15 @@ struct ObjectOperation {
                              std::map<std::string,bufferlist> *o,
                              std::vector<snapid_t> *osnaps,
                              snapid_t *osnap_seq,
+                             uint32_t *flags,
+                             uint32_t *dd,
+                             uint32_t *od,
                              int *r)
       : cursor(c),
        out_size(s), out_mtime(m),
        out_attrs(a), out_data(d), out_omap_header(oh),
        out_omap(o), out_snaps(osnaps), out_snap_seq(osnap_seq),
+       out_flags(flags), out_data_digest(dd), out_omap_digest(od),
        prval(r) {}
     void finish(int r) {
       if (r < 0)
@@ -661,6 +668,12 @@ struct ObjectOperation {
          *out_snaps = copy_reply.snaps;
        if (out_snap_seq)
          *out_snap_seq = copy_reply.snap_seq;
+       if (out_flags)
+         *out_flags = copy_reply.flags;
+       if (out_data_digest)
+         *out_data_digest = copy_reply.data_digest;
+       if (out_omap_digest)
+         *out_omap_digest = copy_reply.omap_digest;
        *cursor = copy_reply.cursor;
       } catch (buffer::error& e) {
        if (prval)
@@ -679,6 +692,9 @@ struct ObjectOperation {
                std::map<std::string,bufferlist> *out_omap,
                vector<snapid_t> *out_snaps,
                snapid_t *out_snap_seq,
+               uint32_t *out_flags,
+               uint32_t *out_data_digest,
+               uint32_t *out_omap_digest,
                int *prval) {
     OSDOp& osd_op = add_op(CEPH_OSD_OP_COPY_GET);
     osd_op.op.copy_get.max = max;
@@ -689,7 +705,9 @@ struct ObjectOperation {
     C_ObjectOperation_copyget *h =
       new C_ObjectOperation_copyget(cursor, out_size, out_mtime,
                                     out_attrs, out_data, out_omap_header,
-                                   out_omap, out_snaps, out_snap_seq, prval);
+                                   out_omap, out_snaps, out_snap_seq,
+                                   out_flags, out_data_digest, out_omap_digest,
+                                   prval);
     out_bl[p] = &h->bl;
     out_handler[p] = h;
   }
index 3b52cbdf398b3243f2a325615c432ddf52f74a54..f4a291d2c35117b2b6276f93d51a4c377e9bb877 100644 (file)
@@ -629,6 +629,100 @@ TEST_F(LibRadosMiscPP, CopyPP) {
   }
 }
 
+TEST_F(LibRadosMiscPP, CopyScrubPP) {
+  bufferlist inbl, bl, x;
+  for (int i=0; i<100; ++i)
+    x.append("barrrrrrrrrrrrrrrrrrrrrrrrrr");
+  bl.append(buffer::create(g_conf->osd_copyfrom_max_chunk * 3));
+  bl.zero();
+  bl.append("tail");
+  bufferlist cbl;
+
+  map<string, bufferlist> to_set;
+  for (int i=0; i<1000; ++i)
+    to_set[string("foo") + stringify(i)] = x;
+
+  // small
+  cbl = x;
+  ASSERT_EQ(0, ioctx.write_full("small", cbl));
+  ASSERT_EQ(0, ioctx.setxattr("small", "myattr", x));
+
+  // big
+  cbl = bl;
+  ASSERT_EQ(0, ioctx.write_full("big", cbl));
+
+  // without header
+  cbl = bl;
+  ASSERT_EQ(0, ioctx.write_full("big2", cbl));
+  ASSERT_EQ(0, ioctx.setxattr("big2", "myattr", x));
+  ASSERT_EQ(0, ioctx.setxattr("big2", "myattr2", x));
+  ASSERT_EQ(0, ioctx.omap_set("big2", to_set));
+
+  // with header
+  cbl = bl;
+  ASSERT_EQ(0, ioctx.write_full("big3", cbl));
+  ASSERT_EQ(0, ioctx.omap_set_header("big3", x));
+  ASSERT_EQ(0, ioctx.omap_set("big3", to_set));
+
+  // deep scrub to ensure digests are in place
+  {
+    for (int i=0; i<10; ++i) {
+      ostringstream ss;
+      ss << "{\"prefix\": \"pg deep-scrub\", \"pgid\": \""
+        << ioctx.get_id() << "." << i
+        << "\"}";
+      cluster.mon_command(ss.str(), inbl, NULL, NULL);
+    }
+
+    // give it a few seconds to go.  this is sloppy but is usually enough time
+    cout << "waiting for initial deep scrubs..." << std::endl;
+    sleep(30);
+    cout << "done waiting, doing copies" << std::endl;
+  }
+
+  {
+    ObjectWriteOperation op;
+    op.copy_from("small", ioctx, 0);
+    ASSERT_EQ(0, ioctx.operate("small.copy", &op));
+  }
+
+  {
+    ObjectWriteOperation op;
+    op.copy_from("big", ioctx, 0);
+    ASSERT_EQ(0, ioctx.operate("big.copy", &op));
+  }
+
+  {
+    ObjectWriteOperation op;
+    op.copy_from("big2", ioctx, 0);
+    ASSERT_EQ(0, ioctx.operate("big2.copy", &op));
+  }
+
+  {
+    ObjectWriteOperation op;
+    op.copy_from("big3", ioctx, 0);
+    ASSERT_EQ(0, ioctx.operate("big3.copy", &op));
+  }
+
+  // deep scrub to ensure digests are correct
+  {
+    for (int i=0; i<10; ++i) {
+      ostringstream ss;
+      ss << "{\"prefix\": \"pg deep-scrub\", \"pgid\": \""
+        << ioctx.get_id() << "." << i
+        << "\"}";
+      cluster.mon_command(ss.str(), inbl, NULL, NULL);
+    }
+
+    // give it a few seconds to go.  this is sloppy but is usually enough time
+    cout << "waiting for final deep scrubs..." << std::endl;
+    sleep(30);
+    cout << "done waiting" << std::endl;
+  }
+}
+
+
+
 int main(int argc, char **argv)
 {
   ::testing::InitGoogleTest(&argc, argv);