From: Sage Weil Date: Sun, 14 Dec 2014 05:05:46 +0000 (-0800) Subject: osd: pass and verify data+omap digest on copyfrom X-Git-Tag: v0.92~111^2~5 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f8c1d4016908353445ec6922d8165a31a600b534;p=ceph.git osd: pass and verify data+omap digest on copyfrom Two things here: 1- Pass the original digest from the source across the wire, if it is present. 2- Calculate a new digest as we receive it, and record that. If there is a mismatch, we currently crash; need to turn this into an EIO, most likely. Signed-off-by: Sage Weil --- diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index ffc7cb58b78..5d5b98f452a 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -5889,6 +5889,14 @@ int ReplicatedPG::fill_in_copy_get( assert(obc->ssc); reply_obj.snap_seq = obc->ssc->snapset.seq; } + if (oi.is_data_digest()) { + reply_obj.flags |= object_copy_data_t::FLAG_DATA_DIGEST; + reply_obj.data_digest = oi.data_digest; + } + if (oi.is_omap_digest()) { + reply_obj.flags |= object_copy_data_t::FLAG_OMAP_DIGEST; + reply_obj.omap_digest = oi.omap_digest; + } // attrs map& out_attrs = reply_obj.attrs; @@ -6058,6 +6066,9 @@ void ReplicatedPG::_copy_some(ObjectContextRef obc, CopyOpRef cop) &cop->results.object_size, &cop->results.mtime, &cop->attrs, &cop->data, &cop->omap_header, &cop->omap, &cop->results.snaps, &cop->results.snap_seq, + &cop->results.flags, + &cop->results.source_data_digest, + &cop->results.source_omap_digest, &cop->rval); C_Copyfrom *fin = new C_Copyfrom(this, obc->obs.oi.soid, @@ -6152,10 +6163,48 @@ void ReplicatedPG::process_copy_chunk(hobject_t oid, ceph_tid_t tid, int r) return; } - dout(20) << __func__ << " success; committing" << dendl; cop->results.final_tx = pgbackend->get_transaction(); _build_finish_copy_transaction(cop, cop->results.final_tx); + // verify digests? 
+ dout(20) << __func__ << std::hex + << " got digest: rx data 0x" << cop->results.data_digest + << " omap 0x" << cop->results.omap_digest + << ", source: data 0x" << cop->results.source_data_digest + << " omap 0x" << cop->results.source_omap_digest + << std::dec + << " flags " << cop->results.flags + << dendl; + if (cop->results.is_data_digest() && + cop->results.data_digest != cop->results.source_data_digest) { + derr << __func__ << std::hex << " data digest 0x" << cop->results.data_digest + << " != source 0x" << cop->results.source_data_digest << std::dec + << dendl; + osd->clog->error() << info.pgid << " copy from " << cop->src + << " to " << cop->obc->obs.oi.soid << std::hex + << " data digest 0x" << cop->results.data_digest + << " != source 0x" << cop->results.source_data_digest + << std::dec; + r = -EIO; + goto out; + } + if (cop->results.is_omap_digest() && + cop->results.omap_digest != cop->results.source_omap_digest) { + derr << __func__ << std::hex + << " omap digest 0x" << cop->results.omap_digest + << " != source 0x" << cop->results.source_omap_digest + << std::dec << dendl; + osd->clog->error() << info.pgid << " copy from " << cop->src + << " to " << cop->obc->obs.oi.soid << std::hex + << " omap digest 0x" << cop->results.omap_digest + << " != source 0x" << cop->results.source_omap_digest + << std::dec; + r = -EIO; + goto out; + } + + dout(20) << __func__ << " success; committing" << dendl; + out: dout(20) << __func__ << " complete r = " << cpp_strerror(r) << dendl; CopyCallbackResults results(r, &cop->results); @@ -6207,6 +6256,7 @@ void ReplicatedPG::_write_copy_chunk(CopyOpRef cop, PGBackend::PGTransaction *t) cop->cursor.data_offset); } } + cop->results.data_digest = cop->data.crc32c(cop->results.data_digest); t->append( cop->results.temp_oid, cop->temp_cursor.data_offset, @@ -6218,11 +6268,23 @@ void ReplicatedPG::_write_copy_chunk(CopyOpRef cop, PGBackend::PGTransaction *t) if (!pool.info.require_rollback()) { if (!cop->temp_cursor.omap_complete) 
{ if (cop->omap_header.length()) { + cop->results.omap_digest = + cop->omap_header.crc32c(cop->results.omap_digest); t->omap_setheader( cop->results.temp_oid, cop->omap_header); cop->omap_header.clear(); } + if (cop->omap.size()) { + for (map<string,bufferlist>::iterator p = cop->omap.begin(); + p != cop->omap.end(); ++p) { + cop->results.omap_digest = ceph_crc32c( + cop->results.omap_digest, + (const unsigned char *)p->first.data(), + p->first.length()); + cop->results.omap_digest = p->second.crc32c(cop->results.omap_digest); + } + } t->omap_setkeys(cop->results.temp_oid, cop->omap); cop->omap.clear(); } @@ -6285,6 +6347,9 @@ void ReplicatedPG::finish_copyfrom(OpContext *ctx) // CopyFromCallback fills this in for us obs.oi.user_version = ctx->user_at_version; + obs.oi.set_data_digest(cb->results->data_digest); + obs.oi.set_omap_digest(cb->results->omap_digest); + // cache: clear whiteout? if (obs.oi.is_whiteout()) { dout(10) << __func__ << " clearing whiteout on " << obs.oi.soid << dendl; diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index 9fb64d9594f..57a5220cac6 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -123,10 +123,24 @@ public: bool mirror_snapset; map attrs; ///< src user attrs bool has_omap; - CopyResults() : object_size(0), started_temp_obj(false), - final_tx(NULL), user_version(0), - should_requeue(false), mirror_snapset(false), - has_omap(false) {} + uint32_t flags; // object_copy_data_t::FLAG_* + uint32_t source_data_digest, source_omap_digest; + uint32_t data_digest, omap_digest; + bool is_data_digest() { + return flags & object_copy_data_t::FLAG_DATA_DIGEST; + } + bool is_omap_digest() { + return flags & object_copy_data_t::FLAG_OMAP_DIGEST; + } + CopyResults() + : object_size(0), started_temp_obj(false), + final_tx(NULL), user_version(0), + should_requeue(false), mirror_snapset(false), + has_omap(false), + flags(0), + source_data_digest(-1), source_omap_digest(-1), + data_digest(-1), omap_digest(-1) + {} }; struct CopyOp { diff 
--git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index a821a147b4a..ac6e893abfd 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -3330,11 +3330,13 @@ void object_copy_data_t::decode_classic(bufferlist::iterator& bl) ::decode(data, bl); ::decode(omap, bl); ::decode(cursor, bl); + flags = 0; + data_digest = omap_digest = 0; } void object_copy_data_t::encode(bufferlist& bl) const { - ENCODE_START(3, 1, bl); + ENCODE_START(4, 1, bl); ::encode(size, bl); ::encode(mtime, bl); ::encode((__u32)0, bl); // was category; no longer used @@ -3345,12 +3347,15 @@ void object_copy_data_t::encode(bufferlist& bl) const ::encode(omap_header, bl); ::encode(snaps, bl); ::encode(snap_seq, bl); + ::encode(flags, bl); + ::encode(data_digest, bl); + ::encode(omap_digest, bl); ENCODE_FINISH(bl); } void object_copy_data_t::decode(bufferlist::iterator& bl) { - DECODE_START(2, bl); + DECODE_START(4, bl); ::decode(size, bl); ::decode(mtime, bl); { @@ -3370,6 +3375,11 @@ void object_copy_data_t::decode(bufferlist::iterator& bl) snaps.clear(); snap_seq = 0; } + if (struct_v >= 4) { + ::decode(flags, bl); + ::decode(data_digest, bl); + ::decode(omap_digest, bl); + } DECODE_FINISH(bl); } @@ -3412,6 +3422,9 @@ void object_copy_data_t::dump(Formatter *f) const /* we should really print out the attrs here, but bufferlist const-correctness prents that */ f->dump_int("attrs_size", attrs.size()); + f->dump_int("flags", flags); + f->dump_unsigned("data_digest", data_digest); + f->dump_unsigned("omap_digest", omap_digest); f->dump_int("omap_size", omap.size()); f->dump_int("omap_header_length", omap_header.length()); f->dump_int("data_length", data.length()); diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 804b5c97a9e..ad4d5d292de 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -2490,9 +2490,15 @@ WRITE_CLASS_ENCODER(object_copy_cursor_t) * based on the contents of the cursor. 
*/ struct object_copy_data_t { + enum { + FLAG_DATA_DIGEST = 1<<0, + FLAG_OMAP_DIGEST = 1<<1, + }; object_copy_cursor_t cursor; uint64_t size; utime_t mtime; + uint32_t data_digest, omap_digest; + uint32_t flags; map attrs; bufferlist data; bufferlist omap_header; @@ -2503,7 +2509,8 @@ struct object_copy_data_t { ///< latest snap seq for the object (if head) snapid_t snap_seq; public: - object_copy_data_t() : size((uint64_t)-1) {} + object_copy_data_t() : size((uint64_t)-1), data_digest(-1), + omap_digest(-1), flags(0) {} static void generate_test_instances(list& o); void encode_classic(bufferlist& bl) const; diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h index d17b1ed92b9..2d752766872 100644 --- a/src/osdc/Objecter.h +++ b/src/osdc/Objecter.h @@ -623,6 +623,9 @@ struct ObjectOperation { std::map *out_omap; vector *out_snaps; snapid_t *out_snap_seq; + uint32_t *out_flags; + uint32_t *out_data_digest; + uint32_t *out_omap_digest; int *prval; C_ObjectOperation_copyget(object_copy_cursor_t *c, uint64_t *s, @@ -632,11 +635,15 @@ struct ObjectOperation { std::map *o, std::vector *osnaps, snapid_t *osnap_seq, + uint32_t *flags, + uint32_t *dd, + uint32_t *od, int *r) : cursor(c), out_size(s), out_mtime(m), out_attrs(a), out_data(d), out_omap_header(oh), out_omap(o), out_snaps(osnaps), out_snap_seq(osnap_seq), + out_flags(flags), out_data_digest(dd), out_omap_digest(od), prval(r) {} void finish(int r) { if (r < 0) @@ -661,6 +668,12 @@ struct ObjectOperation { *out_snaps = copy_reply.snaps; if (out_snap_seq) *out_snap_seq = copy_reply.snap_seq; + if (out_flags) + *out_flags = copy_reply.flags; + if (out_data_digest) + *out_data_digest = copy_reply.data_digest; + if (out_omap_digest) + *out_omap_digest = copy_reply.omap_digest; *cursor = copy_reply.cursor; } catch (buffer::error& e) { if (prval) @@ -679,6 +692,9 @@ struct ObjectOperation { std::map *out_omap, vector *out_snaps, snapid_t *out_snap_seq, + uint32_t *out_flags, + uint32_t *out_data_digest, + uint32_t 
*out_omap_digest, int *prval) { OSDOp& osd_op = add_op(CEPH_OSD_OP_COPY_GET); osd_op.op.copy_get.max = max; @@ -689,7 +705,9 @@ struct ObjectOperation { C_ObjectOperation_copyget *h = new C_ObjectOperation_copyget(cursor, out_size, out_mtime, out_attrs, out_data, out_omap_header, - out_omap, out_snaps, out_snap_seq, prval); + out_omap, out_snaps, out_snap_seq, + out_flags, out_data_digest, out_omap_digest, + prval); out_bl[p] = &h->bl; out_handler[p] = h; } diff --git a/src/test/librados/misc.cc b/src/test/librados/misc.cc index 3b52cbdf398..f4a291d2c35 100644 --- a/src/test/librados/misc.cc +++ b/src/test/librados/misc.cc @@ -629,6 +629,100 @@ TEST_F(LibRadosMiscPP, CopyPP) { } } +TEST_F(LibRadosMiscPP, CopyScrubPP) { + bufferlist inbl, bl, x; + for (int i=0; i<100; ++i) + x.append("barrrrrrrrrrrrrrrrrrrrrrrrrr"); + bl.append(buffer::create(g_conf->osd_copyfrom_max_chunk * 3)); + bl.zero(); + bl.append("tail"); + bufferlist cbl; + + map to_set; + for (int i=0; i<1000; ++i) + to_set[string("foo") + stringify(i)] = x; + + // small + cbl = x; + ASSERT_EQ(0, ioctx.write_full("small", cbl)); + ASSERT_EQ(0, ioctx.setxattr("small", "myattr", x)); + + // big + cbl = bl; + ASSERT_EQ(0, ioctx.write_full("big", cbl)); + + // without header + cbl = bl; + ASSERT_EQ(0, ioctx.write_full("big2", cbl)); + ASSERT_EQ(0, ioctx.setxattr("big2", "myattr", x)); + ASSERT_EQ(0, ioctx.setxattr("big2", "myattr2", x)); + ASSERT_EQ(0, ioctx.omap_set("big2", to_set)); + + // with header + cbl = bl; + ASSERT_EQ(0, ioctx.write_full("big3", cbl)); + ASSERT_EQ(0, ioctx.omap_set_header("big3", x)); + ASSERT_EQ(0, ioctx.omap_set("big3", to_set)); + + // deep scrub to ensure digests are in place + { + for (int i=0; i<10; ++i) { + ostringstream ss; + ss << "{\"prefix\": \"pg deep-scrub\", \"pgid\": \"" + << ioctx.get_id() << "." << i + << "\"}"; + cluster.mon_command(ss.str(), inbl, NULL, NULL); + } + + // give it a few seconds to go. 
this is sloppy but is usually enough time + cout << "waiting for initial deep scrubs..." << std::endl; + sleep(30); + cout << "done waiting, doing copies" << std::endl; + } + + { + ObjectWriteOperation op; + op.copy_from("small", ioctx, 0); + ASSERT_EQ(0, ioctx.operate("small.copy", &op)); + } + + { + ObjectWriteOperation op; + op.copy_from("big", ioctx, 0); + ASSERT_EQ(0, ioctx.operate("big.copy", &op)); + } + + { + ObjectWriteOperation op; + op.copy_from("big2", ioctx, 0); + ASSERT_EQ(0, ioctx.operate("big2.copy", &op)); + } + + { + ObjectWriteOperation op; + op.copy_from("big3", ioctx, 0); + ASSERT_EQ(0, ioctx.operate("big3.copy", &op)); + } + + // deep scrub to ensure digests are correct + { + for (int i=0; i<10; ++i) { + ostringstream ss; + ss << "{\"prefix\": \"pg deep-scrub\", \"pgid\": \"" + << ioctx.get_id() << "." << i + << "\"}"; + cluster.mon_command(ss.str(), inbl, NULL, NULL); + } + + // give it a few seconds to go. this is sloppy but is usually enough time + cout << "waiting for final deep scrubs..." << std::endl; + sleep(30); + cout << "done waiting" << std::endl; + } +} + + + int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv);