From: Sage Weil Date: Wed, 10 Dec 2014 20:50:52 +0000 (-0800) Subject: osd: EIO if a full-object read produces a bad digest X-Git-Tag: v0.92~111^2~6 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=75b2bc282300709ce93d0bc48d8432a4d64eb948;p=ceph.git osd: EIO if a full-object read produces a bad digest Add a tunable to control this. Returning an EIO instead of garbled data may not be the best approach in all environments. Eventually we want to do something better here, like read from a replica instead. Signed-off-by: Sage Weil --- diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 4c191101d555..8313d0014191 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -511,6 +511,8 @@ OPTION(osd_recover_clone_overlap, OPT_BOOL, true) // preserve clone_overlap du OPTION(osd_op_num_threads_per_shard, OPT_INT, 2) OPTION(osd_op_num_shards, OPT_INT, 5) +OPTION(osd_read_eio_on_bad_digest, OPT_BOOL, true) // return EIO if object digest is bad + // Only use clone_overlap for recovery if there are fewer than // osd_recover_clone_overlap_limit entries in the overlap set OPTION(osd_recover_clone_overlap_limit, OPT_INT, 10) diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index b059ce1dcfa8..ffc7cb58b782 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -3342,6 +3342,22 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops) } dout(10) << " read got " << r << " / " << op.extent.length << " bytes from obj " << soid << dendl; + + // whole object? can we verify the checksum? + if (result >= 0 && + op.extent.offset == 0 && op.extent.length == oi.size && + oi.is_data_digest()) { + uint32_t crc = osd_op.outdata.crc32c(-1); + if (oi.data_digest != crc) { + osd->clog->error() << info.pgid << std::hex + << " full-object read crc " << crc + << " != expected " << oi.data_digest + << std::dec << " on " << soid; + // FIXME fall back to replica or something? 
+ if (g_conf->osd_read_eio_on_bad_digest) + result = -EIO; + } + } } if (first_read) { first_read = false;