git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: EIO if a full-object read produces a bad digest
author Sage Weil <sage@redhat.com>
Wed, 10 Dec 2014 20:50:52 +0000 (12:50 -0800)
committer Sage Weil <sage@redhat.com>
Sat, 20 Dec 2014 15:30:04 +0000 (07:30 -0800)
Add a tunable to control this.  Returning an EIO instead of garbled data
may not be the best approach in all environments.

Eventually we want to do something better here, like read from a replica
instead.

Signed-off-by: Sage Weil <sage@redhat.com>
src/common/config_opts.h
src/osd/ReplicatedPG.cc

index 4c191101d5550ef47d096bc7beeda8031232db72..8313d0014191b204de5e5b259d697de1e7dcf61a 100644 (file)
@@ -511,6 +511,8 @@ OPTION(osd_recover_clone_overlap, OPT_BOOL, true)   // preserve clone_overlap du
 OPTION(osd_op_num_threads_per_shard, OPT_INT, 2)
 OPTION(osd_op_num_shards, OPT_INT, 5)
 
+OPTION(osd_read_eio_on_bad_digest, OPT_BOOL, true) // return EIO if object digest is bad
+
 // Only use clone_overlap for recovery if there are fewer than
 // osd_recover_clone_overlap_limit entries in the overlap set
 OPTION(osd_recover_clone_overlap_limit, OPT_INT, 10)
index b059ce1dcfa8c8aff03cba91e5cf226bfc353040..ffc7cb58b782b4233ee4c9644126b9d966242117 100644 (file)
@@ -3342,6 +3342,22 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
          }
          dout(10) << " read got " << r << " / " << op.extent.length
                   << " bytes from obj " << soid << dendl;
+
+         // whole object?  can we verify the checksum?
+         if (result >= 0 &&
+             op.extent.offset == 0 && op.extent.length == oi.size &&
+             oi.is_data_digest()) {
+           uint32_t crc = osd_op.outdata.crc32c(-1);
+           if (oi.data_digest != crc) {
+             osd->clog->error() << info.pgid << std::hex
+                                << " full-object read crc " << crc
+                                << " != expected " << oi.data_digest
+                                << std::dec << " on " << soid;
+             // FIXME fall back to replica or something?
+             if (g_conf->osd_read_eio_on_bad_digest)
+               result = -EIO;
+           }
+         }
        }
        if (first_read) {
          first_read = false;