From: Josh Durgin
Date: Wed, 21 Jun 2017 22:05:49 +0000 (-0700)
Subject: osd/ReplicatedBackend: reset thread heartbeat after every omap entry in deep-scrub
X-Git-Tag: v12.1.1~199^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=15ce60830aed7c4c2a0b10d2cb69d8daef418d20;p=ceph.git

osd/ReplicatedBackend: reset thread heartbeat after every omap entry in deep-scrub

Resetting the thread heartbeat only once every 100 entries could mean a reset happens only after 100MB of reads. There's little cost to resetting it, so remove the option for configuring the interval.

This reduces the likelihood of crashing the osd due to too many omap values on an object.

Fixes: http://tracker.ceph.com/issues/20375
Signed-off-by: Josh Durgin
---

diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index 443ef8c1a871..8fc0458cef01 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -853,7 +853,6 @@ OPTION(osd_deep_scrub_interval, OPT_FLOAT, 60*60*24*7) // once a week OPTION(osd_deep_scrub_randomize_ratio, OPT_FLOAT, 0.15) // scrubs will randomly become deep scrubs at this rate (0.15 -> 15% of scrubs are deep) OPTION(osd_deep_scrub_stride, OPT_INT, 524288) OPTION(osd_deep_scrub_update_digest_min_age, OPT_INT, 2*60*60) // objects must be this old (seconds) before we update the whole-object digest on scrub -OPTION(osd_scan_list_ping_tp_interval, OPT_U64, 100) OPTION(osd_class_dir, OPT_STR, CEPH_LIBDIR "/rados-classes") // where rados plugins are stored OPTION(osd_open_classes_on_start, OPT_BOOL, true) OPTION(osd_class_load_list, OPT_STR, "cephfs hello journal lock log numops "
diff --git a/src/osd/ReplicatedBackend.cc b/src/osd/ReplicatedBackend.cc
index 07c0d316cf6d..0bd0ab8dd2ab 100644
--- a/src/osd/ReplicatedBackend.cc
+++ b/src/osd/ReplicatedBackend.cc
@@ -770,14 +770,9 @@ void ReplicatedBackend::be_deep_scrub( ghobject_t( poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard)); assert(iter); - uint64_t keys_scanned = 0; for (iter->seek_to_first(); iter->status() == 0 &&
iter->valid(); iter->next(false)) { - if (cct->_conf->osd_scan_list_ping_tp_interval && - (keys_scanned % cct->_conf->osd_scan_list_ping_tp_interval == 0)) { - handle.reset_tp_timeout(); - } - ++keys_scanned; + handle.reset_tp_timeout(); dout(25) << "CRC key " << iter->key() << " value:\n"; iter->value().hexdump(*_dout);