From: Guang G Yang Date: Tue, 21 Jul 2015 17:54:45 +0000 (+0000) Subject: mon: add a new pool setting to configure fast read for EC pool X-Git-Tag: v9.1.0~244^2~15 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=5eb2a77dd770ce39e76aaeb73a1494426584df9f;p=ceph.git mon: add a new pool setting to configure fast read for EC pool Signed-off-by: Guang Yang --- diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 1bf238787b9b..42bb35235ac3 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -674,11 +674,11 @@ COMMAND("osd pool rename " \ "rename to ", "osd", "rw", "cli,rest") COMMAND("osd pool get " \ "name=pool,type=CephPoolname " \ - "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|auid|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|write_fadvise_dontneed|all|min_write_recency_for_promote", \ + "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|auid|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|write_fadvise_dontneed|all|min_write_recency_for_promote|fast_read", \ "get pool parameter ", "osd", "r", "cli,rest") COMMAND("osd pool set " \ "name=pool,type=CephPoolname " \ - "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hashpspool|nodelete|nopgchange|nosizechange|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|debug_fake_ec_pool|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|auid|min_read_recency_for_promote|write_fadvise_dontneed|min_write_recency_for_promote " \ + "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_ruleset|hashpspool|nodelete|nopgchange|nosizechange|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|debug_fake_ec_pool|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|auid|min_read_recency_for_promote|write_fadvise_dontneed|min_write_recency_for_promote|fast_read " \ "name=val,type=CephString " \ "name=force,type=CephChoices,strings=--yes-i-really-mean-it,req=false", \ "set pool parameter to ", "osd", "rw", "cli,rest") diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 25cf3c10e95e..04ee1a05826d 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -2881,7 +2881,8 @@ namespace { CACHE_TARGET_FULL_RATIO, CACHE_MIN_FLUSH_AGE, CACHE_MIN_EVICT_AGE, ERASURE_CODE_PROFILE, MIN_READ_RECENCY_FOR_PROMOTE, - WRITE_FADVISE_DONTNEED, MIN_WRITE_RECENCY_FOR_PROMOTE}; + WRITE_FADVISE_DONTNEED, MIN_WRITE_RECENCY_FOR_PROMOTE, + FAST_READ}; std::set subtract_second_from_first(const std::set& first, @@ -3334,7 +3335,8 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) ("erasure_code_profile", ERASURE_CODE_PROFILE) ("min_read_recency_for_promote", MIN_READ_RECENCY_FOR_PROMOTE) ("write_fadvise_dontneed", WRITE_FADVISE_DONTNEED) - ("min_write_recency_for_promote", MIN_WRITE_RECENCY_FOR_PROMOTE); + ("min_write_recency_for_promote", MIN_WRITE_RECENCY_FOR_PROMOTE) + ("fast_read", FAST_READ); typedef std::set choices_set_t; @@ -3490,6 +3492,9 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) f->dump_int("min_write_recency_for_promote", p->min_write_recency_for_promote); break; + case FAST_READ: + f->dump_int("fast_read", p->fast_read); + break; } f->close_section(); f->flush(rdata); @@ -3588,6 +3593,9 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) ss << "min_write_recency_for_promote: " << p->min_write_recency_for_promote << "\n"; break; + case FAST_READ: + ss << "fast_read: " << p->fast_read << "\n"; + break; } rdata.append(ss.str()); ss.str(""); @@ -4942,6 +4950,16 @@ int OSDMonitor::prepare_command_pool_set(map &cmdmap, return -EINVAL; } p.min_write_recency_for_promote = n; + } else if (var == "fast_read") { + if (val == "true" || (interr.empty() && n == 1)) { + if (p.is_replicated()) { + ss << "fast read is not supported in replication pool"; + return -EINVAL; + } + p.fast_read = true; + } else if (val == "false" || (interr.empty() && n == 0)) { + p.fast_read = false; + } } else { ss << "unrecognized variable '" << var << "'"; return -EINVAL; diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index d5bfbcf79b2a..f4991fdfafce 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -112,7 +112,7 @@ void ReplicatedPG::OpContext::start_async_reads(ReplicatedPG *pg) pg->pgbackend->objects_read_async( obc->obs.oi.soid, pending_async_reads, - new OnReadComplete(pg, this)); + new OnReadComplete(pg, this), pg->get_pool().fast_read); pending_async_reads.clear(); } void ReplicatedPG::OpContext::finish_read(ReplicatedPG *pg) diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 50a80b06cb1f..0f78242c7652 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -947,6 +947,7 @@ void pg_pool_t::dump(Formatter *f) const f->dump_unsigned("min_write_recency_for_promote", min_write_recency_for_promote); f->dump_unsigned("stripe_width", get_stripe_width()); f->dump_unsigned("expected_num_objects", expected_num_objects); + f->dump_bool("fast_read", fast_read); } void pg_pool_t::convert_to_pg_shards(const vector &from, set* to) const { @@ -1256,7 +1257,7 @@ void pg_pool_t::encode(bufferlist& bl, uint64_t features) const return; } - ENCODE_START(21, 5, bl); + ENCODE_START(22, 5, bl); ::encode(type, bl); ::encode(size, bl); ::encode(crush_ruleset, bl); @@ -1301,12 +1302,13 @@ void pg_pool_t::encode(bufferlist& bl, uint64_t features) const ::encode(cache_target_dirty_high_ratio_micro, bl); ::encode(min_write_recency_for_promote, bl); ::encode(use_gmt_hitset, bl); + ::encode(fast_read, bl); ENCODE_FINISH(bl); } void pg_pool_t::decode(bufferlist::iterator& bl) { - DECODE_START_LEGACY_COMPAT_LEN(21, 5, 5, bl); + DECODE_START_LEGACY_COMPAT_LEN(22, 5, 5, bl); ::decode(type, bl); ::decode(size, bl); ::decode(crush_ruleset, bl); @@ -1433,6 +1435,11 @@ void pg_pool_t::decode(bufferlist::iterator& bl) } else { use_gmt_hitset = false; } + if (struct_v >= 22) { + ::decode(fast_read, bl); + } else { + fast_read = false; + } DECODE_FINISH(bl); calc_pg_masks(); } @@ -1490,6 +1497,7 @@ void pg_pool_t::generate_test_instances(list& o) a.cache_min_evict_age = 2321; a.erasure_code_profile = "profile in osdmap"; a.expected_num_objects = 123456; + a.fast_read = false; o.push_back(new pg_pool_t(a)); } @@ -1541,6 +1549,8 @@ ostream& operator<<(ostream& out, const pg_pool_t& p) out << " stripe_width " << p.get_stripe_width(); if (p.expected_num_objects) out << " expected_num_objects " << p.expected_num_objects; + if (p.fast_read) + out << " fast_read " << p.fast_read; return out; } diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index cf67153c9116..9af5db99d175 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1119,6 +1119,7 @@ public: uint64_t expected_num_objects; ///< expected number of objects on this pool, a value of 0 indicates ///< user does not specify any expected value + bool fast_read; ///< whether turn on fast read on the pool or not pg_pool_t() : flags(0), type(0), size(0), min_size(0), @@ -1146,7 +1147,8 @@ public: min_read_recency_for_promote(0), min_write_recency_for_promote(0), stripe_width(0), - expected_num_objects(0) + expected_num_objects(0), + fast_read(false) { } void dump(Formatter *f) const; diff --git a/src/test/pybind/test_ceph_argparse.py b/src/test/pybind/test_ceph_argparse.py index 6bd2b0835263..bf23beaef947 100755 --- a/src/test/pybind/test_ceph_argparse.py +++ b/src/test/pybind/test_ceph_argparse.py @@ -597,7 +597,7 @@ class TestOSD(TestArgparse): self.assert_valid_command(['osd', 'crush', 'dump']) assert_equal({}, validate_command(sigdict, ['osd', 'crush'])) assert_equal({}, validate_command(sigdict, ['osd', 'crush', - 'dump', + 'dump', 'toomany'])) def test_setcrushmap(self): @@ -982,7 +982,7 @@ class TestOSD(TestArgparse): assert_equal({}, validate_command(sigdict, ['osd', 'pool', 'create', 'poolname', '128', '128', - 'erasure', '^^^', + 'erasure', '^^^', 'ruleset'])) assert_equal({}, validate_command(sigdict, ['osd', 'pool', 'create', 'poolname', @@ -1026,7 +1026,7 @@ class TestOSD(TestArgparse): def test_pool_get(self): for var in ('size', 'min_size', 'crash_replay_interval', - 'pg_num', 'pgp_num', 'crush_ruleset', 'auid'): + 'pg_num', 'pgp_num', 'crush_ruleset', 'auid', 'fast_read'): self.assert_valid_command(['osd', 'pool', 'get', 'poolname', var]) assert_equal({}, validate_command(sigdict, ['osd', 'pool'])) assert_equal({}, validate_command(sigdict, ['osd', 'pool', @@ -1043,7 +1043,7 @@ class TestOSD(TestArgparse): def test_pool_set(self): for var in ('size', 'min_size', 'crash_replay_interval', 'pg_num', 'pgp_num', 'crush_ruleset', - 'hashpspool', 'auid'): + 'hashpspool', 'auid', 'fast_read'): self.assert_valid_command(['osd', 'pool', 'set', 'poolname', var, 'value']) assert_equal({}, validate_command(sigdict, ['osd', 'pool', @@ -1151,7 +1151,7 @@ class TestConfigKey(TestArgparse): def test_list(self): self.check_no_arg('config-key', 'list') # Local Variables: -# compile-command: "cd ../.. ; make -j4 && +# compile-command: "cd ../.. ; make -j4 && # PYTHONPATH=pybind nosetests --stop \ # test/pybind/test_ceph_argparse.py # test_ceph_argparse.py:TestOSD.test_rm" # End: