From 5f92d825ea49d51218cccd168ac9327a51631d8c Mon Sep 17 00:00:00 2001 From: Jianpeng Ma Date: Thu, 11 Dec 2014 10:08:45 +0800 Subject: [PATCH] osd: Make async-read can handle fadvise flags. Signed-off-by: Jianpeng Ma --- src/include/ceph_features.h | 2 ++ src/messages/MOSDECSubOpRead.h | 4 +-- src/osd/ECBackend.cc | 59 ++++++++++++++++--------------- src/osd/ECBackend.h | 6 ++-- src/osd/ECMsgTypes.cc | 63 ++++++++++++++++++++++++++-------- src/osd/ECMsgTypes.h | 7 ++-- src/osd/PGBackend.h | 2 +- src/osd/ReplicatedBackend.cc | 9 ++--- src/osd/ReplicatedBackend.h | 2 +- src/osd/ReplicatedPG.cc | 4 +-- src/osd/ReplicatedPG.h | 6 ++-- src/test/encoding/types.h | 2 +- 12 files changed, 104 insertions(+), 62 deletions(-) diff --git a/src/include/ceph_features.h b/src/include/ceph_features.h index 9b9ef5c0c8726..a193d5eb93ca5 100644 --- a/src/include/ceph_features.h +++ b/src/include/ceph_features.h @@ -54,6 +54,7 @@ #define CEPH_FEATURE_OSD_POOLRESEND (1ULL<<43) #define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 (1ULL<<44) #define CEPH_FEATURE_OSD_SET_ALLOC_HINT (1ULL<<45) +#define CEPH_FEATURE_OSD_FADVISE_FLAGS (1ULL<<46) /* * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature @@ -128,6 +129,7 @@ static inline unsigned long long ceph_sanitize_features(unsigned long long f) { CEPH_FEATURE_OSD_POOLRESEND | \ CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 | \ CEPH_FEATURE_OSD_SET_ALLOC_HINT | \ + CEPH_FEATURE_OSD_FADVISE_FLAGS | \ 0ULL) #define CEPH_FEATURES_SUPPORTED_DEFAULT CEPH_FEATURES_ALL diff --git a/src/messages/MOSDECSubOpRead.h b/src/messages/MOSDECSubOpRead.h index 99e62e6c48a67..3e315ef26a8e3 100644 --- a/src/messages/MOSDECSubOpRead.h +++ b/src/messages/MOSDECSubOpRead.h @@ -20,7 +20,7 @@ #include "osd/ECMsgTypes.h" class MOSDECSubOpRead : public Message { - static const int HEAD_VERSION = 1; + static const int HEAD_VERSION = 2; static const int COMPAT_VERSION = 1; public: @@ -46,7 +46,7 @@ public: virtual void encode_payload(uint64_t features) { ::encode(pgid, payload); ::encode(map_epoch, payload); - ::encode(op, payload); + ::encode(op, payload, features); } const char *get_type_name() const { return "MOSDECSubOpRead"; } diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index ed971ab9ea854..20f6caa9f695d 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -213,8 +213,8 @@ struct RecoveryMessages { const hobject_t &hoid, uint64_t off, uint64_t len, const set &need, bool attrs) { - list > to_read; - to_read.push_back(make_pair(off, len)); + list > to_read; + to_read.push_back(boost::make_tuple(off, len, 0)); assert(!reads.count(hoid)); reads.insert( make_pair( @@ -856,11 +856,11 @@ void ECBackend::handle_sub_read( ECSubRead &op, ECSubReadReply *reply) { - for(map > >::iterator i = + for(map > >::iterator i = op.to_read.begin(); i != op.to_read.end(); ++i) { - for (list >::iterator j = i->second.begin(); + for (list >::iterator j = i->second.begin(); j != i->second.end(); ++j) { bufferlist bl; @@ -868,9 +868,9 @@ void ECBackend::handle_sub_read( i->first.is_temp() ? temp_coll : coll, ghobject_t( i->first, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard), - j->first, - j->second, - bl, + j->get<0>(), + j->get<1>(), + bl, j->get<2>(), false); if (r < 0) { assert(0); @@ -880,7 +880,7 @@ void ECBackend::handle_sub_read( } else { reply->buffers_read[i->first].push_back( make_pair( - j->first, + j->get<0>(), bl) ); } @@ -949,7 +949,7 @@ void ECBackend::handle_sub_read_reply( // We canceled this read! @see filter_read_op continue; } - list >::const_iterator req_iter = + list >::const_iterator req_iter = rop.to_read.find(i->first)->second.to_read.begin(); list< boost::tuple< @@ -962,7 +962,7 @@ void ECBackend::handle_sub_read_reply( assert(riter != rop.complete[i->first].returned.end()); pair adjusted = sinfo.aligned_offset_len_to_chunk( - *req_iter); + make_pair(req_iter->get<0>(), req_iter->get<1>())); assert(adjusted.first == j->first); riter->get<2>()[from].claim(j->second); } @@ -1396,22 +1396,23 @@ void ECBackend::start_read_op( op.obj_to_source[i->first].insert(*j); op.source_to_obj[*j].insert(i->first); } - for (list >::const_iterator j = + for (list >::const_iterator j = i->second.to_read.begin(); j != i->second.to_read.end(); ++j) { reslist.push_back( boost::make_tuple( - j->first, - j->second, + j->get<0>(), + j->get<1>(), map())); pair chunk_off_len = - sinfo.aligned_offset_len_to_chunk( - *j); + sinfo.aligned_offset_len_to_chunk(make_pair(j->get<0>(), j->get<1>())); for (set::const_iterator k = i->second.need.begin(); k != i->second.need.end(); ++k) { - messages[*k].to_read[i->first].push_back(chunk_off_len); + messages[*k].to_read[i->first].push_back(boost::make_tuple(chunk_off_len.first, + chunk_off_len.second, + j->get<2>())); } assert(!need_attrs); } @@ -1580,12 +1581,12 @@ struct CallClientContexts : public GenContext &> { ECBackend *ec; ECBackend::ClientAsyncReadStatus *status; - list, + list, pair > > to_read; CallClientContexts( ECBackend *ec, ECBackend::ClientAsyncReadStatus *status, - const list, + const list, pair > > &to_read) : ec(ec), status(status), to_read(to_read) {} void finish(pair &in) { @@ -1593,12 +1594,12 @@ struct CallClientContexts : assert(res.returned.size() == to_read.size()); assert(res.r == 0); assert(res.errors.empty()); - for (list, + for (list, pair > >::iterator i = to_read.begin(); i != to_read.end(); to_read.erase(i++)) { pair adjusted = - ec->sinfo.offset_len_to_stripe_bounds(i->first); + ec->sinfo.offset_len_to_stripe_bounds(make_pair(i->first.get<0>(), i->first.get<1>())); assert(res.returned.front().get<0>() == adjusted.first && res.returned.front().get<1>() == adjusted.second); map to_decode; @@ -1618,8 +1619,8 @@ struct CallClientContexts : assert(i->second.first); i->second.first->substr_of( bl, - i->first.first - adjusted.first, - MIN(i->first.second, bl.length() - (i->first.first - adjusted.first))); + i->first.get<0>() - adjusted.first, + MIN(i->first.get<1>(), bl.length() - (i->first.get<0>() - adjusted.first))); if (i->second.second) { i->second.second->complete(i->second.first->length()); } @@ -1637,7 +1638,7 @@ struct CallClientContexts : } } ~CallClientContexts() { - for (list, + for (list, pair > >::iterator i = to_read.begin(); i != to_read.end(); to_read.erase(i++)) { @@ -1648,21 +1649,23 @@ struct CallClientContexts : void ECBackend::objects_read_async( const hobject_t &hoid, - const list, + const list, pair > > &to_read, Context *on_complete) { in_progress_client_reads.push_back(ClientAsyncReadStatus(on_complete)); CallClientContexts *c = new CallClientContexts( this, &(in_progress_client_reads.back()), to_read); - list > offsets; - for (list, + + list > offsets; + pair tmp; + for (list, pair > >::const_iterator i = to_read.begin(); i != to_read.end(); ++i) { - offsets.push_back( - sinfo.offset_len_to_stripe_bounds(i->first)); + tmp = sinfo.offset_len_to_stripe_bounds(make_pair(i->first.get<0>(), i->first.get<1>())); + offsets.push_back(boost::make_tuple(tmp.first, tmp.second, i->first.get<2>())); } const vector &chunk_mapping = ec_impl->get_chunk_mapping(); diff --git a/src/osd/ECBackend.h b/src/osd/ECBackend.h index 2a71c339819e1..147e3e85d1db8 100644 --- a/src/osd/ECBackend.h +++ b/src/osd/ECBackend.h @@ -143,7 +143,7 @@ public: list in_progress_client_reads; void objects_read_async( const hobject_t &hoid, - const list, + const list, pair > > &to_read, Context *on_complete); @@ -265,13 +265,13 @@ public: read_result_t() : r(0) {} }; struct read_request_t { - const list > to_read; + const list > to_read; const set need; const bool want_attrs; GenContext &> *cb; read_request_t( const hobject_t &hoid, - const list > &to_read, + const list > &to_read, const set &need, bool want_attrs, GenContext &> *cb) diff --git a/src/osd/ECMsgTypes.cc b/src/osd/ECMsgTypes.cc index 282355fe21e34..cf63611a2d64d 100644 --- a/src/osd/ECMsgTypes.cc +++ b/src/osd/ECMsgTypes.cc @@ -153,9 +153,29 @@ void ECSubWriteReply::generate_test_instances(list& o) o.back()->applied = true; } -void ECSubRead::encode(bufferlist &bl) const +void ECSubRead::encode(bufferlist &bl, uint64_t features) const { - ENCODE_START(1, 1, bl); + if ((features & CEPH_FEATURE_OSD_FADVISE_FLAGS) == 0) { + ENCODE_START(1, 1, bl); + ::encode(from, bl); + ::encode(tid, bl); + map > > tmp; + for (map > >::const_iterator m = to_read.begin(); + m != to_read.end(); ++m) { + list > tlist; + for (list >::const_iterator l = m->second.begin(); + l != m->second.end(); ++l) { + tlist.push_back(std::make_pair(l->get<0>(), l->get<1>())); + } + tmp[m->first] = tlist; + } + ::encode(tmp, bl); + ::encode(attrs_to_read, bl); + ENCODE_FINISH(bl); + return; + } + + ENCODE_START(2, 2, bl); ::encode(from, bl); ::encode(tid, bl); ::encode(to_read, bl); @@ -165,10 +185,24 @@ void ECSubRead::encode(bufferlist &bl) const void ECSubRead::decode(bufferlist::iterator &bl) { - DECODE_START(1, bl); + DECODE_START(2, bl); ::decode(from, bl); ::decode(tid, bl); - ::decode(to_read, bl); + if (struct_v == 1) { + map > >tmp; + ::decode(tmp, bl); + for (map > >::const_iterator m = tmp.begin(); + m != tmp.end(); ++m) { + list > tlist; + for (list > ::const_iterator l = m->second.begin(); + l != m->second.end(); ++l) { + tlist.push_back(boost::make_tuple(l->first, l->second, 0)); + } + to_read[m->first] = tlist; + } + } else { + ::decode(to_read, bl); + } ::decode(attrs_to_read, bl); DECODE_FINISH(bl); } @@ -187,20 +221,21 @@ void ECSubRead::dump(Formatter *f) const f->dump_stream("from") << from; f->dump_unsigned("tid", tid); f->open_array_section("objects"); - for (map > >::const_iterator i = + for (map > >::const_iterator i = to_read.begin(); i != to_read.end(); ++i) { f->open_object_section("object"); f->dump_stream("oid") << i->first; f->open_array_section("extents"); - for (list >::const_iterator j = + for (list >::const_iterator j = i->second.begin(); j != i->second.end(); ++j) { f->open_object_section("extent"); - f->dump_unsigned("off", j->first); - f->dump_unsigned("len", j->second); + f->dump_unsigned("off", j->get<0>()); + f->dump_unsigned("len", j->get<1>()); + f->dump_unsigned("flags", j->get<2>()); f->close_section(); } f->close_section(); @@ -226,16 +261,16 @@ void ECSubRead::generate_test_instances(list& o) o.push_back(new ECSubRead()); o.back()->from = pg_shard_t(2, shard_id_t(255)); o.back()->tid = 1; - o.back()->to_read[hoid1].push_back(make_pair(100, 200)); - o.back()->to_read[hoid1].push_back(make_pair(400, 600)); - o.back()->to_read[hoid2].push_back(make_pair(400, 600)); + o.back()->to_read[hoid1].push_back(boost::make_tuple(100, 200, 0)); + o.back()->to_read[hoid1].push_back(boost::make_tuple(400, 600, 0)); + o.back()->to_read[hoid2].push_back(boost::make_tuple(400, 600, 0)); o.back()->attrs_to_read.insert(hoid1); o.push_back(new ECSubRead()); o.back()->from = pg_shard_t(2, shard_id_t(255)); o.back()->tid = 300; - o.back()->to_read[hoid1].push_back(make_pair(300, 200)); - o.back()->to_read[hoid2].push_back(make_pair(400, 600)); - o.back()->to_read[hoid2].push_back(make_pair(2000, 600)); + o.back()->to_read[hoid1].push_back(boost::make_tuple(300, 200, 0)); + o.back()->to_read[hoid2].push_back(boost::make_tuple(400, 600, 0)); + o.back()->to_read[hoid2].push_back(boost::make_tuple(2000, 600, 0)); o.back()->attrs_to_read.insert(hoid2); } diff --git a/src/osd/ECMsgTypes.h b/src/osd/ECMsgTypes.h index 1cdfa57e153c5..78193830959b1 100644 --- a/src/osd/ECMsgTypes.h +++ b/src/osd/ECMsgTypes.h @@ -18,6 +18,7 @@ #include "osd_types.h" #include "include/buffer.h" #include "os/ObjectStore.h" +#include "boost/tuple/tuple.hpp" struct ECSubWrite { pg_shard_t from; @@ -80,14 +81,14 @@ WRITE_CLASS_ENCODER(ECSubWriteReply) struct ECSubRead { pg_shard_t from; ceph_tid_t tid; - map > > to_read; + map > > to_read; set attrs_to_read; - void encode(bufferlist &bl) const; + void encode(bufferlist &bl, uint64_t features) const; void decode(bufferlist::iterator &bl); void dump(Formatter *f) const; static void generate_test_instances(list& o); }; -WRITE_CLASS_ENCODER(ECSubRead) +WRITE_CLASS_ENCODER_FEATURES(ECSubRead) struct ECSubReadReply { pg_shard_t from; diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h index 82d520e8c32b6..dea075aeccbd7 100644 --- a/src/osd/PGBackend.h +++ b/src/osd/PGBackend.h @@ -577,7 +577,7 @@ virtual void objects_read_async( const hobject_t &hoid, - const list, + const list, pair > > &to_read, Context *on_complete) = 0; diff --git a/src/osd/ReplicatedBackend.cc b/src/osd/ReplicatedBackend.cc index 893ed180573bb..97e69dd0c646f 100644 --- a/src/osd/ReplicatedBackend.cc +++ b/src/osd/ReplicatedBackend.cc @@ -245,18 +245,19 @@ struct AsyncReadCallback : public GenContext { }; void ReplicatedBackend::objects_read_async( const hobject_t &hoid, - const list, + const list, pair > > &to_read, Context *on_complete) { int r = 0; - for (list, + for (list, pair > >::const_iterator i = to_read.begin(); i != to_read.end() && r >= 0; ++i) { - int _r = store->read(coll, hoid, i->first.first, - i->first.second, *(i->second.first)); + int _r = store->read(coll, hoid, i->first.get<0>(), + i->first.get<1>(), *(i->second.first), + i->first.get<2>()); if (i->second.second) { get_parent()->schedule_recovery_work( get_parent()->bless_gencontext( diff --git a/src/osd/ReplicatedBackend.h b/src/osd/ReplicatedBackend.h index 7de3922d2860e..67a4a1f7ffbf7 100644 --- a/src/osd/ReplicatedBackend.h +++ b/src/osd/ReplicatedBackend.h @@ -158,7 +158,7 @@ public: void objects_read_async( const hobject_t &hoid, - const list, + const list, pair > > &to_read, Context *on_complete); diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 502cf0cc32b0a..8b587c13603bb 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -3327,7 +3327,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector& ops) } else if (pool.info.require_rollback()) { ctx->pending_async_reads.push_back( make_pair( - make_pair(op.extent.offset, op.extent.length), + boost::make_tuple(op.extent.offset, op.extent.length, op.flags), make_pair(&osd_op.outdata, new FillInExtent(&op.extent.length)))); dout(10) << " async_read noted for " << soid << dendl; } else { @@ -5832,7 +5832,7 @@ int ReplicatedPG::fill_in_copy_get( async_read_started = true; ctx->pending_async_reads.push_back( make_pair( - make_pair(cursor.data_offset, left), + boost::make_tuple(cursor.data_offset, left, 0), make_pair(&bl, cb))); result = MIN(oi.size - cursor.data_offset, (uint64_t)left); cb->len = result; diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index 3d57058cf8844..d3539fde6600c 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -512,8 +512,8 @@ public: pending_attrs.clear(); } - // pending async reads -> - list, + // pending async reads -> + list, pair > > pending_async_reads; int async_read_result; unsigned inflightreads; @@ -571,7 +571,7 @@ public: assert(lock_to_release == NONE); if (reply) reply->put(); - for (list, + for (list, pair > >::iterator i = pending_async_reads.begin(); i != pending_async_reads.end(); diff --git a/src/test/encoding/types.h b/src/test/encoding/types.h index 56f39b08b914d..59a576e45214f 100644 --- a/src/test/encoding/types.h +++ b/src/test/encoding/types.h @@ -89,7 +89,7 @@ TYPE(ECUtil::HashInfo) #include "osd/ECMsgTypes.h" TYPE(ECSubWrite) TYPE(ECSubWriteReply) -TYPE(ECSubRead) +TYPE_FEATUREFUL(ECSubRead) TYPE(ECSubReadReply) #include "osd/HitSet.h" -- 2.39.5