git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Make async-read handle fadvise flags.
author Jianpeng Ma <jianpeng.ma@intel.com>
Thu, 11 Dec 2014 02:08:45 +0000 (10:08 +0800)
committer Jianpeng Ma <jianpeng.ma@intel.com>
Fri, 12 Dec 2014 06:23:35 +0000 (14:23 +0800)
Signed-off-by: Jianpeng Ma <jianpeng.ma@intel.com>
12 files changed:
src/include/ceph_features.h
src/messages/MOSDECSubOpRead.h
src/osd/ECBackend.cc
src/osd/ECBackend.h
src/osd/ECMsgTypes.cc
src/osd/ECMsgTypes.h
src/osd/PGBackend.h
src/osd/ReplicatedBackend.cc
src/osd/ReplicatedBackend.h
src/osd/ReplicatedPG.cc
src/osd/ReplicatedPG.h
src/test/encoding/types.h

index 9b9ef5c0c87260a4d1311ad1e0eb41d1b1f9cad5..a193d5eb93ca5e5fdcc8f12e5199115ef431221d 100644 (file)
@@ -54,6 +54,7 @@
 #define CEPH_FEATURE_OSD_POOLRESEND    (1ULL<<43)
 #define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 (1ULL<<44)
 #define CEPH_FEATURE_OSD_SET_ALLOC_HINT (1ULL<<45)
+#define CEPH_FEATURE_OSD_FADVISE_FLAGS (1ULL<<46)
 
 /*
  * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
@@ -128,6 +129,7 @@ static inline unsigned long long ceph_sanitize_features(unsigned long long f) {
         CEPH_FEATURE_OSD_POOLRESEND |  \
          CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 |   \
          CEPH_FEATURE_OSD_SET_ALLOC_HINT |   \
+        CEPH_FEATURE_OSD_FADVISE_FLAGS | \
         0ULL)
 
 #define CEPH_FEATURES_SUPPORTED_DEFAULT  CEPH_FEATURES_ALL
index 99e62e6c48a6762c00493f690cfd925584a42649..3e315ef26a8e3c357efcdbe80b4b8f14f01fb383 100644 (file)
@@ -20,7 +20,7 @@
 #include "osd/ECMsgTypes.h"
 
 class MOSDECSubOpRead : public Message {
-  static const int HEAD_VERSION = 1;
+  static const int HEAD_VERSION = 2;
   static const int COMPAT_VERSION = 1;
 
 public:
@@ -46,7 +46,7 @@ public:
   virtual void encode_payload(uint64_t features) {
     ::encode(pgid, payload);
     ::encode(map_epoch, payload);
-    ::encode(op, payload);
+    ::encode(op, payload, features);
   }
 
   const char *get_type_name() const { return "MOSDECSubOpRead"; }
index ed971ab9ea854b2282c52ab2944e78c931434938..20f6caa9f695da9f2c8b45d75c19bab7f157c9da 100644 (file)
@@ -213,8 +213,8 @@ struct RecoveryMessages {
     const hobject_t &hoid, uint64_t off, uint64_t len,
     const set<pg_shard_t> &need,
     bool attrs) {
-    list<pair<uint64_t, uint64_t> > to_read;
-    to_read.push_back(make_pair(off, len));
+    list<boost::tuple<uint64_t, uint64_t, uint32_t> > to_read;
+    to_read.push_back(boost::make_tuple(off, len, 0));
     assert(!reads.count(hoid));
     reads.insert(
       make_pair(
@@ -856,11 +856,11 @@ void ECBackend::handle_sub_read(
   ECSubRead &op,
   ECSubReadReply *reply)
 {
-  for(map<hobject_t, list<pair<uint64_t, uint64_t> > >::iterator i =
+  for(map<hobject_t, list<boost::tuple<uint64_t, uint64_t, uint32_t> > >::iterator i =
         op.to_read.begin();
       i != op.to_read.end();
       ++i) {
-    for (list<pair<uint64_t, uint64_t> >::iterator j = i->second.begin();
+    for (list<boost::tuple<uint64_t, uint64_t, uint32_t> >::iterator j = i->second.begin();
         j != i->second.end();
         ++j) {
       bufferlist bl;
@@ -868,9 +868,9 @@ void ECBackend::handle_sub_read(
        i->first.is_temp() ? temp_coll : coll,
        ghobject_t(
          i->first, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
-       j->first,
-       j->second,
-       bl,
+       j->get<0>(),
+       j->get<1>(),
+       bl, j->get<2>(),
        false);
       if (r < 0) {
        assert(0);
@@ -880,7 +880,7 @@ void ECBackend::handle_sub_read(
       } else {
        reply->buffers_read[i->first].push_back(
          make_pair(
-           j->first,
+           j->get<0>(),
            bl)
          );
       }
@@ -949,7 +949,7 @@ void ECBackend::handle_sub_read_reply(
       // We canceled this read! @see filter_read_op
       continue;
     }
-    list<pair<uint64_t, uint64_t> >::const_iterator req_iter =
+    list<boost::tuple<uint64_t, uint64_t, uint32_t> >::const_iterator req_iter =
       rop.to_read.find(i->first)->second.to_read.begin();
     list<
       boost::tuple<
@@ -962,7 +962,7 @@ void ECBackend::handle_sub_read_reply(
       assert(riter != rop.complete[i->first].returned.end());
       pair<uint64_t, uint64_t> adjusted =
        sinfo.aligned_offset_len_to_chunk(
-         *req_iter);
+         make_pair(req_iter->get<0>(), req_iter->get<1>()));
       assert(adjusted.first == j->first);
       riter->get<2>()[from].claim(j->second);
     }
@@ -1396,22 +1396,23 @@ void ECBackend::start_read_op(
       op.obj_to_source[i->first].insert(*j);
       op.source_to_obj[*j].insert(i->first);
     }
-    for (list<pair<uint64_t, uint64_t> >::const_iterator j =
+    for (list<boost::tuple<uint64_t, uint64_t, uint32_t> >::const_iterator j =
           i->second.to_read.begin();
         j != i->second.to_read.end();
         ++j) {
       reslist.push_back(
        boost::make_tuple(
-         j->first,
-         j->second,
+         j->get<0>(),
+         j->get<1>(),
          map<pg_shard_t, bufferlist>()));
       pair<uint64_t, uint64_t> chunk_off_len =
-       sinfo.aligned_offset_len_to_chunk(
-         *j);
+       sinfo.aligned_offset_len_to_chunk(make_pair(j->get<0>(), j->get<1>()));
       for (set<pg_shard_t>::const_iterator k = i->second.need.begin();
           k != i->second.need.end();
           ++k) {
-       messages[*k].to_read[i->first].push_back(chunk_off_len);
+       messages[*k].to_read[i->first].push_back(boost::make_tuple(chunk_off_len.first,
+                                                                   chunk_off_len.second,
+                                                                   j->get<2>()));
       }
       assert(!need_attrs);
     }
@@ -1580,12 +1581,12 @@ struct CallClientContexts :
   public GenContext<pair<RecoveryMessages*, ECBackend::read_result_t& > &> {
   ECBackend *ec;
   ECBackend::ClientAsyncReadStatus *status;
-  list<pair<pair<uint64_t, uint64_t>,
+  list<pair<boost::tuple<uint64_t, uint64_t, uint32_t>,
            pair<bufferlist*, Context*> > > to_read;
   CallClientContexts(
     ECBackend *ec,
     ECBackend::ClientAsyncReadStatus *status,
-    const list<pair<pair<uint64_t, uint64_t>,
+    const list<pair<boost::tuple<uint64_t, uint64_t, uint32_t>,
                    pair<bufferlist*, Context*> > > &to_read)
     : ec(ec), status(status), to_read(to_read) {}
   void finish(pair<RecoveryMessages *, ECBackend::read_result_t &> &in) {
@@ -1593,12 +1594,12 @@ struct CallClientContexts :
     assert(res.returned.size() == to_read.size());
     assert(res.r == 0);
     assert(res.errors.empty());
-    for (list<pair<pair<uint64_t, uint64_t>,
+    for (list<pair<boost::tuple<uint64_t, uint64_t, uint32_t>,
                   pair<bufferlist*, Context*> > >::iterator i = to_read.begin();
         i != to_read.end();
         to_read.erase(i++)) {
       pair<uint64_t, uint64_t> adjusted =
-       ec->sinfo.offset_len_to_stripe_bounds(i->first);
+       ec->sinfo.offset_len_to_stripe_bounds(make_pair(i->first.get<0>(), i->first.get<1>()));
       assert(res.returned.front().get<0>() == adjusted.first &&
             res.returned.front().get<1>() == adjusted.second);
       map<int, bufferlist> to_decode;
@@ -1618,8 +1619,8 @@ struct CallClientContexts :
       assert(i->second.first);
       i->second.first->substr_of(
        bl,
-       i->first.first - adjusted.first,
-       MIN(i->first.second, bl.length() - (i->first.first - adjusted.first)));
+       i->first.get<0>() - adjusted.first,
+       MIN(i->first.get<1>(), bl.length() - (i->first.get<0>() - adjusted.first)));
       if (i->second.second) {
        i->second.second->complete(i->second.first->length());
       }
@@ -1637,7 +1638,7 @@ struct CallClientContexts :
     }
   }
   ~CallClientContexts() {
-    for (list<pair<pair<uint64_t, uint64_t>,
+    for (list<pair<boost::tuple<uint64_t, uint64_t, uint32_t>,
                   pair<bufferlist*, Context*> > >::iterator i = to_read.begin();
         i != to_read.end();
         to_read.erase(i++)) {
@@ -1648,21 +1649,23 @@ struct CallClientContexts :
 
 void ECBackend::objects_read_async(
   const hobject_t &hoid,
-  const list<pair<pair<uint64_t, uint64_t>,
+  const list<pair<boost::tuple<uint64_t, uint64_t, uint32_t>,
                  pair<bufferlist*, Context*> > > &to_read,
   Context *on_complete)
 {
   in_progress_client_reads.push_back(ClientAsyncReadStatus(on_complete));
   CallClientContexts *c = new CallClientContexts(
     this, &(in_progress_client_reads.back()), to_read);
-  list<pair<uint64_t, uint64_t> > offsets;
-  for (list<pair<pair<uint64_t, uint64_t>,
+
+  list<boost::tuple<uint64_t, uint64_t, uint32_t> > offsets;
+  pair<uint64_t, uint64_t> tmp;
+  for (list<pair<boost::tuple<uint64_t, uint64_t, uint32_t>,
                 pair<bufferlist*, Context*> > >::const_iterator i =
         to_read.begin();
        i != to_read.end();
        ++i) {
-    offsets.push_back(
-      sinfo.offset_len_to_stripe_bounds(i->first));
+    tmp = sinfo.offset_len_to_stripe_bounds(make_pair(i->first.get<0>(), i->first.get<1>()));
+    offsets.push_back(boost::make_tuple(tmp.first, tmp.second, i->first.get<2>()));
   }
 
   const vector<int> &chunk_mapping = ec_impl->get_chunk_mapping();
index 2a71c339819e1dc1ed1654e10e5a69cc43c0ca9a..147e3e85d1db8e04f785b650ed926822b2249d0c 100644 (file)
@@ -143,7 +143,7 @@ public:
   list<ClientAsyncReadStatus> in_progress_client_reads;
   void objects_read_async(
     const hobject_t &hoid,
-    const list<pair<pair<uint64_t, uint64_t>,
+    const list<pair<boost::tuple<uint64_t, uint64_t, uint32_t>,
                    pair<bufferlist*, Context*> > > &to_read,
     Context *on_complete);
 
@@ -265,13 +265,13 @@ public:
     read_result_t() : r(0) {}
   };
   struct read_request_t {
-    const list<pair<uint64_t, uint64_t> > to_read;
+    const list<boost::tuple<uint64_t, uint64_t, uint32_t> > to_read;
     const set<pg_shard_t> need;
     const bool want_attrs;
     GenContext<pair<RecoveryMessages *, read_result_t& > &> *cb;
     read_request_t(
       const hobject_t &hoid,
-      const list<pair<uint64_t, uint64_t> > &to_read,
+      const list<boost::tuple<uint64_t, uint64_t, uint32_t> > &to_read,
       const set<pg_shard_t> &need,
       bool want_attrs,
       GenContext<pair<RecoveryMessages *, read_result_t& > &> *cb)
index 282355fe21e34ca158e84762f469f073a9bcb5a9..cf63611a2d64d95db1eaae923a3d9494d0f13507 100644 (file)
@@ -153,9 +153,29 @@ void ECSubWriteReply::generate_test_instances(list<ECSubWriteReply*>& o)
   o.back()->applied = true;
 }
 
-void ECSubRead::encode(bufferlist &bl) const
+void ECSubRead::encode(bufferlist &bl, uint64_t features) const
 {
-  ENCODE_START(1, 1, bl);
+  if ((features & CEPH_FEATURE_OSD_FADVISE_FLAGS) == 0) {
+    ENCODE_START(1, 1, bl);
+    ::encode(from, bl);
+    ::encode(tid, bl);
+    map<hobject_t, list<pair<uint64_t, uint64_t> > > tmp;
+    for (map<hobject_t, list<boost::tuple<uint64_t, uint64_t, uint32_t> > >::const_iterator m = to_read.begin();
+         m != to_read.end(); ++m) {
+      list<pair<uint64_t, uint64_t> > tlist;
+      for (list<boost::tuple<uint64_t, uint64_t, uint32_t> >::const_iterator l = m->second.begin();
+           l != m->second.end(); ++l) {
+       tlist.push_back(std::make_pair(l->get<0>(), l->get<1>()));
+      }
+      tmp[m->first] = tlist;
+    }
+    ::encode(tmp, bl);
+    ::encode(attrs_to_read, bl);
+    ENCODE_FINISH(bl);
+    return;
+  }
+
+  ENCODE_START(2, 2, bl);
   ::encode(from, bl);
   ::encode(tid, bl);
   ::encode(to_read, bl);
@@ -165,10 +185,24 @@ void ECSubRead::encode(bufferlist &bl) const
 
 void ECSubRead::decode(bufferlist::iterator &bl)
 {
-  DECODE_START(1, bl);
+  DECODE_START(2, bl);
   ::decode(from, bl);
   ::decode(tid, bl);
-  ::decode(to_read, bl);
+  if (struct_v == 1) {
+    map<hobject_t, list<pair<uint64_t, uint64_t> > >tmp;
+    ::decode(tmp, bl);
+    for (map<hobject_t, list<pair<uint64_t, uint64_t> > >::const_iterator m = tmp.begin();
+         m != tmp.end(); ++m) {
+      list<boost::tuple<uint64_t, uint64_t, uint32_t> > tlist;
+      for (list<pair<uint64_t, uint64_t> > ::const_iterator l = m->second.begin();
+           l != m->second.end(); ++l) {
+       tlist.push_back(boost::make_tuple(l->first, l->second, 0));
+      }
+      to_read[m->first] = tlist;
+    }
+  } else {
+    ::decode(to_read, bl);
+  }
   ::decode(attrs_to_read, bl);
   DECODE_FINISH(bl);
 }
@@ -187,20 +221,21 @@ void ECSubRead::dump(Formatter *f) const
   f->dump_stream("from") << from;
   f->dump_unsigned("tid", tid);
   f->open_array_section("objects");
-  for (map<hobject_t, list<pair<uint64_t, uint64_t> > >::const_iterator i =
+  for (map<hobject_t, list<boost::tuple<uint64_t, uint64_t, uint32_t> > >::const_iterator i =
         to_read.begin();
        i != to_read.end();
        ++i) {
     f->open_object_section("object");
     f->dump_stream("oid") << i->first;
     f->open_array_section("extents");
-    for (list<pair<uint64_t, uint64_t> >::const_iterator j =
+    for (list<boost::tuple<uint64_t, uint64_t, uint32_t> >::const_iterator j =
           i->second.begin();
         j != i->second.end();
         ++j) {
       f->open_object_section("extent");
-      f->dump_unsigned("off", j->first);
-      f->dump_unsigned("len", j->second);
+      f->dump_unsigned("off", j->get<0>());
+      f->dump_unsigned("len", j->get<1>());
+      f->dump_unsigned("flags", j->get<2>());
       f->close_section();
     }
     f->close_section();
@@ -226,16 +261,16 @@ void ECSubRead::generate_test_instances(list<ECSubRead*>& o)
   o.push_back(new ECSubRead());
   o.back()->from = pg_shard_t(2, shard_id_t(255));
   o.back()->tid = 1;
-  o.back()->to_read[hoid1].push_back(make_pair(100, 200));
-  o.back()->to_read[hoid1].push_back(make_pair(400, 600));
-  o.back()->to_read[hoid2].push_back(make_pair(400, 600));
+  o.back()->to_read[hoid1].push_back(boost::make_tuple(100, 200, 0));
+  o.back()->to_read[hoid1].push_back(boost::make_tuple(400, 600, 0));
+  o.back()->to_read[hoid2].push_back(boost::make_tuple(400, 600, 0));
   o.back()->attrs_to_read.insert(hoid1);
   o.push_back(new ECSubRead());
   o.back()->from = pg_shard_t(2, shard_id_t(255));
   o.back()->tid = 300;
-  o.back()->to_read[hoid1].push_back(make_pair(300, 200));
-  o.back()->to_read[hoid2].push_back(make_pair(400, 600));
-  o.back()->to_read[hoid2].push_back(make_pair(2000, 600));
+  o.back()->to_read[hoid1].push_back(boost::make_tuple(300, 200, 0));
+  o.back()->to_read[hoid2].push_back(boost::make_tuple(400, 600, 0));
+  o.back()->to_read[hoid2].push_back(boost::make_tuple(2000, 600, 0));
   o.back()->attrs_to_read.insert(hoid2);
 }
 
index 1cdfa57e153c50424ffbc094520b77c65dd42732..78193830959b178ae237b705887ae37aff1282c1 100644 (file)
@@ -18,6 +18,7 @@
 #include "osd_types.h"
 #include "include/buffer.h"
 #include "os/ObjectStore.h"
+#include "boost/tuple/tuple.hpp"
 
 struct ECSubWrite {
   pg_shard_t from;
@@ -80,14 +81,14 @@ WRITE_CLASS_ENCODER(ECSubWriteReply)
 struct ECSubRead {
   pg_shard_t from;
   ceph_tid_t tid;
-  map<hobject_t, list<pair<uint64_t, uint64_t> > > to_read;
+  map<hobject_t, list<boost::tuple<uint64_t, uint64_t, uint32_t> > > to_read;
   set<hobject_t> attrs_to_read;
-  void encode(bufferlist &bl) const;
+  void encode(bufferlist &bl, uint64_t features) const;
   void decode(bufferlist::iterator &bl);
   void dump(Formatter *f) const;
   static void generate_test_instances(list<ECSubRead*>& o);
 };
-WRITE_CLASS_ENCODER(ECSubRead)
+WRITE_CLASS_ENCODER_FEATURES(ECSubRead)
 
 struct ECSubReadReply {
   pg_shard_t from;
index 82d520e8c32b6c7cc44b871f7fd8d38e4d0a6a6f..dea075aeccbd792a505813fce36c130af6c6edec 100644 (file)
 
    virtual void objects_read_async(
      const hobject_t &hoid,
-     const list<pair<pair<uint64_t, uint64_t>,
+     const list<pair<boost::tuple<uint64_t, uint64_t, uint32_t>,
                pair<bufferlist*, Context*> > > &to_read,
      Context *on_complete) = 0;
 
index 893ed180573bb0a28ed2d1d6e73fefa2e7cd6049..97e69dd0c646f2f6634ab3eb192d59c9a9553187 100644 (file)
@@ -245,18 +245,19 @@ struct AsyncReadCallback : public GenContext<ThreadPool::TPHandle&> {
 };
 void ReplicatedBackend::objects_read_async(
   const hobject_t &hoid,
-  const list<pair<pair<uint64_t, uint64_t>,
+  const list<pair<boost::tuple<uint64_t, uint64_t, uint32_t>,
                  pair<bufferlist*, Context*> > > &to_read,
   Context *on_complete)
 {
   int r = 0;
-  for (list<pair<pair<uint64_t, uint64_t>,
+  for (list<pair<boost::tuple<uint64_t, uint64_t, uint32_t>,
                 pair<bufferlist*, Context*> > >::const_iterator i =
           to_read.begin();
        i != to_read.end() && r >= 0;
        ++i) {
-    int _r = store->read(coll, hoid, i->first.first,
-                        i->first.second, *(i->second.first));
+    int _r = store->read(coll, hoid, i->first.get<0>(),
+                        i->first.get<1>(), *(i->second.first),
+                        i->first.get<2>());
     if (i->second.second) {
       get_parent()->schedule_recovery_work(
        get_parent()->bless_gencontext(
index 7de3922d2860e6f397fbd151b2a9bcc8f8d23879..67a4a1f7ffbf735df4a96e450ad0cbde0117d57f 100644 (file)
@@ -158,7 +158,7 @@ public:
 
   void objects_read_async(
     const hobject_t &hoid,
-    const list<pair<pair<uint64_t, uint64_t>,
+    const list<pair<boost::tuple<uint64_t, uint64_t, uint32_t>,
               pair<bufferlist*, Context*> > > &to_read,
     Context *on_complete);
 
index 502cf0cc32b0afb071a933584a72d27eea689f37..8b587c13603bb055c165c26a12f3a186f26f60cc 100644 (file)
@@ -3327,7 +3327,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
        } else if (pool.info.require_rollback()) {
          ctx->pending_async_reads.push_back(
            make_pair(
-             make_pair(op.extent.offset, op.extent.length),
+             boost::make_tuple(op.extent.offset, op.extent.length, op.flags),
              make_pair(&osd_op.outdata, new FillInExtent(&op.extent.length))));
          dout(10) << " async_read noted for " << soid << dendl;
        } else {
@@ -5832,7 +5832,7 @@ int ReplicatedPG::fill_in_copy_get(
        async_read_started = true;
        ctx->pending_async_reads.push_back(
          make_pair(
-           make_pair(cursor.data_offset, left),
+           boost::make_tuple(cursor.data_offset, left, 0),
            make_pair(&bl, cb)));
        result = MIN(oi.size - cursor.data_offset, (uint64_t)left);
        cb->len = result;
index 3d57058cf8844096f2353bcfc83d9a01ab8d15f5..d3539fde6600ca063285486ef1b04cfa4cba14ac 100644 (file)
@@ -512,8 +512,8 @@ public:
       pending_attrs.clear();
     }
 
-    // pending async reads <off, len> -> <outbl, outr>
-    list<pair<pair<uint64_t, uint64_t>,
+    // pending async reads <off, len, op_flags> -> <outbl, outr>
+    list<pair<boost::tuple<uint64_t, uint64_t, unsigned>,
              pair<bufferlist*, Context*> > > pending_async_reads;
     int async_read_result;
     unsigned inflightreads;
@@ -571,7 +571,7 @@ public:
       assert(lock_to_release == NONE);
       if (reply)
        reply->put();
-      for (list<pair<pair<uint64_t, uint64_t>,
+      for (list<pair<boost::tuple<uint64_t, uint64_t, unsigned>,
                     pair<bufferlist*, Context*> > >::iterator i =
             pending_async_reads.begin();
           i != pending_async_reads.end();
index 56f39b08b914d6f40242910f2ab70bd1df1ef671..59a576e45214f496ddd35cb676f66a491b69485b 100644 (file)
@@ -89,7 +89,7 @@ TYPE(ECUtil::HashInfo)
 #include "osd/ECMsgTypes.h"
 TYPE(ECSubWrite)
 TYPE(ECSubWriteReply)
-TYPE(ECSubRead)
+TYPE_FEATUREFUL(ECSubRead)
 TYPE(ECSubReadReply)
 
 #include "osd/HitSet.h"