git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
ObjectStore/Transaction: Add fadvise_flags to track write fadvise flags.
author Jianpeng Ma <jianpeng.ma@intel.com>
Mon, 8 Dec 2014 02:00:33 +0000 (10:00 +0800)
committer Jianpeng Ma <jianpeng.ma@intel.com>
Fri, 12 Dec 2014 06:23:35 +0000 (14:23 +0800)
Signed-off-by: Jianpeng Ma <jianpeng.ma@intel.com>
src/os/FileStore.cc
src/os/FileStore.h
src/os/KeyValueStore.cc
src/os/KeyValueStore.h
src/os/MemStore.cc
src/os/MemStore.h
src/os/ObjectStore.h
src/osd/ECTransaction.cc
src/osd/ReplicatedBackend.cc
src/osd/ReplicatedPG.cc
src/test/objectstore/ObjectStoreTransactionBenchmark.cc

index f68e4b942f1888877b5aed10a97996c96be0efb3..7812b924b248750cabfdd735502c9c73a52fb075 100644 (file)
@@ -2280,12 +2280,12 @@ unsigned FileStore::_do_transaction(
        ghobject_t oid = i.decode_oid();
        uint64_t off = i.decode_length();
        uint64_t len = i.decode_length();
-       bool replica = i.get_replica();
+       uint32_t fadvise_flags = i.get_fadvise_flags();
        bufferlist bl;
        i.decode_bl(bl);
         tracepoint(objectstore, write_enter, osr_name, off, len);
        if (_check_replay_guard(cid, oid, spos) > 0)
-         r = _write(cid, oid, off, len, bl, replica);
+         r = _write(cid, oid, off, len, bl, fadvise_flags);
         tracepoint(objectstore, write_exit, r);
       }
       break;
@@ -2971,7 +2971,7 @@ int FileStore::_touch(coll_t cid, const ghobject_t& oid)
 
 int FileStore::_write(coll_t cid, const ghobject_t& oid,
                      uint64_t offset, size_t len,
-                     const bufferlist& bl, bool replica)
+                     const bufferlist& bl, uint32_t fadvise_flags)
 {
   dout(15) << "write " << cid << "/" << oid << " " << offset << "~" << len << dendl;
   int r;
@@ -3015,7 +3015,8 @@ int FileStore::_write(coll_t cid, const ghobject_t& oid,
   // flush?
   if (!replaying &&
       g_conf->filestore_wbthrottle_enable)
-    wbthrottle.queue_wb(fd, oid, offset, len, replica);
+    wbthrottle.queue_wb(fd, oid, offset, len,
+                         fadvise_flags & CEPH_OSD_OP_FLAG_FADVISE_DONTNEED);
   lfn_close(fd);
 
  out:
index a7fa4d86d1eabeff8185c611df486e80a83d79e5..3e5bf2d0e9a2d18259d7ef41cc1cc6cea5e17be9 100644 (file)
@@ -530,8 +530,8 @@ public:
   int fiemap(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len, bufferlist& bl);
 
   int _touch(coll_t cid, const ghobject_t& oid);
-  int _write(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len, const bufferlist& bl,
-      bool replica = false);
+  int _write(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len,
+             const bufferlist& bl, uint32_t fadvise_flags = 0);
   int _zero(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len);
   int _truncate(coll_t cid, const ghobject_t& oid, uint64_t size);
   int _clone(coll_t cid, const ghobject_t& oldoid, const ghobject_t& newoid,
index 19d3b1b62587394a58f742d5755407c65282cfbc..041f9f7d66762bb6798776b655d950f081791aa2 100644 (file)
@@ -1224,10 +1224,10 @@ unsigned KeyValueStore::_do_transaction(Transaction& transaction,
         ghobject_t oid = i.decode_oid();
         uint64_t off = i.decode_length();
         uint64_t len = i.decode_length();
-        bool replica = i.get_replica();
+        uint32_t fadvise_flags = i.get_fadvise_flags();
         bufferlist bl;
         i.decode_bl(bl);
-        r = _write(cid, oid, off, len, bl, t, replica);
+        r = _write(cid, oid, off, len, bl, t, fadvise_flags);
       }
       break;
 
@@ -1876,7 +1876,7 @@ int KeyValueStore::_touch(coll_t cid, const ghobject_t& oid,
 int KeyValueStore::_generic_write(StripObjectMap::StripObjectHeaderRef header,
                                   uint64_t offset, size_t len,
                                   const bufferlist& bl, BufferTransaction &t,
-                                  bool replica)
+                                  uint32_t fadvise_flags)
 {
   if (len > bl.length())
     len = bl.length();
@@ -1960,7 +1960,7 @@ int KeyValueStore::_generic_write(StripObjectMap::StripObjectHeaderRef header,
 
 int KeyValueStore::_write(coll_t cid, const ghobject_t& oid,
                           uint64_t offset, size_t len, const bufferlist& bl,
-                          BufferTransaction &t, bool replica)
+                          BufferTransaction &t, uint32_t fadvise_flags)
 {
   dout(15) << __func__ << " " << cid << "/" << oid << " " << offset << "~"
            << len << dendl;
@@ -1975,7 +1975,7 @@ int KeyValueStore::_write(coll_t cid, const ghobject_t& oid,
     return r;
   }
 
-  return _generic_write(header, offset, len, bl, t, replica);
+  return _generic_write(header, offset, len, bl, t, fadvise_flags);
 }
 
 int KeyValueStore::_zero(coll_t cid, const ghobject_t& oid, uint64_t offset,
index b73bf928335bc2339da3edca3f08bbd29b36256d..3bd99c4d67e34759461f98f12a7d338a1ab420ce 100644 (file)
@@ -542,7 +542,7 @@ class KeyValueStore : public ObjectStore,
                     bool allow_eio = false, BufferTransaction *bt = 0);
   int _generic_write(StripObjectMap::StripObjectHeaderRef header,
                      uint64_t offset, size_t len, const bufferlist& bl,
-                     BufferTransaction &t, bool replica = false);
+                     BufferTransaction &t, uint32_t fadvise_flags = 0);
 
   bool exists(coll_t cid, const ghobject_t& oid);
   int stat(coll_t cid, const ghobject_t& oid, struct stat *st,
@@ -554,7 +554,7 @@ class KeyValueStore : public ObjectStore,
 
   int _touch(coll_t cid, const ghobject_t& oid, BufferTransaction &t);
   int _write(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len,
-             const bufferlist& bl, BufferTransaction &t, bool replica = false);
+             const bufferlist& bl, BufferTransaction &t, uint32_t fadvise_flags = 0);
   int _zero(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len,
             BufferTransaction &t);
   int _truncate(coll_t cid, const ghobject_t& oid, uint64_t size,
index 19316d90d3b53ef88ad78823236a68fefd227e7a..8eb0bcb5fcc912828d2b71ed54bba0fce56d7284 100644 (file)
@@ -709,10 +709,10 @@ void MemStore::_do_transaction(Transaction& t)
        ghobject_t oid = i.decode_oid();
        uint64_t off = i.decode_length();
        uint64_t len = i.decode_length();
-       bool replica = i.get_replica();
+       uint32_t fadvise_flags = i.get_fadvise_flags();
        bufferlist bl;
        i.decode_bl(bl);
-       r = _write(cid, oid, off, len, bl, replica);
+       r = _write(cid, oid, off, len, bl, fadvise_flags);
       }
       break;
       
@@ -1054,7 +1054,7 @@ int MemStore::_touch(coll_t cid, const ghobject_t& oid)
 
 int MemStore::_write(coll_t cid, const ghobject_t& oid,
                     uint64_t offset, size_t len, const bufferlist& bl,
-                    bool replica)
+                    uint32_t fadvise_flags)
 {
   dout(10) << __func__ << " " << cid << " " << oid << " "
           << offset << "~" << len << dendl;
index 7eb994d9875dabad3c97b5abffc8dc14e94edea4..83d776297b3904b43e7d0f4dbf08fecdebd28369 100644 (file)
@@ -187,8 +187,8 @@ private:
   void _write_into_bl(const bufferlist& src, unsigned offset, bufferlist *dst);
 
   int _touch(coll_t cid, const ghobject_t& oid);
-  int _write(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len, const bufferlist& bl,
-      bool replica = false);
+  int _write(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len,
+             const bufferlist& bl, uint32_t fadvsie_flags = 0);
   int _zero(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len);
   int _truncate(coll_t cid, const ghobject_t& oid, uint64_t size);
   int _remove(coll_t cid, const ghobject_t& oid);
index 5e8800f9484c88a9eeed6e10d95cdb23593c7620..efbdc897a592f645005e57241d8c566e12b80005 100644 (file)
@@ -391,7 +391,7 @@ public:
     bool sobject_encoding;
     int64_t pool_override;
     bool use_pool_override;
-    bool replica;
+    uint32_t fadvise_flags; //record write flags
     void *osr; // NULL on replay
 
     list<Context *> on_applied;
@@ -454,16 +454,23 @@ public:
     void set_pool_override(int64_t pool) {
       pool_override = pool;
     }
-    void set_replica() {
-      replica = true;
+
+    void set_fadvise_flags(uint32_t flags) {
+      fadvise_flags = flags;
+    }
+
+    void set_fadvise_flag(uint32_t flag) {
+      fadvise_flags |= flag;
     }
-    bool get_replica() { return replica; }
+
+    uint32_t get_fadvise_flags() { return fadvise_flags; }
 
     void swap(Transaction& other) {
       std::swap(ops, other.ops);
       std::swap(largest_data_len, other.largest_data_len);
       std::swap(largest_data_off, other.largest_data_off);
       std::swap(largest_data_off_in_tbl, other.largest_data_off_in_tbl);
+      std::swap(fadvise_flags, other.fadvise_flags);
       std::swap(on_applied, other.on_applied);
       std::swap(on_commit, other.on_commit);
       std::swap(on_applied_sync, other.on_applied_sync);
@@ -490,7 +497,7 @@ public:
 
     /// How big is the encoded Transaction buffer?
     uint64_t get_encoded_bytes() {
-      return 1 + 8 + 8 + 4 + 4 + 4 + 4 + tbl.length();
+      return 1 + 8 + 8 + 4 + 4 + 4 + 4 + 4 + tbl.length();
     }
 
     uint64_t get_num_bytes() {
@@ -512,6 +519,7 @@ public:
          sizeof(largest_data_len) +
          sizeof(largest_data_off) +
          sizeof(largest_data_off_in_tbl) +
+         sizeof(fadvise_flags) +
          sizeof(__u32);  // tbl length
       }
       return 0;  // none
@@ -553,14 +561,14 @@ public:
       bool sobject_encoding;
       int64_t pool_override;
       bool use_pool_override;
-      bool replica;
+      uint32_t fadvise_flags;
 
       iterator(Transaction *t)
        : p(t->tbl.begin()),
          sobject_encoding(t->sobject_encoding),
          pool_override(t->pool_override),
          use_pool_override(t->use_pool_override),
-         replica(t->replica) {}
+         fadvise_flags(t->fadvise_flags) {}
 
       friend class Transaction;
 
@@ -635,7 +643,8 @@ public:
        ::decode(bits, p);
        return bits;
       }
-      bool get_replica() { return replica; }
+
+      uint32_t get_fadvise_flags() { return fadvise_flags; }
     };
 
     iterator begin() {
@@ -687,7 +696,7 @@ public:
      * "hole" in the file.
      */
     void write(coll_t cid, const ghobject_t& oid, uint64_t off, uint64_t len,
-              const bufferlist& data) {
+              const bufferlist& data, uint32_t flags = 0) {
       __u32 op = OP_WRITE;
       ::encode(op, tbl);
       ::encode(cid, tbl);
@@ -695,6 +704,7 @@ public:
       ::encode(off, tbl);
       ::encode(len, tbl);
       assert(len == data.length());
+      fadvise_flags |= flags;
       if (data.length() > largest_data_len) {
        largest_data_len = data.length();
        largest_data_off = off;
@@ -1034,13 +1044,13 @@ public:
     Transaction() :
       ops(0), pad_unused_bytes(0), largest_data_len(0), largest_data_off(0), largest_data_off_in_tbl(0),
       sobject_encoding(false), pool_override(-1), use_pool_override(false),
-      replica(false),
+      fadvise_flags(0),
       osr(NULL) {}
 
     Transaction(bufferlist::iterator &dp) :
       ops(0), pad_unused_bytes(0), largest_data_len(0), largest_data_off(0), largest_data_off_in_tbl(0),
       sobject_encoding(false), pool_override(-1), use_pool_override(false),
-      replica(false),
+      fadvise_flags(0),
       osr(NULL) {
       decode(dp);
     }
@@ -1048,14 +1058,14 @@ public:
     Transaction(bufferlist &nbl) :
       ops(0), pad_unused_bytes(0), largest_data_len(0), largest_data_off(0), largest_data_off_in_tbl(0),
       sobject_encoding(false), pool_override(-1), use_pool_override(false),
-      replica(false),
+      fadvise_flags(0),
       osr(NULL) {
       bufferlist::iterator dp = nbl.begin();
       decode(dp);
     }
 
     void encode(bufferlist& bl) const {
-      ENCODE_START(7, 5, bl);
+      ENCODE_START(8, 5, bl);
       ::encode(ops, bl);
       ::encode(pad_unused_bytes, bl);
       ::encode(largest_data_len, bl);
@@ -1066,10 +1076,11 @@ public:
        bool tolerate_collection_add_enoent = 0;
        ::encode(tolerate_collection_add_enoent, bl);
       }
+      ::encode(fadvise_flags, bl);
       ENCODE_FINISH(bl);
     }
     void decode(bufferlist::iterator &bl) {
-      DECODE_START_LEGACY_COMPAT_LEN(7, 5, 5, bl);
+      DECODE_START_LEGACY_COMPAT_LEN(8, 5, 5, bl);
       DECODE_OLDEST(2);
       if (struct_v < 4)
        sobject_encoding = true;
@@ -1090,6 +1101,9 @@ public:
        bool tolerate_collection_add_enoent;
        ::decode(tolerate_collection_add_enoent, bl);
       }
+      if (struct_v >= 8) {
+       ::decode(fadvise_flags, bl);
+      }
       DECODE_FINISH(bl);
     }
 
index f471f53736edc9c69337063cb0e63c6c042c20bc..bad02653ddd4751286da49dfc474252894b361cd 100644 (file)
@@ -155,7 +155,8 @@ struct TransGenerator : public boost::static_visitor<void> {
        sinfo.logical_to_prev_chunk_offset(
          offset),
        enc_bl.length(),
-       enc_bl);
+       enc_bl,
+       op.fadvise_flags);
       i->second.setattr(
        get_coll_ct(i->first, op.oid),
        ghobject_t(op.oid, ghobject_t::NO_GEN, i->first),
index fcf51ed76678ad7645217e2c4650628f45bb3c08..d48f831768f068477d1354e56f36a7f22f0ed7d2 100644 (file)
@@ -325,7 +325,7 @@ public:
     uint32_t fadvise_flags
     ) {
     written += len;
-    t->write(get_coll_ct(hoid), hoid, off, len, bl);
+    t->write(get_coll_ct(hoid), hoid, off, len, bl, fadvise_flags);
   }
   void remove(
     const hobject_t &hoid
index 856fb99cb216dd326c34145999fc847aa98541ef..974227cc0cd6b9a7f7e25aa7ec4160d9076d242c 100644 (file)
@@ -8007,8 +8007,8 @@ void ReplicatedBackend::sub_op_modify(OpRequestRef op)
     }
     rm->opt.set_pool_override(get_info().pgid.pool());
   }
-  rm->opt.set_replica();
-
+  
+  rm->opt.set_fadvise_flag(CEPH_OSD_OP_FLAG_FADVISE_DONTNEED);
   bool update_snaps = false;
   if (!rm->opt.empty()) {
     // If the opt is non-empty, we infer we are before
index 0d7c73f337fe4f73c68fe682f1d293bcb71c13c3..67074a4cf25c3d63b211a095b09f4c3df89c7b3d 100644 (file)
@@ -96,7 +96,7 @@ class Transaction {
           ghobject_t oid = i.decode_oid();
           i.decode_length();
           i.decode_length();
-          i.get_replica();
+          i.get_fadvise_flags();
           bufferlist bl;
           i.decode_bl(bl);
         }