From 3752864840cac584875f6e63314048b28330bc02 Mon Sep 17 00:00:00 2001 From: Jianpeng Ma Date: Mon, 8 Dec 2014 10:00:33 +0800 Subject: [PATCH] ObjectStore/Transaction: Add fadvise_flags to track write fadvise flags. Signed-off-by: Jianpeng Ma --- src/os/FileStore.cc | 9 ++-- src/os/FileStore.h | 4 +- src/os/KeyValueStore.cc | 10 ++--- src/os/KeyValueStore.h | 4 +- src/os/MemStore.cc | 6 +-- src/os/MemStore.h | 4 +- src/os/ObjectStore.h | 42 ++++++++++++------- src/osd/ECTransaction.cc | 3 +- src/osd/ReplicatedBackend.cc | 2 +- src/osd/ReplicatedPG.cc | 4 +- .../ObjectStoreTransactionBenchmark.cc | 2 +- 11 files changed, 53 insertions(+), 37 deletions(-) diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc index f68e4b942f188..7812b924b2487 100644 --- a/src/os/FileStore.cc +++ b/src/os/FileStore.cc @@ -2280,12 +2280,12 @@ unsigned FileStore::_do_transaction( ghobject_t oid = i.decode_oid(); uint64_t off = i.decode_length(); uint64_t len = i.decode_length(); - bool replica = i.get_replica(); + uint32_t fadvise_flags = i.get_fadvise_flags(); bufferlist bl; i.decode_bl(bl); tracepoint(objectstore, write_enter, osr_name, off, len); if (_check_replay_guard(cid, oid, spos) > 0) - r = _write(cid, oid, off, len, bl, replica); + r = _write(cid, oid, off, len, bl, fadvise_flags); tracepoint(objectstore, write_exit, r); } break; @@ -2971,7 +2971,7 @@ int FileStore::_touch(coll_t cid, const ghobject_t& oid) int FileStore::_write(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len, - const bufferlist& bl, bool replica) + const bufferlist& bl, uint32_t fadvise_flags) { dout(15) << "write " << cid << "/" << oid << " " << offset << "~" << len << dendl; int r; @@ -3015,7 +3015,8 @@ int FileStore::_write(coll_t cid, const ghobject_t& oid, // flush? 
if (!replaying && g_conf->filestore_wbthrottle_enable) - wbthrottle.queue_wb(fd, oid, offset, len, replica); + wbthrottle.queue_wb(fd, oid, offset, len, + fadvise_flags & CEPH_OSD_OP_FLAG_FADVISE_DONTNEED); lfn_close(fd); out: diff --git a/src/os/FileStore.h b/src/os/FileStore.h index a7fa4d86d1eab..3e5bf2d0e9a2d 100644 --- a/src/os/FileStore.h +++ b/src/os/FileStore.h @@ -530,8 +530,8 @@ public: int fiemap(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len, bufferlist& bl); int _touch(coll_t cid, const ghobject_t& oid); - int _write(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len, const bufferlist& bl, - bool replica = false); + int _write(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len, + const bufferlist& bl, uint32_t fadvise_flags = 0); int _zero(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len); int _truncate(coll_t cid, const ghobject_t& oid, uint64_t size); int _clone(coll_t cid, const ghobject_t& oldoid, const ghobject_t& newoid, diff --git a/src/os/KeyValueStore.cc b/src/os/KeyValueStore.cc index 19d3b1b625873..041f9f7d66762 100644 --- a/src/os/KeyValueStore.cc +++ b/src/os/KeyValueStore.cc @@ -1224,10 +1224,10 @@ unsigned KeyValueStore::_do_transaction(Transaction& transaction, ghobject_t oid = i.decode_oid(); uint64_t off = i.decode_length(); uint64_t len = i.decode_length(); - bool replica = i.get_replica(); + uint32_t fadvise_flags = i.get_fadvise_flags(); bufferlist bl; i.decode_bl(bl); - r = _write(cid, oid, off, len, bl, t, replica); + r = _write(cid, oid, off, len, bl, t, fadvise_flags); } break; @@ -1876,7 +1876,7 @@ int KeyValueStore::_touch(coll_t cid, const ghobject_t& oid, int KeyValueStore::_generic_write(StripObjectMap::StripObjectHeaderRef header, uint64_t offset, size_t len, const bufferlist& bl, BufferTransaction &t, - bool replica) + uint32_t fadvise_flags) { if (len > bl.length()) len = bl.length(); @@ -1960,7 +1960,7 @@ int 
KeyValueStore::_generic_write(StripObjectMap::StripObjectHeaderRef header, int KeyValueStore::_write(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len, const bufferlist& bl, - BufferTransaction &t, bool replica) + BufferTransaction &t, uint32_t fadvise_flags) { dout(15) << __func__ << " " << cid << "/" << oid << " " << offset << "~" << len << dendl; @@ -1975,7 +1975,7 @@ int KeyValueStore::_write(coll_t cid, const ghobject_t& oid, return r; } - return _generic_write(header, offset, len, bl, t, replica); + return _generic_write(header, offset, len, bl, t, fadvise_flags); } int KeyValueStore::_zero(coll_t cid, const ghobject_t& oid, uint64_t offset, diff --git a/src/os/KeyValueStore.h b/src/os/KeyValueStore.h index b73bf928335bc..3bd99c4d67e34 100644 --- a/src/os/KeyValueStore.h +++ b/src/os/KeyValueStore.h @@ -542,7 +542,7 @@ class KeyValueStore : public ObjectStore, bool allow_eio = false, BufferTransaction *bt = 0); int _generic_write(StripObjectMap::StripObjectHeaderRef header, uint64_t offset, size_t len, const bufferlist& bl, - BufferTransaction &t, bool replica = false); + BufferTransaction &t, uint32_t fadvise_flags = 0); bool exists(coll_t cid, const ghobject_t& oid); int stat(coll_t cid, const ghobject_t& oid, struct stat *st, @@ -554,7 +554,7 @@ class KeyValueStore : public ObjectStore, int _touch(coll_t cid, const ghobject_t& oid, BufferTransaction &t); int _write(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len, - const bufferlist& bl, BufferTransaction &t, bool replica = false); + const bufferlist& bl, BufferTransaction &t, uint32_t fadvise_flags = 0); int _zero(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len, BufferTransaction &t); int _truncate(coll_t cid, const ghobject_t& oid, uint64_t size, diff --git a/src/os/MemStore.cc b/src/os/MemStore.cc index 19316d90d3b53..8eb0bcb5fcc91 100644 --- a/src/os/MemStore.cc +++ b/src/os/MemStore.cc @@ -709,10 +709,10 @@ void MemStore::_do_transaction(Transaction& t) 
ghobject_t oid = i.decode_oid(); uint64_t off = i.decode_length(); uint64_t len = i.decode_length(); - bool replica = i.get_replica(); + uint32_t fadvise_flags = i.get_fadvise_flags(); bufferlist bl; i.decode_bl(bl); - r = _write(cid, oid, off, len, bl, replica); + r = _write(cid, oid, off, len, bl, fadvise_flags); } break; @@ -1054,7 +1054,7 @@ int MemStore::_touch(coll_t cid, const ghobject_t& oid) int MemStore::_write(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len, const bufferlist& bl, - bool replica) + uint32_t fadvise_flags) { dout(10) << __func__ << " " << cid << " " << oid << " " << offset << "~" << len << dendl; diff --git a/src/os/MemStore.h b/src/os/MemStore.h index 7eb994d9875da..83d776297b390 100644 --- a/src/os/MemStore.h +++ b/src/os/MemStore.h @@ -187,8 +187,8 @@ private: void _write_into_bl(const bufferlist& src, unsigned offset, bufferlist *dst); int _touch(coll_t cid, const ghobject_t& oid); - int _write(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len, const bufferlist& bl, - bool replica = false); + int _write(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len, + const bufferlist& bl, uint32_t fadvise_flags = 0); int _zero(coll_t cid, const ghobject_t& oid, uint64_t offset, size_t len); int _truncate(coll_t cid, const ghobject_t& oid, uint64_t size); int _remove(coll_t cid, const ghobject_t& oid); diff --git a/src/os/ObjectStore.h b/src/os/ObjectStore.h index 5e8800f9484c8..efbdc897a592f 100644 --- a/src/os/ObjectStore.h +++ b/src/os/ObjectStore.h @@ -391,7 +391,7 @@ public: bool sobject_encoding; int64_t pool_override; bool use_pool_override; - bool replica; + uint32_t fadvise_flags; //record write flags void *osr; // NULL on replay list on_applied; @@ -454,16 +454,23 @@ public: void set_pool_override(int64_t pool) { pool_override = pool; } - void set_replica() { - replica = true; + + void set_fadvise_flags(uint32_t flags) { + fadvise_flags = flags; + } + + void set_fadvise_flag(uint32_t flag) { + 
fadvise_flags |= flag; } - bool get_replica() { return replica; } + + uint32_t get_fadvise_flags() { return fadvise_flags; } void swap(Transaction& other) { std::swap(ops, other.ops); std::swap(largest_data_len, other.largest_data_len); std::swap(largest_data_off, other.largest_data_off); std::swap(largest_data_off_in_tbl, other.largest_data_off_in_tbl); + std::swap(fadvise_flags, other.fadvise_flags); std::swap(on_applied, other.on_applied); std::swap(on_commit, other.on_commit); std::swap(on_applied_sync, other.on_applied_sync); @@ -490,7 +497,7 @@ public: /// How big is the encoded Transaction buffer? uint64_t get_encoded_bytes() { - return 1 + 8 + 8 + 4 + 4 + 4 + 4 + tbl.length(); + return 1 + 8 + 8 + 4 + 4 + 4 + 4 + 4 + tbl.length(); } uint64_t get_num_bytes() { @@ -512,6 +519,7 @@ public: sizeof(largest_data_len) + sizeof(largest_data_off) + sizeof(largest_data_off_in_tbl) + + sizeof(fadvise_flags) + sizeof(__u32); // tbl length } return 0; // none @@ -553,14 +561,14 @@ public: bool sobject_encoding; int64_t pool_override; bool use_pool_override; - bool replica; + uint32_t fadvise_flags; iterator(Transaction *t) : p(t->tbl.begin()), sobject_encoding(t->sobject_encoding), pool_override(t->pool_override), use_pool_override(t->use_pool_override), - replica(t->replica) {} + fadvise_flags(t->fadvise_flags) {} friend class Transaction; @@ -635,7 +643,8 @@ public: ::decode(bits, p); return bits; } - bool get_replica() { return replica; } + + uint32_t get_fadvise_flags() { return fadvise_flags; } }; iterator begin() { @@ -687,7 +696,7 @@ public: * "hole" in the file. 
*/ void write(coll_t cid, const ghobject_t& oid, uint64_t off, uint64_t len, - const bufferlist& data) { + const bufferlist& data, uint32_t flags = 0) { __u32 op = OP_WRITE; ::encode(op, tbl); ::encode(cid, tbl); @@ -695,6 +704,7 @@ public: ::encode(off, tbl); ::encode(len, tbl); assert(len == data.length()); + fadvise_flags |= flags; if (data.length() > largest_data_len) { largest_data_len = data.length(); largest_data_off = off; @@ -1034,13 +1044,13 @@ public: Transaction() : ops(0), pad_unused_bytes(0), largest_data_len(0), largest_data_off(0), largest_data_off_in_tbl(0), sobject_encoding(false), pool_override(-1), use_pool_override(false), - replica(false), + fadvise_flags(0), osr(NULL) {} Transaction(bufferlist::iterator &dp) : ops(0), pad_unused_bytes(0), largest_data_len(0), largest_data_off(0), largest_data_off_in_tbl(0), sobject_encoding(false), pool_override(-1), use_pool_override(false), - replica(false), + fadvise_flags(0), osr(NULL) { decode(dp); } @@ -1048,14 +1058,14 @@ public: Transaction(bufferlist &nbl) : ops(0), pad_unused_bytes(0), largest_data_len(0), largest_data_off(0), largest_data_off_in_tbl(0), sobject_encoding(false), pool_override(-1), use_pool_override(false), - replica(false), + fadvise_flags(0), osr(NULL) { bufferlist::iterator dp = nbl.begin(); decode(dp); } void encode(bufferlist& bl) const { - ENCODE_START(7, 5, bl); + ENCODE_START(8, 5, bl); ::encode(ops, bl); ::encode(pad_unused_bytes, bl); ::encode(largest_data_len, bl); @@ -1066,10 +1076,11 @@ public: bool tolerate_collection_add_enoent = 0; ::encode(tolerate_collection_add_enoent, bl); } + ::encode(fadvise_flags, bl); ENCODE_FINISH(bl); } void decode(bufferlist::iterator &bl) { - DECODE_START_LEGACY_COMPAT_LEN(7, 5, 5, bl); + DECODE_START_LEGACY_COMPAT_LEN(8, 5, 5, bl); DECODE_OLDEST(2); if (struct_v < 4) sobject_encoding = true; @@ -1090,6 +1101,9 @@ public: bool tolerate_collection_add_enoent; ::decode(tolerate_collection_add_enoent, bl); } + if (struct_v >= 8) { + 
::decode(fadvise_flags, bl); + } DECODE_FINISH(bl); } diff --git a/src/osd/ECTransaction.cc b/src/osd/ECTransaction.cc index f471f53736edc..bad02653ddd47 100644 --- a/src/osd/ECTransaction.cc +++ b/src/osd/ECTransaction.cc @@ -155,7 +155,8 @@ struct TransGenerator : public boost::static_visitor { sinfo.logical_to_prev_chunk_offset( offset), enc_bl.length(), - enc_bl); + enc_bl, + op.fadvise_flags); i->second.setattr( get_coll_ct(i->first, op.oid), ghobject_t(op.oid, ghobject_t::NO_GEN, i->first), diff --git a/src/osd/ReplicatedBackend.cc b/src/osd/ReplicatedBackend.cc index fcf51ed76678a..d48f831768f06 100644 --- a/src/osd/ReplicatedBackend.cc +++ b/src/osd/ReplicatedBackend.cc @@ -325,7 +325,7 @@ public: uint32_t fadvise_flags ) { written += len; - t->write(get_coll_ct(hoid), hoid, off, len, bl); + t->write(get_coll_ct(hoid), hoid, off, len, bl, fadvise_flags); } void remove( const hobject_t &hoid diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 856fb99cb216d..974227cc0cd6b 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -8007,8 +8007,8 @@ void ReplicatedBackend::sub_op_modify(OpRequestRef op) } rm->opt.set_pool_override(get_info().pgid.pool()); } - rm->opt.set_replica(); - + + rm->opt.set_fadvise_flag(CEPH_OSD_OP_FLAG_FADVISE_DONTNEED); bool update_snaps = false; if (!rm->opt.empty()) { // If the opt is non-empty, we infer we are before diff --git a/src/test/objectstore/ObjectStoreTransactionBenchmark.cc b/src/test/objectstore/ObjectStoreTransactionBenchmark.cc index 0d7c73f337fe4..67074a4cf25c3 100644 --- a/src/test/objectstore/ObjectStoreTransactionBenchmark.cc +++ b/src/test/objectstore/ObjectStoreTransactionBenchmark.cc @@ -96,7 +96,7 @@ class Transaction { ghobject_t oid = i.decode_oid(); i.decode_length(); i.decode_length(); - i.get_replica(); + i.get_fadvise_flags(); bufferlist bl; i.decode_bl(bl); } -- 2.39.5