From: Matt Benjamin Date: Tue, 9 Dec 2014 21:44:25 +0000 (-0500) Subject: Add safe-sharing to buffer::list and buffer::ptr. X-Git-Tag: v0.91~39^2~3 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=5f551cc0ba46dfcd0980a9dfd65a85f5e3351157;p=ceph.git Add safe-sharing to buffer::list and buffer::ptr. Formerly known as "strong claim," this change introduces a new is_shareable() property to ceph::buffer::raw objects, which can be used to prevent unplanned sharing of buffers. The default value of is_shareable() is true, but derived classes may override this. For example, Accelio message buffers may point to registered or pool-allocated memory which should not be held for long periods. Cloning of non-shareable buffers is now the unmarked case, so is performed automatically in the buffer::list copy constructor. Low-level operations should allow explicit volatile sharing, so copying a buffer::list:iterator or a buffer::ptr works as normal. For explicit sharing of "non-shareable" buffers, a new boolean is added to the public claim_* methods, and a buffer::list::share method is also added, to share an entire sequence. Signed-off-by: Vu Pham Signed-off-by: Matt Benjamin --- diff --git a/src/common/buffer.cc b/src/common/buffer.cc index 4ab786a3f5d1..b3d9683d8d42 100644 --- a/src/common/buffer.cc +++ b/src/common/buffer.cc @@ -162,6 +162,12 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER; bool is_n_page_sized() { return (len & ~CEPH_PAGE_MASK) == 0; } + virtual bool is_shareable() { + // true if safe to reference/share the existing buffer copy + // false if it is not safe to share the buffer, e.g., due to special + // and/or registered memory that is scarce + return true; + } bool get_crc(const pair &fromto, pair *crc) const { Mutex::Locker l(crc_lock); @@ -603,6 +609,18 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER; return _raw->clone(); } + buffer::ptr& buffer::ptr::make_shareable() { + if (_raw && !_raw->is_shareable()) { + buffer::raw *tr = _raw; + _raw = tr->clone(); + _raw->nref.set(1); + if (unlikely(tr->nref.dec() == 0)) { + delete tr; + } + } + return *this; + } + void buffer::ptr::swap(ptr& other) { raw *r = _raw; @@ -1171,27 +1189,31 @@ void buffer::list::rebuild_page_aligned() } // sort-of-like-assignment-op - void buffer::list::claim(list& bl) + void buffer::list::claim(list& bl, unsigned int flags) { // free my buffers clear(); - claim_append(bl); + claim_append(bl, flags); } - void buffer::list::claim_append(list& bl) + void buffer::list::claim_append(list& bl, unsigned int flags) { // steal the other guy's buffers _len += bl._len; - _buffers.splice( _buffers.end(), bl._buffers ); + if (!(flags & CLAIM_ALLOW_NONSHAREABLE)) + bl.make_shareable(); + _buffers.splice(_buffers.end(), bl._buffers ); bl._len = 0; bl.last_p = bl.begin(); } - void buffer::list::claim_prepend(list& bl) + void buffer::list::claim_prepend(list& bl, unsigned int flags) { // steal the other guy's buffers _len += bl._len; - _buffers.splice( _buffers.begin(), bl._buffers ); + if (!(flags & CLAIM_ALLOW_NONSHAREABLE)) + bl.make_shareable(); + _buffers.splice(_buffers.begin(), bl._buffers ); bl._len = 0; bl.last_p = bl.begin(); } diff --git a/src/include/buffer.h b/src/include/buffer.h index aac2f80a650d..6eaa7b3c3936 100644 --- a/src/include/buffer.h +++ b/src/include/buffer.h @@ -179,6 +179,7 @@ public: raw *clone(); void swap(ptr& other); + ptr& make_shareable(); // misc bool at_buffer_head() const { return _off == 0; } @@ -329,12 +330,15 @@ public: append_buffer.set_length(0); // unused, so far. } ~list() {} - - list(const list& other) : _buffers(other._buffers), _len(other._len), _memcopy_count(other._memcopy_count),last_p(this) { } + list(const list& other) : _buffers(other._buffers), _len(other._len), + _memcopy_count(other._memcopy_count), last_p(this) { + make_shareable(); + } list& operator= (const list& other) { if (this != &other) { _buffers = other._buffers; _len = other._len; + make_shareable(); } return *this; } @@ -404,10 +408,33 @@ public: unsigned align_memory); void rebuild_page_aligned(); - // sort-of-like-assignment-op - void claim(list& bl); - void claim_append(list& bl); - void claim_prepend(list& bl); + // assignment-op with move semantics + const static unsigned int CLAIM_DEFAULT = 0; + const static unsigned int CLAIM_ALLOW_NONSHAREABLE = 1; + + void claim(list& bl, unsigned int flags = CLAIM_DEFAULT); + void claim_append(list& bl, unsigned int flags = CLAIM_DEFAULT); + void claim_prepend(list& bl, unsigned int flags = CLAIM_DEFAULT); + + // clone non-shareable buffers (make shareable) + void make_shareable() { + std::list::iterator pb; + for (pb = _buffers.begin(); pb != _buffers.end(); ++pb) { + (void) pb->make_shareable(); + } + } + + // copy with explicit volatile-sharing semantics + void share(const list& bl) + { + if (this != &bl) { + clear(); + std::list::const_iterator pb; + for (pb = bl._buffers.begin(); pb != bl._buffers.end(); ++pb) { + push_back(*pb); + } + } + } iterator begin() { return iterator(this, 0); diff --git a/src/msg/Message.h b/src/msg/Message.h index cb47058d0c0f..87671dc10fbd 100644 --- a/src/msg/Message.h +++ b/src/msg/Message.h @@ -337,10 +337,11 @@ public: } bufferlist& get_data() { return data; } - void claim_data(bufferlist& bl) { + void claim_data(bufferlist& bl, + unsigned int flags = buffer::list::CLAIM_DEFAULT) { if (byte_throttler) byte_throttler->put(data.length()); - bl.claim(data); + bl.claim(data, flags); } off_t get_data_len() { return data.length(); }