From: Igor Fedotov Date: Mon, 16 May 2016 16:53:37 +0000 (+0300) Subject: Adds cached buffer processing for _do_read X-Git-Tag: v11.0.0~359^2~71 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=1a0e9754db3c2489d022ff3a07bbad452113078e;p=ceph.git Adds cached buffer processing for _do_read Signed-off-by: Igor Fedotov --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index e20efd4a9827..41b804d69063 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -2801,6 +2801,10 @@ int BlueStore::_do_read( --lext; } + ready_regions_t ready_regions_in_cache, ready_regions; + interval_set ready_intervals_in_cache; + o->bc.read(off, length, ready_regions_in_cache, ready_intervals_in_cache); + //build blob list to read blobs2read_t blobs2read; while (l > 0 && lext != lextents.end()) { @@ -2811,8 +2815,10 @@ int BlueStore::_do_read( if (off >= lext->first && off < lext->first + lext->second.length) { uint32_t r_off = off - lext->first; l2read = MIN(l, lext->second.length - r_off); - regions2read_t& regions = blobs2read[bptr]; - regions.push_back(region_t(off, r_off + lext->second.offset, 0, l2read)); + if (!ready_intervals_in_cache.contains( off, l2read)) { + regions2read_t& regions = blobs2read[bptr]; + regions.push_back(region_t(off, r_off + lext->second.offset, 0, l2read)); + } ++lext; } else if (off >= lext->first + lext->second.length) { //handling the case when the first lookup get into the previous block due to the hole @@ -2826,8 +2832,6 @@ int BlueStore::_do_read( l -= l2read; } - ready_regions_t ready_regions; - //enumerate and read/decompress desired blobs blobs2read_t::iterator b2r_it = blobs2read.begin(); while (b2r_it != blobs2read.end()) { @@ -2861,7 +2865,7 @@ int BlueStore::_do_read( } } else { extents2read_t e2r; - int r = _blob2read_to_extents2read(bptr, r2r_it, r2r.cend(), &e2r); + int r = _blob2read_to_extents2read(bptr, r2r_it, r2r.cend(), ready_intervals_in_cache, &e2r); if (r < 0) return r; @@ -2877,20 +2881,59 @@ int BlueStore::_do_read( } //generate a resulting buffer - ready_regions_t::iterator rr_it = ready_regions.begin(); + auto rr_it = ready_regions.begin(); + auto rr_end = ready_regions.end(); + auto rr0_it = ready_regions_in_cache.begin(); + auto rr0_end = ready_regions_in_cache.end(); + off = offset; + while (rr_it != rr_end || rr0_it != rr0_end) { + ready_regions_t::iterator it; + if (rr_it != rr_end && (rr0_it == rr0_end || rr_it->first < rr0_it->first)) { + + uint64_t r_off = 0; + uint64_t r_len = rr_it->second.length(); + if (off > rr_it->first + r_len) { + ++rr_it; + continue; + } + if (rr0_it!=rr0_end && (rr0_it->first < rr_it->first + r_len)) { + r_len = rr_it->first - rr0_it->first; + } + + if (off > rr_it->first && r_len) { + r_off = off - rr_it->first; + assert(r_len >= r_off); + r_len -= r_off; + } + if (r_len == 0) { + ++rr_it; + continue; + } - while (rr_it != ready_regions.end()) { - if (off < rr_it->first) - bl.append_zero(rr_it->first - off); - off = rr_it->first + rr_it->second.length(); - assert(off <= offset + length); - bl.claim_append(rr_it->second); - ++rr_it; + if (off < rr_it->first + r_off) + bl.append_zero(rr_it->first + r_off - off); + if(r_off == 0 && r_len == rr_it->second.length()) { + bl.claim_append(rr_it->second); + } else { + bufferlist tmp; + tmp.substr_of(rr_it->second, r_off, r_len); + bl.claim_append(tmp); + } + off = rr_it->first + r_off + r_len; + ++rr_it; + } else if(rr0_it != rr0_end) { + if (off < rr0_it->first) + bl.append_zero(rr0_it->first + off); + bl.claim_append(rr0_it->second); + off = rr0_it->first + rr0_it->second.length(); + ++rr0_it; + } } + assert(offset + length >= off); bl.append_zero(offset + length - off); - r = bl.length(); + r = bl.length(); return r; } @@ -2952,7 +2995,6 @@ int BlueStore::_read_extent_sparse( while (cur != end) { assert(cur->ext_xoffset + cur->length <= extent->length); - uint64_t r_off = cur->ext_xoffset; uint64_t front_extra = r_off % chunk_size; r_off -= front_extra; @@ -2970,9 +3012,9 @@ int BlueStore::_read_extent_sparse( r = _verify_csum(blob, cur->blob_xoffset, bl); if (r < 0) { dout(20) << __func__ << " blob reading 0x" << std::hex - << cur->logical_offset << " 0x" - << cur->blob_xoffset << "~0x" << bl.length() - << " csum verification failed" << dendl; + << cur->logical_offset << " 0x" + << cur->blob_xoffset << "~0x" << bl.length() + << " csum verification failed" << dendl; return -EIO; } } @@ -2988,6 +3030,7 @@ int BlueStore::_blob2read_to_extents2read( const bluestore_blob_t* blob, BlueStore::regions2read_t::const_iterator cur, BlueStore::regions2read_t::const_iterator end, + const interval_set& ready_intervals_in_cache, BlueStore::extents2read_t* result) { result->clear(); @@ -3018,8 +3061,10 @@ int BlueStore::_blob2read_to_extents2read( if (r_len > 0) { r_len = MIN(r_len, l); const bluestore_pextent_t* eptr = &(*ext_it); - regions2read_t& regions = (*result)[eptr]; - regions.push_back(region_t(l_offs, x_offs, r_offs, r_len)); + if (!ready_intervals_in_cache.contains(l_offs, r_len)) { + regions2read_t& regions = (*result)[eptr]; + regions.push_back(region_t(l_offs, x_offs, r_offs, r_len)); + } l -= r_len; l_offs += r_len; x_offs += r_len; diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 83eb7c3f7b93..a7e6b99835a2 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -71,6 +71,31 @@ public: class TransContext; + + // -------------------------------------------------------- + // intermediate data structures used while reading + struct region_t { + uint64_t logical_offset; + uint64_t blob_xoffset; //region offset within the blob + uint64_t ext_xoffset; //region offset within the pextent + uint64_t length; + + region_t(uint64_t offset, uint64_t b_offs, uint64_t x_offs, uint32_t len) + : logical_offset(offset), + blob_xoffset(b_offs), + ext_xoffset(x_offs), + length(len) {} + region_t(const region_t& from) + : logical_offset(from.logical_offset), + blob_xoffset(from.blob_xoffset), + ext_xoffset(from.ext_xoffset), + length(from.length) {} + }; + typedef list regions2read_t; + typedef map blobs2read_t; + typedef map extents2read_t; + typedef map ready_regions_t; + /// cached buffer struct Buffer { enum { @@ -245,6 +270,43 @@ public: } } + void read(uint64_t offset, uint64_t length, BlueStore::ready_regions_t& res, interval_set res_intervals) { + res.clear(); + auto i = _data_lower_bound(offset); + uint64_t end = offset + length; + while (i != buffer_map.end() && offset < end && i->first < end) { + Buffer *b = i->second.get(); + + if (b->offset < offset) { + uint64_t head = offset - b->offset; + uint64_t l = MIN(length, b->length - head); + if (b->is_writing()) {//?? should we use in is_cleaning state too? + res[offset].substr_of(b->data, head, l); + res_intervals.insert( offset, l); + } + offset += l; + length -= l; + } else if (b->offset > offset) { + uint64_t head = b->offset - offset; + uint64_t l = MIN(length - head, b->length); + if (b->is_writing()) { + res[b->offset].substr_of(b->data, 0, l); + res_intervals.insert(b->offset, l); + } + offset += l + head; + length -= l + head; + } else { + if (b->is_writing()) { + res[b->offset].append(b->data); + res_intervals.insert(b->offset, b->length); + } + offset += b->length; + length -= b->length; + } + ++i; + } + } + void truncate(uint64_t offset) { discard(offset, (uint64_t)-1 - offset); } @@ -1104,30 +1166,9 @@ public: ThreadPool::TPHandle *handle = NULL) override; private: - // -------------------------------------------------------- - // intermediate data structures used while reading - struct region_t { - uint64_t logical_offset; - uint64_t blob_xoffset; //region offset within the blob - uint64_t ext_xoffset; //region offset within the pextent - uint64_t length; - - region_t(uint64_t offset, uint64_t b_offs, uint64_t x_offs, uint32_t len) - : logical_offset(offset), - blob_xoffset(b_offs), - ext_xoffset(x_offs), - length(len) {} - region_t(const region_t& from) - : logical_offset(from.logical_offset), - blob_xoffset(from.blob_xoffset), - ext_xoffset(from.ext_xoffset), - length(from.length) {} - }; - typedef list regions2read_t; - typedef map blobs2read_t; - typedef map extents2read_t; - typedef map ready_regions_t; + // -------------------------------------------------------- + // read processing internal methods int _read_whole_blob(const bluestore_blob_t* blob, OnodeRef o, bool buffered, bufferlist* result); int _read_extent_sparse( const bluestore_blob_t* blob, @@ -1142,6 +1183,7 @@ private: const bluestore_blob_t* blob, regions2read_t::const_iterator cur, regions2read_t::const_iterator end, + const interval_set& ready_intervals_in_cache, extents2read_t* result); int _verify_csum(const bluestore_blob_t* blob, uint64_t blob_xoffset, const bufferlist& bl) const;