From: Igor Fedotov Date: Tue, 28 Jun 2016 16:32:38 +0000 (+0300) Subject: os/bluestore: do not use ref_map for non-shared objects X-Git-Tag: ses5-milestone5~268^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=53af17592d68de3362db739b99389e2b86ff3bb6;p=ceph.git os/bluestore: do not use ref_map for non-shared objects Signed-off-by: Igor Fedotov --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 3ed60bd0c16b..825c8be537e0 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -2837,21 +2837,20 @@ int BlueStore::_fsck_verify_blob_map( ++errors; continue; } - if (pv->second != b.blob.ref_map) { + if (b.blob.has_refmap() && pv->second != b.blob.ref_map) { derr << " " << what << " blob " << b.id << " ref_map " << b.blob.ref_map << " != expected " << pv->second << dendl; ++errors; } - v.erase(pv); - interval_set span; bool compressed = b.blob.is_compressed(); if (compressed) { expected_statfs.compressed += b.blob.compressed_length; - for (auto& r : b.blob.ref_map.ref_map) { - expected_statfs.compressed_original += r.second.refs * r.second.length; + for (auto& r : b.blob.has_refmap() ? b.blob.ref_map.ref_map : v[b.id].ref_map) { + expected_statfs.compressed_original += r.second.refs * r.second.length; } } + v.erase(pv); for (auto& p : b.blob.extents) { if (!p.is_valid()) { continue; @@ -5813,12 +5812,10 @@ void BlueStore::_do_write_small( &txc->ioc, wctx->buffered); }); b->blob.calc_csum(b_off, padded); - o->onode.punch_hole(offset, length, &wctx->lex_old); dout(20) << __func__ << " lexold 0x" << std::hex << offset << std::dec << ": " << ep->second << dendl; - bluestore_lextent_t& lex = o->onode.extent_map[offset] = - bluestore_lextent_t(blob, b_off + head_pad, length); - b->blob.ref_map.get(lex.offset, lex.length); + bluestore_lextent_t lex(blob, b_off + head_pad, length); + o->onode.set_lextent(offset, lex, &b->blob, &wctx->lex_old); b->blob.mark_used(lex.offset, lex.length, min_alloc_size); txc->statfs_delta.stored() += lex.length; dout(20) << __func__ << " lex 0x" << std::hex << offset << std::dec @@ -5888,10 +5885,8 @@ void BlueStore::_do_write_small( dout(20) << __func__ << " wal write 0x" << std::hex << b_off << "~" << b_len << std::dec << " of mutable " << blob << ": " << *b << " at " << op->extents << dendl; - o->onode.punch_hole(offset, length, &wctx->lex_old); - bluestore_lextent_t& lex = o->onode.extent_map[offset] = - bluestore_lextent_t(blob, offset - bstart, length); - b->blob.ref_map.get(lex.offset, lex.length); + bluestore_lextent_t lex(blob, offset - bstart, length); + o->onode.set_lextent(offset, lex, &b->blob, &wctx->lex_old); b->blob.mark_used(lex.offset, lex.length, min_alloc_size); txc->statfs_delta.stored() += lex.length; dout(20) << __func__ << " lex 0x" << std::hex << offset @@ -5909,10 +5904,8 @@ void BlueStore::_do_write_small( uint64_t b_off = P2PHASE(offset, alloc_len); b->bc.write(txc->seq, b_off, bl, wctx->buffered ? 0 : Buffer::FLAG_NOCACHE); _pad_zeros(&bl, &b_off, block_size); - o->onode.punch_hole(offset, length, &wctx->lex_old); - bluestore_lextent_t& lex = o->onode.extent_map[offset] = - bluestore_lextent_t(b->id, P2PHASE(offset, alloc_len), length); - b->blob.ref_map.get(lex.offset, lex.length); + bluestore_lextent_t lex(b->id, P2PHASE(offset, alloc_len), length); + o->onode.set_lextent(offset, lex, &b->blob, &wctx->lex_old); txc->statfs_delta.stored() += lex.length; dout(20) << __func__ << " lex 0x" << std::hex << offset << std::dec << ": " << lex << dendl; @@ -5944,9 +5937,8 @@ void BlueStore::_do_write_big( blp.copy(l, t); b->bc.write(txc->seq, 0, t, wctx->buffered ? 0 : Buffer::FLAG_NOCACHE); wctx->write(b, l, 0, t, false); - o->onode.punch_hole(offset, l, &wctx->lex_old); - o->onode.extent_map[offset] = bluestore_lextent_t(b->id, 0, l); - b->blob.ref_map.get(0, l); + bluestore_lextent_t lex(b->id, 0, l); + o->onode.set_lextent(offset, lex, &b->blob, &wctx->lex_old); txc->statfs_delta.stored() += l; dout(20) << __func__ << " lex 0x" << std::hex << offset << std::dec << ": " << o->onode.extent_map[offset] << dendl; @@ -6097,12 +6089,15 @@ void BlueStore::_wctx_finish( WriteContext *wctx) { dout(10) << __func__ << " lex_old " << wctx->lex_old << dendl; + set > blobs2remove; for (auto &lo : wctx->lex_old) { bluestore_lextent_t& l = lo.second; Blob *b = c->get_blob(o, l.blob); vector r; bool compressed = b->blob.is_compressed(); - b->blob.put_ref(l.offset, l.length, min_alloc_size, &r); + if (o->onode.deref_lextent(lo.first, l, &b->blob, min_alloc_size, &r)) { + blobs2remove.insert(std::make_pair(l.blob >= 0, b)); + } // we can't invalidate our logical extents as we drop them because // other lextents (either in our onode or others) may still // reference them. but we can throw out anything that is no @@ -6122,21 +6117,20 @@ void BlueStore::_wctx_finish( txc->statfs_delta.compressed_allocated() -= e.length; } } - if (b->blob.ref_map.empty()) { - dout(20) << __func__ << " rm blob " << *b << dendl; - txc->statfs_delta.compressed() -= b->blob.get_compressed_payload_length(); - if (l.blob >= 0) { - o->blob_map.erase(b); - } else { - o->bnode->blob_map.erase(b); - } - } else { - dout(20) << __func__ << " keep blob " << *b << dendl; - } if (l.blob < 0) { txc->write_bnode(o->bnode); } } + for (auto br : blobs2remove) { + Blob* b = br.second; + dout(20) << __func__ << " rm blob " << *b << dendl; + txc->statfs_delta.compressed() -= b->blob.get_compressed_payload_length(); + if (br.first) { + o->blob_map.erase(b); + } else { + o->bnode->blob_map.erase(b); + } + } o->onode.compress_extent_map(); } @@ -6679,15 +6673,22 @@ int BlueStore::_clone(TransContext *txc, // move blobs map moved_blobs; for (auto& p : oldo->onode.extent_map) { - if (!p.second.is_shared() && moved_blobs.count(p.second.blob) == 0) { - Blob *b = oldo->blob_map.get(p.second.blob); - oldo->blob_map.erase(b); - newo->bnode->blob_map.claim(b); - moved_blobs[p.second.blob] = b->id; - dout(30) << __func__ << " moving old onode blob " << p.second.blob - << " to bnode blob " << b->id << dendl; - b->blob.clear_flag(bluestore_blob_t::FLAG_MUTABLE); - } + if (!p.second.is_shared()) { + Blob *b; + if (moved_blobs.count(p.second.blob) == 0) { + b = oldo->blob_map.get(p.second.blob); + oldo->blob_map.erase(b); + newo->bnode->blob_map.claim(b); + moved_blobs[p.second.blob] = b->id; + dout(30) << __func__ << " moving old onode blob " << p.second.blob + << " to bnode blob " << b->id << dendl; + b->blob.clear_flag(bluestore_blob_t::FLAG_MUTABLE); + b->blob.set_flag(bluestore_blob_t::FLAG_HAS_REFMAP); + } else { + b = newo->bnode->blob_map.get(moved_blobs[p.second.blob]); + } + b->blob.get_ref(p.second.offset, p.second.length); + } } // update lextents for (auto& p : oldo->onode.extent_map) { @@ -6696,9 +6697,7 @@ int BlueStore::_clone(TransContext *txc, } newo->onode.extent_map[p.first] = p.second; assert(p.second.blob < 0); - newo->bnode->blob_map.get(-p.second.blob)->blob.ref_map.get( - p.second.offset, - p.second.length); + newo->bnode->blob_map.get(-p.second.blob)->blob.get_ref(p.second.offset, p.second.length); txc->statfs_delta.stored() += p.second.length; } _dump_onode(newo); diff --git a/src/os/bluestore/bluestore_types.h b/src/os/bluestore/bluestore_types.h index 7e8cd858c4ae..1280ba9081fa 100644 --- a/src/os/bluestore/bluestore_types.h +++ b/src/os/bluestore/bluestore_types.h @@ -245,9 +245,7 @@ struct bluestore_blob_t { typedef std::bitset unused_t; unused_t unused; ///< portion that has never been written to - bluestore_blob_t(uint32_t f = 0) : flags(f) { - set_flag(FLAG_HAS_REFMAP); - } + bluestore_blob_t(uint32_t f = 0) : flags(f) {} void encode(bufferlist& bl) const; void decode(bufferlist::iterator& p);