From: Abutalib Aghayev Date: Fri, 12 Jun 2020 14:51:59 +0000 (-0400) Subject: os/bluestore: Add missing punch_hole call so that object offsets are updated. X-Git-Tag: v16.1.0~1998^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=6317feae56614529cd42553a6a7dba6f52999b5f;p=ceph.git os/bluestore: Add missing punch_hole call so that object offsets are updated. Rearranged the HM-SMR-related code in _do_write_small so that it reuses previously calculated offsets and, most importantly, added the missing onode->extent_map.punch_hole call that releases overwritten blobs and updates the location of the object to point to the latest copy. Without it the OSD could not restart because a stale copy of the osd_superblock object was being read. Signed-off-by: Abutalib Aghayev --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 7e7488f47934c..ddb730f7efa0d 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -13035,22 +13035,6 @@ void BlueStore::_do_write_small( << std::dec << dendl; ceph_assert(length < min_alloc_size); - // On zoned devices, the first goal is to support non-overwrite workloads, - // such as RGW, with large, aligned objects. Therefore, for user writes - // _do_write_small should not trigger. OSDs, however, write and update a tiny - // amount of metadata, such as OSD maps, to disk. For those cases, we - // temporarily just pad them to min_alloc_size and write them to a new place - // on every update. - if (bdev->is_smr()) { - BlobRef b = c->new_blob(); - uint64_t b_off = 0, b_off0 = 0; - bufferlist l; - blp.copy(length, l); - _pad_zeros(&l, &b_off0, min_alloc_size); - wctx->write(offset, b, min_alloc_size, b_off0, l, b_off, length, false, true); - return; - } - uint64_t end_offs = offset + length; logger->inc(l_bluestore_write_small); @@ -13073,6 +13057,22 @@ void BlueStore::_do_write_small( // than 'offset' only). 
o->extent_map.fault_range(db, min_off, offset + max_bsize - min_off); + // On zoned devices, the first goal is to support non-overwrite workloads, + // such as RGW, with large, aligned objects. Therefore, for user writes + // _do_write_small should not trigger. OSDs, however, write and update a tiny + // amount of metadata, such as OSD maps, to disk. For those cases, we + // temporarily just pad them to min_alloc_size and write them to a new place + // on every update. + if (bdev->is_smr()) { + BlobRef b = c->new_blob(); + uint64_t b_off = p2phase(offset, alloc_len); + uint64_t b_off0 = b_off; + _pad_zeros(&bl, &b_off0, min_alloc_size); + o->extent_map.punch_hole(c, offset, length, &wctx->old_extents); + wctx->write(offset, b, alloc_len, b_off0, bl, b_off, length, false, true); + return; + } + // Look for an existing mutable blob we can use. auto begin = o->extent_map.extent_map.begin(); auto end = o->extent_map.extent_map.end();