From: tridao Date: Sat, 1 Jul 2023 20:08:32 +0000 (-0300) Subject: First iteration of per-object fragmentation metrics X-Git-Tag: v19.0.0~773^2~10 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f4619147a56587d04c714dfcc621badfa9569df5;p=ceph.git First iteration of per-object fragmentation metrics Added a RandomCW generator and use a map to map logical offset to physical extents (this allows for sparse objects) Signed-off-by: Tri Dao --- diff --git a/src/test/objectstore/Fragmentation_simulator.cc b/src/test/objectstore/Fragmentation_simulator.cc index 89fb89d77927..689741dd94be 100644 --- a/src/test/objectstore/Fragmentation_simulator.cc +++ b/src/test/objectstore/Fragmentation_simulator.cc @@ -11,6 +11,7 @@ #include "include/buffer_fwd.h" #include "os/ObjectStore.h" #include "test/objectstore/ObjectStoreImitator.h" +#include #include #include #include @@ -46,7 +47,6 @@ public: using WorkloadGeneratorRef = std::shared_ptr; void add_generator(WorkloadGeneratorRef gen); - void clear_generators() { generators.clear(); } int begin_simulation_with_generators(); void init(const std::string &alloc_type, uint64_t size, uint64_t min_alloc_size = 4096); @@ -69,7 +69,7 @@ void FragmentationSimulator::init(const std::string &alloc_type, uint64_t size, std::cout << "Initializing ObjectStoreImitator" << std::endl; os = new ObjectStoreImitator(g_ceph_context, "", min_alloc_size); - std::cout << "Initializing allocator: " << alloc_type << " size: 0x" + std::cout << "Initializing allocator: " << alloc_type << ", size: 0x" << std::hex << size << std::dec << "\n" << std::endl; os->init_alloc(alloc_type, size); @@ -93,7 +93,9 @@ int FragmentationSimulator::begin_simulation_with_generators() { return r; } + generators.clear(); os->print_status(); + os->print_per_object_fragmentation(); return 0; } @@ -159,6 +161,45 @@ struct SimpleCWGenerator : public FragmentationSimulator::WorkloadGenerator { } }; +typedef boost::mt11213b gen_type; + +struct
RandomCWGenerator : public FragmentationSimulator::WorkloadGenerator { + std::string name() override { return "RandomCW"; } + int generate_txns(ObjectStore::CollectionHandle &ch, + ObjectStoreImitator *os) override { + + hobject_t h; + h.oid = fmt::format("obj"); + h.set_hash(1); + h.pool = 1; + ghobject_t obj(h); + + std::vector tls; + + ObjectStore::Transaction t1; + t1.create(ch->get_cid(), obj); + tls.emplace_back(std::move(t1)); + + gen_type rng(0); + boost::uniform_int<> u_size(0, _1Mb * 4); + boost::uniform_int<> u_offset(0, _1Mb); + + for (unsigned i{0}; i < 100; ++i) { + ObjectStore::Transaction t2; + + auto size = u_size(rng); + auto offset = u_offset(rng); + + t2.write(ch->get_cid(), obj, offset, size, make_bl(size, 'c')); + tls.emplace_back(std::move(t2)); + } + + os->queue_transactions(ch, tls); + os->verify_objects(ch); + return 0; + } +}; + // ----------- Tests ----------- TEST_P(FragmentationSimulator, SimpleCWGenerator) { @@ -167,6 +208,12 @@ TEST_P(FragmentationSimulator, SimpleCWGenerator) { begin_simulation_with_generators(); } +TEST_P(FragmentationSimulator, RandomCWGenerator) { + init(GetParam(), _1Mb * 8); + add_generator(std::make_shared()); + begin_simulation_with_generators(); +} + // ----------- main ----------- INSTANTIATE_TEST_SUITE_P(Allocator, FragmentationSimulator, diff --git a/src/test/objectstore/ObjectStoreImitator.cc b/src/test/objectstore/ObjectStoreImitator.cc index d6c1c09f5c1f..b393cd1178f8 100644 --- a/src/test/objectstore/ObjectStoreImitator.cc +++ b/src/test/objectstore/ObjectStoreImitator.cc @@ -8,6 +8,9 @@ #include "common/errno.h" #include "include/ceph_assert.h" #include "include/intarith.h" +#include "os/bluestore/bluestore_types.h" +#include +#include #define dout_context cct #define OBJECT_MAX_SIZE 0xffffffff // 32 bits @@ -31,7 +34,33 @@ void ObjectStoreImitator::print_status() { void ObjectStoreImitator::verify_objects(CollectionHandle &ch) { Collection *c = static_cast(ch.get()); - c->verify_objects(); + for 
(auto &[_, obj] : c->objects) { + obj->verify_extents(); + } +} + +void ObjectStoreImitator::print_per_object_fragmentation() { + for (auto &[_, coll_ref] : coll_map) { + double coll_total{0}; + for (auto &[id, obj] : coll_ref->objects) { + double frag_score{1}; + unsigned i{1}; + uint64_t ext_size = obj->ext_length(); + + for (auto &[_, ext] : obj->extent_map) { + double ext_frag = + std::pow(((double)ext.length / (double)ext_size), (double)i++); + frag_score -= ext_frag; + } + coll_total += frag_score; + + std::cout << "Object: " << id.hobj.oid.name + << ", hash: " << id.hobj.get_hash() + << " fragmentation score: " << frag_score << std::endl; + } + double avg = coll_total / coll_ref->objects.size(); + std::cout << "Average obj fragmentation " << avg << std::endl; + } } // ------- Transactions ------- @@ -324,7 +353,7 @@ void ObjectStoreImitator::_assign_nid(ObjectRef &o) { int ObjectStoreImitator::_do_zero(CollectionRef &c, ObjectRef &o, uint64_t offset, size_t length) { PExtentVector old_extents; - o->punch_hole(offset, length, old_extents); + o->punch_hole(offset, length, min_alloc_size, old_extents); alloc->release(old_extents); return 0; } @@ -340,35 +369,37 @@ int ObjectStoreImitator::_do_read(Collection *c, ObjectRef &o, uint64_t offset, int ObjectStoreImitator::_do_write(CollectionRef &c, ObjectRef &o, uint64_t offset, uint64_t length, bufferlist &bl, uint32_t fadvise_flags) { + if (length == 0) { + return 0; + } ceph_assert(length == bl.length()); - int r = 0; - uint64_t end = length + offset; + if (offset + length > o->size) { + o->size = offset + length; + } - if (length == 0) { + if (length < min_alloc_size) { return 0; } + // roundup offset, consider the beginning as deferred + offset = p2roundup(offset, min_alloc_size); + // align length, consider the end as deferred + length = p2align(length, min_alloc_size); + PExtentVector punched; - o->punch_hole(offset, length, punched); + o->punch_hole(offset, length, min_alloc_size, punched);
alloc->release(punched); // all writes will trigger an allocation - r = _do_alloc_write(c, o, bl); + int r = _do_alloc_write(c, o, bl, offset, length); if (r < 0) { derr << __func__ << " _do_alloc_write failed with " << cpp_strerror(r) << dendl; - goto out; - } - - if (end > o->size) { - o->size = end; + return r; } - r = 0; - -out: - return r; + return 0; } int ObjectStoreImitator::_do_clone_range(CollectionRef &c, ObjectRef &oldo, @@ -396,18 +427,18 @@ int ObjectStoreImitator::_write(CollectionRef &c, ObjectRef &o, uint64_t offset, } int ObjectStoreImitator::_do_alloc_write(CollectionRef coll, ObjectRef &o, - bufferlist &bl) { + bufferlist &bl, uint64_t offset, + uint64_t length) { // No compression for now - uint64_t need = p2roundup(static_cast(bl.length()), min_alloc_size); - + uint64_t need = length; PExtentVector prealloc; int64_t prealloc_left = alloc->allocate(need, min_alloc_size, need, 0, &prealloc); if (prealloc_left < 0 || prealloc_left < (int64_t)need) { derr << __func__ << " failed to allocate 0x" << std::hex << need - << " allocated 0x " << (prealloc_left < 0 ? 0 : prealloc_left) + << " allocated 0x" << (prealloc_left < 0 ? 
0 : prealloc_left) << " min_alloc_size 0x" << min_alloc_size << " available 0x " << alloc->get_free() << std::dec << dendl; if (prealloc.size()) @@ -439,7 +470,7 @@ int ObjectStoreImitator::_do_alloc_write(CollectionRef coll, ObjectRef &o, } } - o->append(extents); + o->append(extents, offset); if (prealloc_left > 0) { PExtentVector old_extents; @@ -459,11 +490,12 @@ int ObjectStoreImitator::_do_alloc_write(CollectionRef coll, ObjectRef &o, void ObjectStoreImitator::_do_truncate(CollectionRef &c, ObjectRef &o, uint64_t offset) { - if (offset == o->size) + // current size already satisfied + if (offset >= o->size) return; PExtentVector old_extents; - o->punch_hole(offset, o->size - offset, old_extents); + o->punch_hole(offset, o->size - offset, min_alloc_size, old_extents); o->size = offset; alloc->release(old_extents); } diff --git a/src/test/objectstore/ObjectStoreImitator.h b/src/test/objectstore/ObjectStoreImitator.h index 6698aac99a5c..b001366b307d 100644 --- a/src/test/objectstore/ObjectStoreImitator.h +++ b/src/test/objectstore/ObjectStoreImitator.h @@ -7,6 +7,7 @@ #pragma once #include "include/common_fwd.h" +#include "include/intarith.h" #include "os/ObjectStore.h" #include "os/bluestore/Allocator.h" #include "os/bluestore/bluestore_types.h" @@ -42,74 +43,135 @@ private: uint32_t expected_object_size = 0; uint32_t expected_write_size = 0; - // We assume these extents are sorted according by "logical" order. 
- PExtentVector extents; + typedef std::map ExtentMap; + ExtentMap extent_map; Object(Collection *c_, const ghobject_t &oid_, bool exists_ = false, uint64_t nid_ = 0, uint64_t size_ = 0) : c(c_), oid(oid_), exists(exists_), nid(nid_), size(size_) {} - void punch_hole(uint64_t offset, uint64_t length, + void punch_hole(uint64_t offset, uint64_t length, uint64_t min_alloc_size, PExtentVector &old_extents) { - if (offset >= size || length == 0) + if (extent_map.empty()) return; - if (offset + length >= size) { - length = size - offset; + PExtentVector to_be_punched; + std::vector deleted_keys; + uint64_t end = offset + length; + + uint64_t re_add_key{0}; + bluestore_pextent_t re_add; + + // std::cout << "current extents:\n"; + // for (auto &[l_off, e] : extent_map) { + // std::cout << "l_off " << l_off << ", off " << e.offset << ", len " + // << e.length << std::endl; + // } + + // std::cout << "wants to punch: off " << offset << ", len " << length + // << std::endl; + + auto it = extent_map.lower_bound(offset); + if ((it == extent_map.end() || it->first > offset) && + it != extent_map.begin()) { + it = std::prev(it); + + // diff between where we need to punch and current position + auto diff = offset - it->first; + // std::cout << "diff " << diff << " , p_off " << it->first << + // std::endl; + + // offset will be inside this extent + // otherwise skip over this extent and assume 'offset' has been passed + if (diff < it->second.length) { + // the hole is bigger than the remaining of the extent + if (end > it->first + it->second.length) { + to_be_punched.emplace_back(it->second.offset + diff, + it->second.length - diff); + } else { // else the hole is entirely in this extent + to_be_punched.emplace_back(it->second.offset + diff, length); + + re_add_key = end; + re_add.offset = it->second.offset + diff + length; + re_add.length = it->second.length - diff - length; + + // std::cout << "re_add: off " << re_add.offset << ", len " + // << re_add.length << std::endl; + } + 
+ // Modify the remaining extent's length + it->second.length = diff; + } + + it++; } - uint64_t l_offset{0}, punched_length{0}; - PExtentVector to_be_punched, remains; - for (auto e : extents) { - if (l_offset > offset && l_offset - length >= offset) - break; + // this loop is only valid when 'it' is in the hole + while (it != extent_map.end() && it->first < end) { + if (it->first + it->second.length > end) { // last extent to be punched + uint64_t remaining = it->first + it->second.length - end; + uint64_t punched = it->second.length - remaining; - // Found where we need to punch - if (l_offset >= offset) { - // We only punched a portion of the extent - if (e.length + punched_length > length) { - uint64_t left = e.length + punched_length - length; - e.length = length - punched_length; - remains.emplace_back(e.offset + e.length, left); - } + to_be_punched.emplace_back(it->second.offset, punched); + deleted_keys.push_back(it->first); - to_be_punched.push_back(e); - punched_length += e.length; - } else { // else the extent will remain - remains.push_back(e); + re_add.offset = it->second.offset + punched; + re_add.length = remaining; + re_add_key = it->first + punched; + + it++; + break; } - l_offset += e.length; + deleted_keys.push_back(it->first); + to_be_punched.push_back(it->second); + it++; + } + + for (auto k : deleted_keys) { + extent_map.erase(k); + } + + if (re_add.length > 0) { + extent_map[re_add_key] = re_add; } - size -= punched_length; - extents = remains; old_extents = to_be_punched; + // std::cout << "to be deleted\n"; + // for (auto e : to_be_punched) { + // std::cout << "off " << e.offset << ", len " << e.length << std::endl; + // } } - void append(PExtentVector &ext) { + void append(PExtentVector &ext, uint64_t offset) { for (auto &e : ext) { - extents.push_back(e); - size += e.length; + ceph_assert(e.length > 0); + // std::cout << "adding off " << offset << ", len " << e.length + // << std::endl; + extent_map[offset] = e; + offset += e.length; } - -
std::sort(extents.begin(), extents.end(), - [](bluestore_pextent_t &a, bluestore_pextent_t &b) { - return a.offset < b.offset; - }); } void verify_extents() { - uint64_t total{0}; - for (auto &e : extents) { - ceph_assert(total <= e.offset); - ceph_assert(e.length > 0); - total += e.length; + // std::cout << "verifying extents:\n"; + for (auto &[l_off, ext] : extent_map) { + // std::cout << l_off << " " << ext.offset << " " << ext.length + // << std::endl; + ceph_assert(ext.is_valid()); + ceph_assert(ext.length > 0); } + } - ceph_assert(total == size); + uint64_t ext_length() { + uint64_t ret{0}; + for (auto &[_, ext] : extent_map) { + ret += ext.length; + } + return ret; } }; + typedef boost::intrusive_ptr ObjectRef; struct Collection : public CollectionImpl { @@ -183,12 +245,6 @@ private: o->oid = new_oid; } - void verify_objects() { - for (auto &[_, obj] : objects) { - obj->verify_extents(); - } - } - Collection(ObjectStoreImitator *sim_, coll_t cid_) : CollectionImpl(sim_->cct, cid_), exists(true), commit_queue(nullptr) { } @@ -228,7 +284,8 @@ private: int _do_write(CollectionRef &c, ObjectRef &o, uint64_t offset, uint64_t length, ceph::buffer::list &bl, uint32_t fadvise_flags); - int _do_alloc_write(CollectionRef c, ObjectRef &o, bufferlist &bl); + int _do_alloc_write(CollectionRef c, ObjectRef &o, bufferlist &bl, + uint64_t offset, uint64_t length); void _do_truncate(CollectionRef &c, ObjectRef &o, uint64_t offset); int _do_zero(CollectionRef &c, ObjectRef &o, uint64_t offset, size_t length); @@ -268,6 +325,13 @@ public: void print_status(); void verify_objects(CollectionHandle &ch); + // Generate metrics for per-object fragmentation, defined by: + // frag_score = 1 - sum((size proportion of each extents / object size) ^ + // index of each extent in a vector sorted by descending length). + // This should only be called after the generators are finished as it will + // attempt to change an object's extents. 
+ void print_per_object_fragmentation(); + // Overrides // This is often not called directly but through queue_transaction