#include "include/buffer_fwd.h"
#include "os/ObjectStore.h"
#include "test/objectstore/ObjectStoreImitator.h"
+#include <boost/random/uniform_int.hpp>
#include <fmt/core.h>
#include <gtest/gtest.h>
#include <iostream>
using WorkloadGeneratorRef = std::shared_ptr<WorkloadGenerator>;
void add_generator(WorkloadGeneratorRef gen);
- void clear_generators() { generators.clear(); }
int begin_simulation_with_generators();
void init(const std::string &alloc_type, uint64_t size,
uint64_t min_alloc_size = 4096);
std::cout << "Initializing ObjectStoreImitator" << std::endl;
os = new ObjectStoreImitator(g_ceph_context, "", min_alloc_size);
- std::cout << "Initializing allocator: " << alloc_type << " size: 0x"
+ std::cout << "Initializing allocator: " << alloc_type << ", size: 0x"
<< std::hex << size << std::dec << "\n"
<< std::endl;
os->init_alloc(alloc_type, size);
return r;
}
+ generators.clear();
os->print_status();
+ os->print_per_object_fragmentation();
return 0;
}
}
};
+using gen_type = boost::mt11213b;
+
+// Workload generator that creates a single object and then queues 100 writes
+// against it. Sizes and offsets are drawn from a PRNG seeded with a constant.
+struct RandomCWGenerator : public FragmentationSimulator::WorkloadGenerator {
+  std::string name() override { return "RandomCW"; }
+
+  // Builds one create transaction followed by 100 write transactions, queues
+  // them on `os` and then asks `os` to verify the objects of `ch`.
+  // Always returns 0.
+  int generate_txns(ObjectStore::CollectionHandle &ch,
+                    ObjectStoreImitator *os) override {
+    hobject_t target_hobj;
+    target_hobj.oid = fmt::format("obj");
+    target_hobj.set_hash(1);
+    target_hobj.pool = 1;
+    ghobject_t target(target_hobj);
+
+    std::vector<ObjectStore::Transaction> txns;
+
+    ObjectStore::Transaction create_txn;
+    create_txn.create(ch->get_cid(), target);
+    txns.emplace_back(std::move(create_txn));
+
+    gen_type rng(0);
+    boost::uniform_int<> size_dist(0, _1Mb * 4);
+    boost::uniform_int<> offset_dist(0, _1Mb);
+
+    unsigned writes_left = 100;
+    while (writes_left-- > 0) {
+      // The size is drawn before the offset; keep this order so the
+      // generated sequence stays the same.
+      auto write_size = size_dist(rng);
+      auto write_offset = offset_dist(rng);
+
+      ObjectStore::Transaction write_txn;
+      write_txn.write(ch->get_cid(), target, write_offset, write_size,
+                      make_bl(write_size, 'c'));
+      txns.emplace_back(std::move(write_txn));
+    }
+
+    os->queue_transactions(ch, txns);
+    os->verify_objects(ch);
+    return 0;
+  }
+};
+
// ----------- Tests -----------
TEST_P(FragmentationSimulator, SimpleCWGenerator) {
begin_simulation_with_generators();
}
+// Runs the RandomCW workload against an allocator of the parameterized type,
+// initialized with a size of _1Mb * 8.
+TEST_P(FragmentationSimulator, RandomCWGenerator) {
+  init(GetParam(), _1Mb * 8);
+  auto workload = std::make_shared<RandomCWGenerator>();
+  add_generator(std::move(workload));
+  begin_simulation_with_generators();
+}
+
// ----------- main -----------
INSTANTIATE_TEST_SUITE_P(Allocator, FragmentationSimulator,
#include "common/errno.h"
#include "include/ceph_assert.h"
#include "include/intarith.h"
+#include "os/bluestore/bluestore_types.h"
+#include <algorithm>
+#include <cmath>
#define dout_context cct
#define OBJECT_MAX_SIZE 0xffffffff // 32 bits
// Run verify_extents() on every object of the collection behind `ch`.
void ObjectStoreImitator::verify_objects(CollectionHandle &ch) {
// The handle is downcast unchecked; it is assumed to wrap a Collection.
Collection *c = static_cast<Collection *>(ch.get());
- c->verify_objects();
+ for (auto &[_, obj] : c->objects) {
+ obj->verify_extents();
+ }
+}
+
+// Print a fragmentation score for every object of every collection, followed
+// by the average score of each collection.
+//
+//   frag_score = 1 - sum_i((len_i / total_len) ^ i)
+//
+// where the extents of an object are ranked by descending length, i is the
+// 1-based rank and total_len is the sum of the object's extent lengths. An
+// object stored in a single extent scores 0; an object without any extent
+// keeps the initial score of 1. Collections without objects are skipped.
+void ObjectStoreImitator::print_per_object_fragmentation() {
+  for (auto &[_, coll_ref] : coll_map) {
+    // Nothing to report, and averaging below would divide by zero.
+    if (coll_ref->objects.empty()) {
+      continue;
+    }
+
+    double coll_total{0};
+    for (auto &[id, obj] : coll_ref->objects) {
+      // extent_map is keyed by logical offset, so rank the extents on a copy
+      // of their lengths instead of relying on the map's iteration order.
+      std::vector<uint64_t> lengths;
+      lengths.reserve(obj->extent_map.size());
+      for (auto &[_, ext] : obj->extent_map) {
+        lengths.push_back(ext.length);
+      }
+      std::sort(lengths.begin(), lengths.end(),
+                [](uint64_t a, uint64_t b) { return a > b; });
+
+      const double ext_size = static_cast<double>(obj->ext_length());
+      double frag_score{1};
+      unsigned rank{1};
+      for (uint64_t len : lengths) {
+        frag_score -= std::pow(static_cast<double>(len) / ext_size,
+                               static_cast<double>(rank));
+        ++rank;
+      }
+      coll_total += frag_score;
+
+      std::cout << "Object: " << id.hobj.oid.name
+                << ", hash: " << id.hobj.get_hash()
+                << " fragmentation score: " << frag_score << std::endl;
+    }
+    double avg = coll_total / coll_ref->objects.size();
+    std::cout << "Average obj fragmentation " << avg << std::endl;
+  }
}
// ------- Transactions -------
// Punch the logical range [offset, offset + length) out of `o` and hand the
// physical extents that backed it to the allocator. The object's size is not
// touched here. Always returns 0.
int ObjectStoreImitator::_do_zero(CollectionRef &c, ObjectRef &o,
uint64_t offset, size_t length) {
PExtentVector old_extents;
- o->punch_hole(offset, length, old_extents);
+ o->punch_hole(offset, length, min_alloc_size, old_extents);
alloc->release(old_extents);
return 0;
}
int ObjectStoreImitator::_do_write(CollectionRef &c, ObjectRef &o,
uint64_t offset, uint64_t length,
bufferlist &bl, uint32_t fadvise_flags) {
+ if (length == 0) {
+ return 0;
+ }
ceph_assert(length == bl.length());
- int r = 0;
- uint64_t end = length + offset;
+ if (offset + length > o->size) {
+ o->size = offset + length;
+ }
- if (length == 0) {
+ if (length < min_alloc_size) {
return 0;
}
+ // roundup offset, consider the beginning as deffered
+ offset = p2roundup(offset, min_alloc_size);
+ // align length, consider the end as deffered
+ length = p2align(length, min_alloc_size);
+
PExtentVector punched;
- o->punch_hole(offset, length, punched);
+ o->punch_hole(offset, length, min_alloc_size, punched);
alloc->release(punched);
// all writes will trigger an allocation
- r = _do_alloc_write(c, o, bl);
+ int r = _do_alloc_write(c, o, bl, offset, length);
if (r < 0) {
derr << __func__ << " _do_alloc_write failed with " << cpp_strerror(r)
<< dendl;
- goto out;
- }
-
- if (end > o->size) {
- o->size = end;
+ return r;
}
- r = 0;
-
-out:
- return r;
+ return 0;
}
int ObjectStoreImitator::_do_clone_range(CollectionRef &c, ObjectRef &oldo,
}
int ObjectStoreImitator::_do_alloc_write(CollectionRef coll, ObjectRef &o,
- bufferlist &bl) {
+ bufferlist &bl, uint64_t offset,
+ uint64_t length) {
// No compression for now
- uint64_t need = p2roundup(static_cast<uint64_t>(bl.length()), min_alloc_size);
-
+ uint64_t need = length;
PExtentVector prealloc;
int64_t prealloc_left =
alloc->allocate(need, min_alloc_size, need, 0, &prealloc);
if (prealloc_left < 0 || prealloc_left < (int64_t)need) {
derr << __func__ << " failed to allocate 0x" << std::hex << need
- << " allocated 0x " << (prealloc_left < 0 ? 0 : prealloc_left)
+ << " allocated 0x" << (prealloc_left < 0 ? 0 : prealloc_left)
<< " min_alloc_size 0x" << min_alloc_size << " available 0x "
<< alloc->get_free() << std::dec << dendl;
if (prealloc.size())
}
}
- o->append(extents);
+ o->append(extents, offset);
if (prealloc_left > 0) {
PExtentVector old_extents;
// Shrink `o` to `offset` bytes and release the extents that backed the
// removed tail. A target at or beyond the current size is a no-op: this
// function never grows the object.
void ObjectStoreImitator::_do_truncate(CollectionRef &c, ObjectRef &o,
uint64_t offset) {
- if (offset == o->size)
+ // current size already satisfied
+ if (offset >= o->size)
return;
PExtentVector old_extents;
- o->punch_hole(offset, o->size - offset, old_extents);
+ o->punch_hole(offset, o->size - offset, min_alloc_size, old_extents);
o->size = offset;
alloc->release(old_extents);
}
#pragma once
#include "include/common_fwd.h"
+#include "include/intarith.h"
#include "os/ObjectStore.h"
#include "os/bluestore/Allocator.h"
#include "os/bluestore/bluestore_types.h"
uint32_t expected_object_size = 0;
uint32_t expected_write_size = 0;
- // We assume these extents are sorted according by "logical" order.
- PExtentVector extents;
+ typedef std::map<uint64_t, bluestore_pextent_t> ExtentMap;
+ ExtentMap extent_map;
// Construct an object owned by collection `c_` and identified by `oid_`;
// `exists_`, `nid_` and `size_` initialize the members of the same name.
Object(Collection *c_, const ghobject_t &oid_, bool exists_ = false,
uint64_t nid_ = 0, uint64_t size_ = 0)
: c(c_), oid(oid_), exists(exists_), nid(nid_), size(size_) {}
- void punch_hole(uint64_t offset, uint64_t length,
+ // Remove the logical range [offset, offset + length) from extent_map.
+ //
+ // Extents fully inside the range are dropped, extents straddling either
+ // edge are trimmed, and an extent that contains the whole range is split in
+ // two. On return `old_extents` is replaced by the physical pieces that were
+ // removed so that the caller can release them; it is left untouched when
+ // there is nothing to punch (no extents, or a zero-length hole).
+ // `min_alloc_size` is currently unused and `size` is not modified.
+ void punch_hole(uint64_t offset, uint64_t length, uint64_t min_alloc_size,
PExtentVector &old_extents) {
- if (offset >= size || length == 0)
+ // A zero-length hole must not split an extent or emit an empty pextent.
+ if (extent_map.empty() || length == 0)
return;
- if (offset + length >= size) {
- length = size - offset;
+ PExtentVector to_be_punched;
+ std::vector<uint64_t> deleted_keys;
+ uint64_t end = offset + length;
+
+ // At most one trimmed remainder has to be re-inserted under a new key.
+ uint64_t re_add_key{0};
+ bluestore_pextent_t re_add;
+
+ // std::cout << "current extents:\n";
+ // for (auto &[l_off, e] : extent_map) {
+ // std::cout << "l_off " << l_off << ", off " << e.offset << ", len "
+ // << e.length << std::endl;
+ // }
+
+ // std::cout << "wants to punch: off " << offset << ", len " << length
+ // << std::endl;
+
+ // Step back to the extent that starts before `offset`, if any: it may
+ // overlap the beginning of the hole.
+ auto it = extent_map.lower_bound(offset);
+ if ((it == extent_map.end() || it->first > offset) &&
+ it != extent_map.begin()) {
+ it = std::prev(it);
+
+ // diff between where we need to punch and current position
+ auto diff = offset - it->first;
+ // std::cout << "diff " << diff << " , p_off " << it->first <<
+ // std::endl;
+
+ // offset will be inside this extent
+ // otherwise skip over this extent and assume 'offset' has been passed
+ if (diff < it->second.length) {
+ // the hole is bigger than the remaining of the extent
+ if (end > it->first + it->second.length) {
+ to_be_punched.emplace_back(it->second.offset + diff,
+ it->second.length - diff);
+ } else { // else the hole is entirely in this extent
+ to_be_punched.emplace_back(it->second.offset + diff, length);
+
+ re_add_key = end;
+ re_add.offset = it->second.offset + diff + length;
+ re_add.length = it->second.length - diff - length;
+
+ // std::cout << "re_add: off " << re_add.offset << ", len "
+ // << re_add.length << std::endl;
+ }
+
+ // Modify the remaining extent's length
+ it->second.length = diff;
+ }
+
+ it++;
}
- uint64_t l_offset{0}, punched_length{0};
- PExtentVector to_be_punched, remains;
- for (auto e : extents) {
- if (l_offset > offset && l_offset - length >= offset)
- break;
+ // this loop is only valid when 'it' is in the hole
+ while (it != extent_map.end() && it->first < end) {
+ if (it->first + it->second.length > end) { // last extent to be punched
+ uint64_t remaining = it->first + it->second.length - end;
+ uint64_t punched = it->second.length - remaining;
- // Found where we need to punch
- if (l_offset >= offset) {
- // We only punched a portion of the extent
- if (e.length + punched_length > length) {
- uint64_t left = e.length + punched_length - length;
- e.length = length - punched_length;
- remains.emplace_back(e.offset + e.length, left);
- }
+ to_be_punched.emplace_back(it->second.offset, punched);
+ deleted_keys.push_back(it->first);
- to_be_punched.push_back(e);
- punched_length += e.length;
- } else { // else the extent will remain
- remains.push_back(e);
+ re_add.offset = it->second.offset + punched;
+ re_add.length = remaining;
+ re_add_key = it->first + punched;
+
+ it++;
+ break;
}
- l_offset += e.length;
+ deleted_keys.push_back(it->first);
+ to_be_punched.push_back(it->second);
+ it++;
+ }
+
+ // Erase after the walk so that the iterator used above stays valid.
+ for (auto k : deleted_keys) {
+ extent_map.erase(k);
+ }
+
+ if (re_add.length > 0) {
+ extent_map[re_add_key] = re_add;
}
- size -= punched_length;
- extents = remains;
old_extents = to_be_punched;
+ // std::cout << "to be deleted\n";
+ // for (auto e : to_be_punched) {
+ // std::cout << "off " << e.offset << ", len " << e.length << std::endl;
+ // }
}
// Map the physical extents of `ext` back to back into extent_map, the first
// one at logical `offset`. An entry already stored under the same logical
// offset is overwritten. Every extent must have a non-zero length.
- void append(PExtentVector &ext) {
+ void append(PExtentVector &ext, uint64_t offset) {
for (auto &e : ext) {
- extents.push_back(e);
- size += e.length;
+ ceph_assert(e.length > 0);
+ // std::cout << "adding off " << offset << ", len " << e.length
+ // << std::endl;
+ extent_map[offset] = e;
// The next extent starts right after this one in logical space.
+ offset += e.length;
}
-
- std::sort(extents.begin(), extents.end(),
- [](bluestore_pextent_t &a, bluestore_pextent_t &b) {
- return a.offset < b.offset;
- });
}
// Assert that every extent in extent_map passes is_valid() and has a
// non-zero length.
void verify_extents() {
- uint64_t total{0};
- for (auto &e : extents) {
- ceph_assert(total <= e.offset);
- ceph_assert(e.length > 0);
- total += e.length;
+ // std::cout << "verifying extents:\n";
+ for (auto &[l_off, ext] : extent_map) {
+ // std::cout << l_off << " " << ext.offset << " " << ext.length
+ // << std::endl;
+ ceph_assert(ext.is_valid());
+ ceph_assert(ext.length > 0);
}
+ }
- ceph_assert(total == size);
// Return the sum of the lengths of all extents in extent_map.
+ uint64_t ext_length() {
+ uint64_t ret{0};
+ for (auto &[_, ext] : extent_map) {
+ ret += ext.length;
+ }
+ return ret;
}
};
+
typedef boost::intrusive_ptr<Object> ObjectRef;
struct Collection : public CollectionImpl {
o->oid = new_oid;
}
- void verify_objects() {
- for (auto &[_, obj] : objects) {
- obj->verify_extents();
- }
- }
-
// Construct the collection `cid_` using the CephContext of `sim_`; it starts
// out marked as existing and without a commit queue.
Collection(ObjectStoreImitator *sim_, coll_t cid_)
: CollectionImpl(sim_->cct, cid_), exists(true), commit_queue(nullptr) {
}
int _do_write(CollectionRef &c, ObjectRef &o, uint64_t offset,
uint64_t length, ceph::buffer::list &bl,
uint32_t fadvise_flags);
- int _do_alloc_write(CollectionRef c, ObjectRef &o, bufferlist &bl);
+ int _do_alloc_write(CollectionRef c, ObjectRef &o, bufferlist &bl,
+ uint64_t offset, uint64_t length);
void _do_truncate(CollectionRef &c, ObjectRef &o, uint64_t offset);
int _do_zero(CollectionRef &c, ObjectRef &o, uint64_t offset, size_t length);
void print_status();
void verify_objects(CollectionHandle &ch);
+ // Print per-object fragmentation metrics, defined by:
+ // frag_score = 1 - sum((length of each extent / total length of the
+ // object's extents) ^ 1-based index of that extent in a vector sorted by
+ // descending length).
+ // This should only be called after the generators have finished, so that
+ // the scores reflect the final extent layout.
+ void print_per_object_fragmentation();
+
// Overrides
// This is often not called directly but through queue_transaction