generators.clear();
os->print_status();
os->print_per_object_fragmentation();
+ os->print_per_access_fragmentation();
return 0;
}
t2.create(ch->get_cid(), obj2);
tls.emplace_back(std::move(t2));
+ os->queue_transactions(ch, tls);
+ os->verify_objects(ch);
+
gen_type rng(time(0));
boost::uniform_int<> u_size(0, _1Mb * 4);
boost::uniform_int<> u_offset(0, _1Mb);
for (unsigned i{0}; i < 200; ++i) {
- ObjectStore::Transaction t3;
+ tls.clear();
+ ObjectStore::Transaction t3;
auto size = u_size(rng);
auto offset = u_offset(rng);
tls.emplace_back(std::move(t3));
ObjectStore::Transaction t4;
-
size = u_size(rng);
offset = u_offset(rng);
t4.write(ch->get_cid(), obj2, offset, size, make_bl(size, 'c'));
tls.emplace_back(std::move(t4));
+
+ os->queue_transactions(ch, tls);
+ os->verify_objects(ch);
+
+ bufferlist dummy;
+
+ size = u_size(rng);
+ offset = u_offset(rng);
+ os->read(ch, obj1, offset, size, dummy);
+
+ dummy.clear();
+
+ size = u_size(rng);
+ offset = u_offset(rng);
+ os->read(ch, obj2, offset, size, dummy);
}
- os->queue_transactions(ch, tls);
- os->verify_objects(ch);
return 0;
}
};
// ---------- Object -----------
void ObjectStoreImitator::Object::punch_hole(uint64_t offset, uint64_t length,
- uint64_t min_alloc_size,
PExtentVector &old_extents) {
if (extent_map.empty())
return;
}
}
+// Report per-access (read) fragmentation for every object that was read:
+// the average number of blocks touched per read, the average number of
+// jumps (physical discontinuities between extents) per read, and the ratio
+// of jumps to blocks. Should be called after generators have finished.
+void ObjectStoreImitator::print_per_access_fragmentation() {
+  for (auto &[_, coll_ref] : coll_map) {
+    for (auto &[id, read_ops] : coll_ref->read_ops) {
+      // Guard: an empty op vector would divide by zero below.
+      if (read_ops.empty())
+        continue;
+
+      unsigned blks{0}, jmps{0};
+      for (auto &op : read_ops) {
+        blks += op.blks;
+        jmps += op.jmps;
+      }
+
+      double avg_total_blks = (double)blks / read_ops.size();
+      double avg_jmps = (double)jmps / read_ops.size();
+      // Reads that touched no mapped blocks produce no jumps either;
+      // report 0 instead of NaN/inf.
+      double avg_jmps_per_blk = blks ? (double)jmps / (double)blks : 0.0;
+
+      std::cout << "Object: " << id.hobj.oid.name
+                << ", average total blks read: " << avg_total_blks
+                << ", average total jumps: " << avg_jmps
+                << ", average jumps per block: " << avg_jmps_per_blk
+                << std::endl;
+    }
+  }
+}
+
// ------- Transactions -------
int ObjectStoreImitator::queue_transactions(CollectionHandle &ch,
+// Zero a logical range of an object: punch a hole over
+// [offset, offset + length) and return the physical extents that backed it
+// to the allocator for reuse. Always returns 0 (success).
int ObjectStoreImitator::_do_zero(CollectionRef &c, ObjectRef &o,
uint64_t offset, size_t length) {
PExtentVector old_extents;
+// punch_hole collects the carved-out physical extents into old_extents;
+// it no longer takes min_alloc_size.
- o->punch_hole(offset, length, min_alloc_size, old_extents);
+ o->punch_hole(offset, length, old_extents);
alloc->release(old_extents);
return 0;
}
int ObjectStoreImitator::_do_read(Collection *c, ObjectRef &o, uint64_t offset,
- size_t len, ceph::buffer::list &bl,
+ size_t length, ceph::buffer::list &bl,
uint32_t op_flags, uint64_t retry_count) {
- auto data = std::string(len, 'a');
+ auto data = std::string(length, 'a');
bl.append(data);
+
+ // Keeping track of read ops to evaluate per-access fragmentation
+ ReadOp op(offset, length);
+ bluestore_pextent_t last_ext;
+ uint64_t end = length + offset;
+
+ auto it = o->extent_map.lower_bound(offset);
+ if ((it == o->extent_map.end() || it->first > offset) &&
+ it != o->extent_map.begin()) {
+ it = std::prev(it);
+
+ auto diff = offset - it->first;
+ if (diff < it->second.length) {
+ // end not in this extent
+ if (end > it->first + it->second.length) {
+ op.blks += div_round_up(it->second.length - diff, min_alloc_size);
+ } else { // end is within this extent so we take up the entire length
+ op.blks += div_round_up(length, min_alloc_size);
+ }
+
+ last_ext = it->second;
+ it++;
+ }
+ }
+
+ while (it != o->extent_map.end() && it->first < end) {
+ auto extent = it->second;
+ if (last_ext.length > 0 &&
+ last_ext.offset + last_ext.length != extent.offset) {
+ op.jmps++;
+ }
+
+ if (extent.length > length) {
+ op.blks += div_round_up(length, min_alloc_size);
+ break;
+ }
+
+ op.blks += div_round_up(extent.length, min_alloc_size);
+ length -= extent.length;
+ it++;
+ }
+
+ c->read_ops[o->oid].push_back(op);
+ // std::cout << "blks: " << op.blks << ", jmps: " << op.jmps
+ // << ", offset: " << op.offset << ", length: " << op.length
+ // << std::endl;
+
return bl.length();
}
length = p2align(length, min_alloc_size);
PExtentVector punched;
- o->punch_hole(offset, length, min_alloc_size, punched);
+ o->punch_hole(offset, length, punched);
alloc->release(punched);
// all writes will trigger an allocation
return;
PExtentVector old_extents;
- o->punch_hole(offset, o->size - offset, min_alloc_size, old_extents);
+ o->punch_hole(offset, o->size - offset, old_extents);
o->size = offset;
alloc->release(old_extents);
}
uint64_t nid_ = 0, uint64_t size_ = 0)
: c(c_), oid(oid_), exists(exists_), nid(nid_), size(size_) {}
- void punch_hole(uint64_t offset, uint64_t length, uint64_t min_alloc_size,
+ void punch_hole(uint64_t offset, uint64_t length,
PExtentVector &old_extents);
void verify_extents();
void append(PExtentVector &ext, uint64_t offset);
uint64_t ext_length();
};
-
typedef boost::intrusive_ptr<Object> ObjectRef;
+ // Bookkeeping for a single simulated read, used to evaluate per-access
+ // fragmentation.
+ struct ReadOp {
+ uint64_t offset; // logical offset of the read
+ uint64_t length; // logical length of the read
+ unsigned blks; // number of min_alloc_size blocks the read covered
+ unsigned
+ jmps; // # of times we have to stop iterating over continuous extents
+ ReadOp(uint64_t offset = 0, uint64_t length = 0, unsigned blks = 0,
+ unsigned jmps = 0)
+ : offset(offset), length(length), blks(blks), jmps(jmps) {}
+ };
+
struct Collection : public CollectionImpl {
bluestore_cnode_t cnode;
std::map<ghobject_t, ObjectRef> objects;
+ std::unordered_map<ghobject_t, std::vector<ReadOp>> read_ops;
ceph::shared_mutex lock = ceph::make_shared_mutex(
"FragmentationSimulator::Collection::lock", true, false);
int _clone(CollectionRef &c, ObjectRef &oldo, ObjectRef &newo);
int _clone_range(CollectionRef &c, ObjectRef &oldo, ObjectRef &newo,
uint64_t srcoff, uint64_t length, uint64_t dstoff);
- int read(CollectionHandle &c, const ghobject_t &oid, uint64_t offset,
- size_t len, ceph::buffer::list &bl, uint32_t op_flags = 0) override;
// Helpers
uint32_t fadvise_flags);
int _do_alloc_write(CollectionRef c, ObjectRef &o, bufferlist &bl,
uint64_t offset, uint64_t length);
-
void _do_truncate(CollectionRef &c, ObjectRef &o, uint64_t offset);
int _do_zero(CollectionRef &c, ObjectRef &o, uint64_t offset, size_t length);
int _do_clone_range(CollectionRef &c, ObjectRef &oldo, ObjectRef &newo,
void print_status();
void verify_objects(CollectionHandle &ch);
- // Generate metrics for per-object fragmentation, defined by:
- // frag_score = 1 - sum((size proportion of each extents / object size) ^
- // index of each extent in a vector sorted by descending length).
- // This should only be called after the generators are finished as it will
- // attempt to change an object's extents.
+ // Generate metrics for per-object fragmentation (how fragmented each
+ // object's extents are), defined by: frag_score = 1 - sum((size proportion
+ // of each extent / object size) ^ (index of each extent in a vector sorted
+ // by descending length, + 1)). This should only be called after the
+ // generators are finished as it will attempt to change an object's extents.
void print_per_object_fragmentation();
+ // Generate metrics for per-access fragmentation, which is jumps/blocks read.
+ // Jumps are how many times we have to stop reading continuous extents.
+ void print_per_access_fragmentation();
+
// Overrides
// This is often not called directly but through queue_transaction
int queue_transactions(CollectionHandle &ch, std::vector<Transaction> &tls,
TrackedOpRef op = TrackedOpRef(),
ThreadPool::TPHandle *handle = NULL) override;
+ int read(CollectionHandle &c, const ghobject_t &oid, uint64_t offset,
+ size_t len, ceph::buffer::list &bl, uint32_t op_flags = 0) override;
CollectionHandle open_collection(const coll_t &cid) override;
CollectionHandle create_new_collection(const coll_t &cid) override;
void set_collection_commit_queue(const coll_t &cid,