From 21ac4f918cef16ec5b3d59d45077353795deadaf Mon Sep 17 00:00:00 2001
From: Igor Fedotov
Date: Fri, 30 Jul 2021 14:02:26 +0300
Subject: [PATCH] os/bluefs: allow incremental file metadata updates in bluefs log

Signed-off-by: Adam Kupczyk
(cherry picked from commit 821aadaaa92b4aa5920cd4bbbb59024ab55ea5d7)
---
Review notes (kept below the "---" separator so they stay out of the commit
message):

What the change does
  * _replay() learns OP_FILE_UPDATE_INC: it decodes a bluefs_fnode_delta_t,
    asserts that delta.offset equals the fnode's current 'allocated', copies
    ino/mtime/size from the delta and appends delta.extents to the fnode via
    claim_extents().
  * bluefs_fnode_t tracks 'allocated_commited'. make_delta() fills the delta
    with ino/size/mtime, sets delta.offset to allocated_commited and, when
    allocated_commited < allocated, copies only the extents covering that
    range and then calls reset_delta().
  * op_file_update() now takes a non-const fnode because it calls
    reset_delta() after encoding the full fnode.
  * _flush_and_sync_log(), _truncate() and _preallocate() switch from
    op_file_update() to op_file_update_inc(); the two log-rewrite paths call
    reset_delta() under the comment "we will write it to super".

About this copy of the patch
  * The line structure was rebuilt from a whitespace-collapsed copy. Blank
    context/added lines were placed where the hunk header counts require
    them (all hunks and the diffstat add up); where more than one position
    fits, the placement is a best guess. The original indentation could not
    be recovered, so apply with whitespace tolerance or regenerate the patch
    from the upstream commit named above.
  * Template argument lists had been stripped. They are restored only where
    the patch itself fixes the type: vector<bluefs_extent_t> for extent
    vectors and the seek() iterator, vector<uint64_t> for extents_index.
    'std::list& ls', the 'std::unique_lock& l' hunk labels and the author /
    sign-off e-mail addresses are left exactly as found because nothing in
    this patch determines them.
  * The first removed/added pair in the _preallocate hunk is a
    whitespace-only change; the exact bytes are not recoverable, so both
    sides read the same here.

 src/os/bluestore/BlueFS.cc       | 72 +++++++++++++++++++++++++++++---
 src/os/bluestore/bluefs_types.cc | 37 ++++++++++++++++
 src/os/bluestore/bluefs_types.h  | 51 +++++++++++++++++++++-
 3 files changed, 152 insertions(+), 8 deletions(-)

diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc
index 2d0dc36ee22ae..8b9ced8e91cdd 100644
--- a/src/os/bluestore/BlueFS.cc
+++ b/src/os/bluestore/BlueFS.cc
@@ -1426,6 +1426,61 @@ int BlueFS::_replay(bool noop, bool to_stdout)
           }
         }
         break;
+      case bluefs_transaction_t::OP_FILE_UPDATE_INC:
+        {
+          bluefs_fnode_delta_t delta;
+          decode(delta, p);
+          dout(20) << __func__ << " 0x" << std::hex << pos << std::dec
+                   << ": op_file_update_inc " << " " << delta << " " << dendl;
+          if (unlikely(to_stdout)) {
+            std::cout << " 0x" << std::hex << pos << std::dec
+                      << ": op_file_update_inc " << " " << delta << std::endl;
+          }
+          if (!noop) {
+            FileRef f = _get_file(delta.ino);
+            bluefs_fnode_t& fnode = f->fnode;
+            if (delta.offset != fnode.allocated) {
+              derr << __func__ << " invalid op_file_update_inc, new extents miss end of file"
+                   << " fnode=" << fnode
+                   << " delta=" << delta
+                   << dendl;
+              ceph_assert(delta.offset == fnode.allocated);
+            }
+            if (cct->_conf->bluefs_log_replay_check_allocations) {
+              int r = _check_allocations(fnode,
+                used_blocks, false, "OP_FILE_UPDATE_INC");
+              if (r < 0) {
+                return r;
+              }
+            }
+
+            fnode.ino = delta.ino;
+            fnode.mtime = delta.mtime;
+            if (fnode.ino != 1) {
+              vselector->sub_usage(f->vselector_hint, fnode);
+            }
+            fnode.size = delta.size;
+            fnode.claim_extents(delta.extents);
+            dout(20) << __func__ << " 0x" << std::hex << pos << std::dec
+                     << ": op_file_update_inc produced " << " " << fnode << " " << dendl;
+
+            if (fnode.ino != 1) {
+              vselector->add_usage(f->vselector_hint, fnode);
+            }
+
+            if (fnode.ino > ino_last) {
+              ino_last = fnode.ino;
+            }
+            if (cct->_conf->bluefs_log_replay_check_allocations) {
+              int r = _check_allocations(f->fnode,
+                used_blocks, true, "OP_FILE_UPDATE_INC");
+              if (r < 0) {
+                return r;
+              }
+            }
+          }
+        }
+        break;
 
       case bluefs_transaction_t::OP_FILE_REMOVE:
         {
@@ -2224,6 +2279,8 @@ void BlueFS::_rewrite_log_and_layout_sync(bool allocate_with_fallback,
 
   _close_writer(log_writer);
 
+  // we will write it to super
+  log_file->fnode.reset_delta();
   log_file->fnode.size = bl.length();
   vselector->sub_usage(log_file->vselector_hint, old_fnode);
   vselector->add_usage(log_file->vselector_hint, log_file->fnode);
@@ -2404,6 +2461,8 @@ void BlueFS::_compact_log_async(std::unique_lock& l)
     new_log->fnode.append_extent(*from);
     ++from;
   }
+  // we will write it to super
+  new_log->fnode.reset_delta();
 
   // clear the extents from old log file, they are added to new log
   log_file->fnode.clear_extents();
@@ -2492,8 +2551,8 @@ int BlueFS::_flush_and_sync_log(std::unique_lock& l,
   if (lsi != dirty_files.end()) {
     dout(20) << __func__ << " " << lsi->second.size() << " dirty_files" << dendl;
     for (auto &f : lsi->second) {
-      dout(20) << __func__ << " op_file_update " << f.fnode << dendl;
-      log_t.op_file_update(f.fnode);
+      dout(20) << __func__ << " op_file_update_inc " << f.fnode << dendl;
+      log_t.op_file_update_inc(f.fnode);
     }
   }
 
@@ -2519,7 +2578,7 @@ int BlueFS::_flush_and_sync_log(std::unique_lock& l,
                       &log_writer->file->fnode);
     ceph_assert(r == 0);
     vselector->add_usage(log_writer->file->vselector_hint, log_writer->file->fnode);
-    log_t.op_file_update(log_writer->file->fnode);
+    log_t.op_file_update_inc(log_writer->file->fnode);
     just_expanded_log = true;
   }
 
@@ -2933,7 +2992,8 @@ int BlueFS::_truncate(FileWriter *h, uint64_t offset)
   vselector->sub_usage(h->file->vselector_hint, h->file->fnode.size);
   h->file->fnode.size = offset;
   vselector->add_usage(h->file->vselector_hint, h->file->fnode.size);
-  log_t.op_file_update(h->file->fnode);
+
+  log_t.op_file_update_inc(h->file->fnode);
   return 0;
 }
 
@@ -3125,15 +3185,15 @@ int BlueFS::_preallocate(FileRef f, uint64_t off, uint64_t len)
   uint64_t allocated = f->fnode.get_allocated();
   if (off + len > allocated) {
     uint64_t want = off + len - allocated;
-    vselector->sub_usage(f->vselector_hint, f->fnode);
+    vselector->sub_usage(f->vselector_hint, f->fnode);
 
     int r = _allocate(vselector->select_prefer_bdev(f->vselector_hint),
       want,
       &f->fnode);
     vselector->add_usage(f->vselector_hint, f->fnode);
     if (r < 0)
       return r;
-    log_t.op_file_update(f->fnode);
+    log_t.op_file_update_inc(f->fnode);
   }
   return 0;
 }
diff --git a/src/os/bluestore/bluefs_types.cc b/src/os/bluestore/bluefs_types.cc
index 4a2a8152c2d7b..3a812cf5f018d 100644
--- a/src/os/bluestore/bluefs_types.cc
+++ b/src/os/bluestore/bluefs_types.cc
@@ -147,6 +147,31 @@ mempool::bluefs::vector<bluefs_extent_t>::iterator bluefs_fnode_t::seek(
   return p;
 }
 
+bluefs_fnode_delta_t* bluefs_fnode_t::make_delta(bluefs_fnode_delta_t* delta) {
+  ceph_assert(delta);
+  delta->ino = ino;
+  delta->size = size;
+  delta->mtime = mtime;
+  delta->offset = allocated_commited;
+  delta->extents.clear();
+  if (allocated_commited < allocated) {
+    uint64_t x_off = 0;
+    auto p = seek(allocated_commited, &x_off);
+    ceph_assert(p != extents.end());
+    if (x_off > 0) {
+      ceph_assert(x_off < p->length);
+      delta->extents.emplace_back(p->bdev, p->offset + x_off, p->length - x_off);
+      ++p;
+    }
+    while (p != extents.end()) {
+      delta->extents.push_back(*p);
+      ++p;
+    }
+    reset_delta();
+  }
+  return delta;
+}
+
 void bluefs_fnode_t::dump(Formatter *f) const
 {
   f->dump_unsigned("ino", ino);
@@ -175,10 +200,22 @@ ostream& operator<<(ostream& out, const bluefs_fnode_t& file)
              << " size 0x" << std::hex << file.size << std::dec
              << " mtime " << file.mtime
              << " allocated " << std::hex << file.allocated << std::dec
+             << " alloc_commit " << std::hex << file.allocated_commited << std::dec
              << " extents " << file.extents
              << ")";
 }
 
+// bluefs_fnode_delta_t
+
+std::ostream& operator<<(std::ostream& out, const bluefs_fnode_delta_t& delta)
+{
+  return out << "delta(ino " << delta.ino
+             << " size 0x" << std::hex << delta.size << std::dec
+             << " mtime " << delta.mtime
+             << " offset " << std::hex << delta.offset << std::dec
+             << " extents " << delta.extents
+             << ")";
+}
 
 // bluefs_transaction_t
 
diff --git a/src/os/bluestore/bluefs_types.h b/src/os/bluestore/bluefs_types.h
index e1cb0d8e4a958..b53000188ae77 100644
--- a/src/os/bluestore/bluefs_types.h
+++ b/src/os/bluestore/bluefs_types.h
@@ -35,6 +35,29 @@ WRITE_CLASS_DENC(bluefs_extent_t)
 
 std::ostream& operator<<(std::ostream& out, const bluefs_extent_t& e);
 
+struct bluefs_fnode_delta_t {
+  uint64_t ino;
+  uint64_t size;
+  utime_t mtime;
+  uint64_t offset; // Contains offset in file of extents.
+                   // Equal to 'allocated' when created.
+                   // Used for consistency checking.
+  mempool::bluefs::vector<bluefs_extent_t> extents;
+
+  DENC(bluefs_fnode_delta_t, v, p) {
+    DENC_START(1, 1, p);
+    denc_varint(v.ino, p);
+    denc_varint(v.size, p);
+    denc(v.mtime, p);
+    denc(v.offset, p);
+    denc(v.extents, p);
+    DENC_FINISH(p);
+  }
+};
+WRITE_CLASS_DENC(bluefs_fnode_delta_t)
+
+std::ostream& operator<<(std::ostream& out, const bluefs_fnode_delta_t& delta);
+
 struct bluefs_fnode_t {
   uint64_t ino;
   uint64_t size;
@@ -47,8 +70,9 @@ struct bluefs_fnode_t {
   mempool::bluefs::vector<uint64_t> extents_index;
 
   uint64_t allocated;
+  uint64_t allocated_commited;
 
-  bluefs_fnode_t() : ino(0), size(0), __unused__(0), allocated(0) {}
+  bluefs_fnode_t() : ino(0), size(0), __unused__(0), allocated(0), allocated_commited(0) {}
 
   uint64_t get_allocated() const {
     return allocated;
@@ -61,6 +85,7 @@ struct bluefs_fnode_t {
       extents_index.emplace_back(allocated);
       allocated += p.length;
     }
+    allocated_commited = allocated;
   }
 
   DENC_HELPERS
@@ -87,6 +112,15 @@ struct bluefs_fnode_t {
     DENC_FINISH(p);
   }
 
+  void reset_delta() {
+    allocated_commited = allocated;
+  }
+  void claim_extents(mempool::bluefs::vector<bluefs_extent_t>& extents) {
+    for (const auto& p : extents) {
+      append_extent(p);
+    }
+    extents.clear();
+  }
   void append_extent(const bluefs_extent_t& ext) {
     if (!extents.empty() &&
         extents.back().end() == ext.offset &&
@@ -114,15 +148,18 @@ struct bluefs_fnode_t {
     other.extents.swap(extents);
     other.extents_index.swap(extents_index);
     std::swap(allocated, other.allocated);
+    std::swap(allocated_commited, other.allocated_commited);
   }
   void clear_extents() {
     extents_index.clear();
     extents.clear();
     allocated = 0;
+    allocated_commited = 0;
   }
 
   mempool::bluefs::vector<bluefs_extent_t>::iterator seek(
     uint64_t off, uint64_t *x_off);
+  bluefs_fnode_delta_t* make_delta(bluefs_fnode_delta_t* delta);
 
   void dump(ceph::Formatter *f) const;
   static void generate_test_instances(std::list& ls);
@@ -195,6 +232,7 @@ struct bluefs_transaction_t {
     OP_FILE_REMOVE, ///< remove file (ino)
     OP_JUMP,        ///< jump the seq # and offset
     OP_JUMP_SEQ,    ///< jump the seq #
+    OP_FILE_UPDATE_INC, ///< incremental update file metadata (file)
   } op_t;
 
   uuid_d uuid;          ///< fs uuid
@@ -237,10 +275,19 @@ struct bluefs_transaction_t {
     encode(dir, op_bl);
     encode(file, op_bl);
   }
-  void op_file_update(const bluefs_fnode_t& file) {
+  void op_file_update(bluefs_fnode_t& file) {
     using ceph::encode;
     encode((__u8)OP_FILE_UPDATE, op_bl);
     encode(file, op_bl);
+    file.reset_delta();
+  }
+  /* streams update to bufferlist and clears update state */
+  void op_file_update_inc(bluefs_fnode_t& file) {
+    using ceph::encode;
+    bluefs_fnode_delta_t delta;
+    file.make_delta(&delta); //also resets delta to zero
+    encode((__u8)OP_FILE_UPDATE_INC, op_bl);
+    encode(delta, op_bl);
   }
   void op_file_remove(uint64_t ino) {
     using ceph::encode;
-- 
2.39.5