From: Adam Kupczyk Date: Mon, 28 Jun 2021 10:56:36 +0000 (+0200) Subject: os/bluestore/bluefs: Rename functions to reflect locks that are used X-Git-Tag: v17.1.0~92^2~15 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=dc787693ed500161c8923787a699ae117423cbbc;p=ceph.git os/bluestore/bluefs: Rename functions to reflect locks that are used This is a modification that only changes the names of functions, so that tracking potential deadlocks is simpler. All internal functions start with _. Signed-off-by: Adam Kupczyk --- diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 7bf0590ced7..64a0535aade 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -538,7 +538,7 @@ void BlueFS::dump_block_extents(ostream& out) int BlueFS::get_block_extents(unsigned id, interval_set *extents) { - std::lock_guard dirl(nodes.lock); + std::lock_guard nl(nodes.lock); dout(10) << __func__ << " bdev " << id << dendl; ceph_assert(id < alloc.size()); for (auto& p : nodes.file_map) { @@ -591,13 +591,13 @@ int BlueFS::mkfs(uuid_d osd_uuid, const bluefs_layout_t& layout) ceph_assert(log.seq_live == 1); log.t.seq = 1; log.t.op_init(); - flush_and_sync_log(); + _flush_and_sync_log_LD(); // write supers super.log_fnode = log_file->fnode; super.memorized_layout = layout; _write_super(BDEV_DB); - flush_bdev(); + _flush_bdev(); // clean up super = bluefs_super_t(); @@ -983,7 +983,7 @@ int BlueFS::prepare_new_device(int id, const bluefs_layout_t& layout) new_log_dev_cur = BDEV_NEWDB; new_log_dev_next = BDEV_DB; } - rewrite_log_and_layout_sync(false, + _rewrite_log_and_layout_sync_LN_LD(false, BDEV_NEWDB, new_log_dev_cur, new_log_dev_next, @@ -991,7 +991,7 @@ int BlueFS::prepare_new_device(int id, const bluefs_layout_t& layout) layout); //} } else if(id == BDEV_NEWWAL) { - rewrite_log_and_layout_sync(false, + _rewrite_log_and_layout_sync_LN_LD(false, BDEV_DB, BDEV_NEWWAL, BDEV_WAL, @@ -1206,7 +1206,7 @@ int BlueFS::_replay(bool noop, 
bool to_stdout) int r = _read(log_reader, read_pos, super.block_size, &bl, NULL); if (r != (int)super.block_size && cct->_conf->bluefs_replay_recovery) { - r += do_replay_recovery_read(log_reader, pos, read_pos + r, super.block_size - r, &bl); + r += _do_replay_recovery_read(log_reader, pos, read_pos + r, super.block_size - r, &bl); } assert(r == (int)super.block_size); read_pos += r; @@ -1266,7 +1266,7 @@ int BlueFS::_replay(bool noop, bool to_stdout) << ", which is past eof" << dendl; if (cct->_conf->bluefs_replay_recovery) { //try to search for more data - r += do_replay_recovery_read(log_reader, pos, read_pos + r, more - r, &t); + r += _do_replay_recovery_read(log_reader, pos, read_pos + r, more - r, &t); if (r < (int)more) { //in normal mode we must read r==more, for recovery it is too strict break; @@ -1830,7 +1830,7 @@ int BlueFS::device_migrate_to_existing( new_log_dev_next; } - rewrite_log_and_layout_sync( + _rewrite_log_and_layout_sync_LN_LD( false, (flags & REMOVE_DB) ? BDEV_SLOW : BDEV_DB, new_log_dev_cur, @@ -1965,7 +1965,7 @@ int BlueFS::device_migrate_to_new( BDEV_DB : BDEV_SLOW; - rewrite_log_and_layout_sync( + _rewrite_log_and_layout_sync_LN_LD( false, super_dev, new_log_dev_cur, @@ -1990,7 +1990,7 @@ BlueFS::FileRef BlueFS::_get_file(uint64_t ino) } } -void BlueFS::_drop_link(FileRef file) +void BlueFS::_drop_link_D(FileRef file) { dout(20) << __func__ << " had refs " << file->refs << " on " << file->fnode << dendl; @@ -2262,9 +2262,9 @@ void BlueFS::invalidate_cache(FileRef f, uint64_t offset, uint64_t length) } } -uint64_t BlueFS::estimate_log_size() +uint64_t BlueFS::_estimate_log_size_N() { - std::lock_guard dirl(nodes.lock); + std::lock_guard nl(nodes.lock); int avg_dir_size = 40; // fixme int avg_file_size = 12; uint64_t size = 4096 * 2; @@ -2278,14 +2278,14 @@ void BlueFS::compact_log() { if (!cct->_conf->bluefs_replay_recovery_disable_compact) { if (cct->_conf->bluefs_compact_log_sync) { - compact_log_sync(); + _compact_log_sync_LN_LD(); } 
else { - compact_log_async(); + _compact_log_async_LD_NF_D(); } } } -bool BlueFS::should_start_compact_log() +bool BlueFS::_should_start_compact_log_L_N() { if (log_is_compacting.load() == true) { // compaction is already running @@ -2293,10 +2293,10 @@ bool BlueFS::should_start_compact_log() } uint64_t current; { - std::lock_guard dirl(log.lock); + std::lock_guard ll(log.lock); current = log.writer->file->fnode.size; } - uint64_t expected = estimate_log_size(); + uint64_t expected = _estimate_log_size_N(); float ratio = (float)current / (float)expected; dout(10) << __func__ << " current 0x" << std::hex << current << " expected " << expected << std::dec @@ -2309,10 +2309,10 @@ bool BlueFS::should_start_compact_log() return true; } -void BlueFS::compact_log_dump_metadata(bluefs_transaction_t *t, +void BlueFS::_compact_log_dump_metadata_N(bluefs_transaction_t *t, int flags) { - std::lock_guard dirl(nodes.lock); + std::lock_guard nl(nodes.lock); t->seq = 1; t->uuid = super.uuid; @@ -2360,12 +2360,12 @@ void BlueFS::compact_log_dump_metadata(bluefs_transaction_t *t, } } -void BlueFS::compact_log_sync() +void BlueFS::_compact_log_sync_LN_LD() { dout(10) << __func__ << dendl; auto prefer_bdev = vselector->select_prefer_bdev(log.writer->file->vselector_hint); - rewrite_log_and_layout_sync(true, + _rewrite_log_and_layout_sync_LN_LD(true, BDEV_DB, prefer_bdev, prefer_bdev, @@ -2374,7 +2374,7 @@ void BlueFS::compact_log_sync() logger->inc(l_bluefs_log_compactions); } -void BlueFS::rewrite_log_and_layout_sync(bool allocate_with_fallback, +void BlueFS::_rewrite_log_and_layout_sync_LN_LD(bool allocate_with_fallback, int super_dev, int log_dev, int log_dev_new, @@ -2400,7 +2400,7 @@ void BlueFS::rewrite_log_and_layout_sync(bool allocate_with_fallback, << " flags:" << flags << dendl; bluefs_transaction_t t; - compact_log_dump_metadata(&t, flags); + _compact_log_dump_metadata_N(&t, flags); dout(20) << __func__ << " op_jump_seq " << log.seq_live << dendl; 
t.op_jump_seq(log.seq_live); @@ -2446,11 +2446,11 @@ void BlueFS::rewrite_log_and_layout_sync(bool allocate_with_fallback, if (!cct->_conf->bluefs_sync_write) { list completed_ios; _claim_completed_aios(log.writer, &completed_ios); - wait_for_aio(log.writer); + _wait_for_aio(log.writer); completed_ios.clear(); } #endif - flush_bdev(); + _flush_bdev(); super.memorized_layout = layout; super.log_fnode = log_file->fnode; @@ -2465,7 +2465,7 @@ void BlueFS::rewrite_log_and_layout_sync(bool allocate_with_fallback, ++super.version; _write_super(super_dev); - flush_bdev(); + _flush_bdev(); dout(10) << __func__ << " release old log extents " << old_fnode.extents << dendl; std::lock_guard dl(dirty.lock); @@ -2497,7 +2497,7 @@ void BlueFS::rewrite_log_and_layout_sync(bool allocate_with_fallback, * 8. Release the old log space. Clean up. */ -void BlueFS::compact_log_async() +void BlueFS::_compact_log_async_LD_NF_D() //also locks FW for new_writer { dout(10) << __func__ << dendl; // only one compaction allowed at one time @@ -2553,8 +2553,8 @@ void BlueFS::compact_log_async() // we need to flush all bdev because we will be streaming all dirty files to log // TODO - think - if _flush_and_sync_log_jump will not add dirty files nor release pending allocations // then flush_bdev() will not be necessary - flush_bdev(); - _flush_and_sync_log_jump(old_log_jump_to, runway); + _flush_bdev(); + _flush_and_sync_log_jump_D(old_log_jump_to, runway); log.lock.unlock(); // out of jump section - now log can be used to write to @@ -2564,7 +2564,7 @@ void BlueFS::compact_log_async() //this needs files lock //what will happen, if a file is modified *twice* before we stream it to log? //the later state that we capture will be seen earlier and replay will see a temporary retraction (!) 
- compact_log_dump_metadata(&t, 0); + _compact_log_dump_metadata_N(&t, 0); uint64_t max_alloc_size = std::max(alloc_size[BDEV_WAL], std::max(alloc_size[BDEV_DB], @@ -2595,6 +2595,7 @@ void BlueFS::compact_log_async() new_log_writer = _create_writer(new_log); new_log_writer->append(bl); + new_log->lock.lock(); new_log_writer->lock.lock(); // 3. flush r = _flush_special(new_log_writer); @@ -2603,6 +2604,7 @@ void BlueFS::compact_log_async() // 4. wait _flush_bdev(new_log_writer); new_log_writer->lock.unlock(); + new_log->lock.unlock(); // 5. update our log fnode // discard first old_log_jump_to extents @@ -2656,7 +2658,7 @@ void BlueFS::compact_log_async() ++super.version; _write_super(BDEV_DB); - flush_bdev(); + _flush_bdev(); old_forbidden = atomic_exchange(&log_forbidden_to_expand, false); ceph_assert(old_forbidden == true); @@ -2829,9 +2831,9 @@ void BlueFS::_flush_and_sync_log_core(int64_t runway) } // Clears dirty.files up to (including) seq_stable. -void BlueFS::_clear_dirty_set_stable(uint64_t seq) +void BlueFS::_clear_dirty_set_stable_D(uint64_t seq) { - std::lock_guard lg(dirty.lock); + std::lock_guard dl(dirty.lock); // clean dirty files if (seq > dirty.seq_stable) { @@ -2889,9 +2891,8 @@ void BlueFS::_release_pending_allocations(vector>& to_rel } } -int BlueFS::flush_and_sync_log(uint64_t want_seq) +int BlueFS::_flush_and_sync_log_LD(uint64_t want_seq) { - // we synchronize writing to log, by lock to log_lock int64_t available_runway; do { log.lock.lock(); @@ -2930,7 +2931,7 @@ int BlueFS::flush_and_sync_log(uint64_t want_seq) //now log.lock is no longer needed log.lock.unlock(); - _clear_dirty_set_stable(seq); + _clear_dirty_set_stable_D(seq); _release_pending_allocations(to_release); _update_logger_stats(); @@ -2938,7 +2939,7 @@ int BlueFS::flush_and_sync_log(uint64_t want_seq) } // Flushes log and immediately adjusts log_writer pos. 
-int BlueFS::_flush_and_sync_log_jump(uint64_t jump_to, +int BlueFS::_flush_and_sync_log_jump_D(uint64_t jump_to, int64_t available_runway) { ceph_assert(ceph_mutex_is_locked(log.lock)); @@ -2963,7 +2964,7 @@ int BlueFS::_flush_and_sync_log_jump(uint64_t jump_to, _flush_bdev(log.writer); - _clear_dirty_set_stable(seq); + _clear_dirty_set_stable_D(seq); _release_pending_allocations(to_release); _update_logger_stats(); @@ -3013,7 +3014,7 @@ ceph::bufferlist BlueFS::FileWriter::flush_buffer( return bl; } -int BlueFS::_signal_dirty_to_log(FileWriter *h) +int BlueFS::_signal_dirty_to_log_D(FileWriter *h) { ceph_assert(ceph_mutex_is_locked(h->lock)); std::lock_guard dl(dirty.lock); @@ -3042,12 +3043,12 @@ int BlueFS::_signal_dirty_to_log(FileWriter *h) return 0; } -void BlueFS::flush_range(FileWriter *h, uint64_t offset, uint64_t length) { +void BlueFS::flush_range/*WF*/(FileWriter *h, uint64_t offset, uint64_t length) { std::unique_lock hl(h->lock); - _flush_range(h, offset, length); + _flush_range_F(h, offset, length); } -int BlueFS::_flush_range(FileWriter *h, uint64_t offset, uint64_t length) +int BlueFS::_flush_range_F(FileWriter *h, uint64_t offset, uint64_t length) { ceph_assert(ceph_mutex_is_locked(h->lock)); dout(10) << __func__ << " " << h << " pos 0x" << std::hex << h->pos @@ -3106,8 +3107,10 @@ int BlueFS::_flush_range(FileWriter *h, uint64_t offset, uint64_t length) int BlueFS::_flush_data(FileWriter *h, uint64_t offset, uint64_t length, bool buffered) { - //ceph_assert(ceph_mutex_is_locked(h->lock)); - //ceph_assert(ceph_mutex_is_locked(h->file->lock)); + if (h->file->fnode.ino != 1) { + ceph_assert(ceph_mutex_is_locked(h->lock)); + ceph_assert(ceph_mutex_is_locked(h->file->lock)); + } uint64_t x_off = 0; auto p = h->file->fnode.seek(offset, &x_off); ceph_assert(p != h->file->fnode.extents.end()); @@ -3197,7 +3200,7 @@ void BlueFS::_claim_completed_aios(FileWriter *h, list *ls) dout(10) << __func__ << " got " << ls->size() << " aios" << dendl; } -void 
BlueFS::wait_for_aio(FileWriter *h) +void BlueFS::_wait_for_aio(FileWriter *h) { // NOTE: this is safe to call without a lock, as long as our reference is // stable. @@ -3215,7 +3218,7 @@ void BlueFS::wait_for_aio(FileWriter *h) #endif -void BlueFS::append_try_flush(FileWriter *h, const char* buf, size_t len) +void BlueFS::append_try_flush/*_WFL_WFN*/(FileWriter *h, const char* buf, size_t len) { std::unique_lock hl(h->lock); size_t max_size = 1ull << 30; // cap to 1GB @@ -3231,10 +3234,10 @@ void BlueFS::append_try_flush(FileWriter *h, const char* buf, size_t len) } if (need_flush) { bool flushed = false; - int r = _flush(h, true, &flushed); + int r = _flush_F(h, true, &flushed); ceph_assert(r == 0); if (r == 0 && flushed) { - maybe_compact_log(); + _maybe_compact_log_LN_NF_LD_D(); } // make sure we've made any progress with flush hence the // loop doesn't iterate forever @@ -3247,14 +3250,14 @@ void BlueFS::flush(FileWriter *h, bool force) { std::unique_lock hl(h->lock); bool flushed = false; - int r = _flush(h, force, &flushed); + int r = _flush_F(h, force, &flushed); ceph_assert(r == 0); if (r == 0 && flushed) { - maybe_compact_log(); + _maybe_compact_log_LN_NF_LD_D(); } } -int BlueFS::_flush(FileWriter *h, bool force, bool *flushed) +int BlueFS::_flush_F(FileWriter *h, bool force, bool *flushed) { ceph_assert(ceph_mutex_is_locked(h->lock)); uint64_t length = h->get_buffer_length(); @@ -3278,7 +3281,7 @@ int BlueFS::_flush(FileWriter *h, bool force, bool *flushed) << std::hex << offset << "~" << length << std::dec << " to " << h->file->fnode << dendl; ceph_assert(h->pos <= h->file->fnode.size); - int r = _flush_range(h, offset, length); + int r = _flush_range_F(h, offset, length); if (flushed) { *flushed = true; } @@ -3293,8 +3296,7 @@ int BlueFS::_flush(FileWriter *h, bool force, bool *flushed) // smart enough to discover it on its own. 
int BlueFS::_flush_special(FileWriter *h) { - //ceph_assert(ceph_mutex_is_locked(h->lock)); - //ceph_assert(ceph_mutex_is_locked(h->file->lock)); + ceph_assert(h->file->fnode.ino <= 1); uint64_t length = h->get_buffer_length(); uint64_t offset = h->pos; ceph_assert(length + offset <= h->file->fnode.get_allocated()); @@ -3325,7 +3327,7 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset) ceph_abort_msg("actually this shouldn't happen"); } if (h->get_buffer_length()) { - int r = _flush(h, true); + int r = _flush_F(h, true); if (r < 0) return r; } @@ -3349,28 +3351,36 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset) int BlueFS::fsync(FileWriter *h) { std::unique_lock hl(h->lock); - dout(10) << __func__ << " " << h << " " << h->file->fnode << dendl; - int r = _flush(h, true); - if (r < 0) - return r; - if (h->file->is_dirty) { - _signal_dirty_to_log(h); - h->file->is_dirty = false; + uint64_t old_dirty_seq = 0; + { + dout(10) << __func__ << " " << h << " " << h->file->fnode << dendl; + int r = _flush_F(h, true); + if (r < 0) + return r; + _flush_bdev(h); + if (h->file->is_dirty) { + _signal_dirty_to_log_D(h); + h->file->is_dirty = false; + } + { + std::lock_guard dl(dirty.lock); + if (dirty.seq_stable < h->file->dirty_seq) { + old_dirty_seq = h->file->dirty_seq; + dout(20) << __func__ << " file metadata was dirty (" << old_dirty_seq + << ") on " << h->file->fnode << ", flushing log" << dendl; + } + } } - uint64_t old_dirty_seq = h->file->dirty_seq; - - _flush_bdev(h); - if (old_dirty_seq) { uint64_t s = log.seq_live; // AKAK !!! locks! dout(20) << __func__ << " file metadata was dirty (" << old_dirty_seq << ") on " << h->file->fnode << ", flushing log" << dendl; - flush_and_sync_log(old_dirty_seq); + _flush_and_sync_log_LD(old_dirty_seq); // AK - TODO - think - how can dirty_seq change if we are under h lock? 
ceph_assert(h->file->dirty_seq == 0 || // cleaned h->file->dirty_seq >= s); // or redirtied by someone else } - maybe_compact_log(); + _maybe_compact_log_LN_NF_LD_D(); return 0; } @@ -3388,14 +3398,14 @@ void BlueFS::_flush_bdev(FileWriter *h) if (!cct->_conf->bluefs_sync_write) { list completed_ios; _claim_completed_aios(h, &completed_ios); - wait_for_aio(h); + _wait_for_aio(h); completed_ios.clear(); } #endif - flush_bdev(flush_devs); + _flush_bdev(flush_devs); } -void BlueFS::flush_bdev(std::array& dirty_bdevs) +void BlueFS::_flush_bdev(std::array& dirty_bdevs) { // NOTE: this is safe to call without a lock. dout(20) << __func__ << dendl; @@ -3405,7 +3415,7 @@ void BlueFS::flush_bdev(std::array& dirty_bdevs) } } -void BlueFS::flush_bdev() +void BlueFS::_flush_bdev() { // NOTE: this is safe to call without a lock. dout(20) << __func__ << dendl; @@ -3568,24 +3578,24 @@ void BlueFS::sync_metadata(bool avoid_compact) lgeneric_subdout(cct, bluefs, 10) << __func__; start = ceph_clock_now(); *_dout << dendl; - flush_bdev(); // FIXME? - flush_and_sync_log(); + _flush_bdev(); // FIXME? 
+ _flush_and_sync_log_LD(); dout(10) << __func__ << " done in " << (ceph_clock_now() - start) << dendl; } if (!avoid_compact) { - maybe_compact_log(); + _maybe_compact_log_LN_NF_LD_D(); } } -void BlueFS::maybe_compact_log() +void BlueFS::_maybe_compact_log_LN_NF_LD_D() { if (!cct->_conf->bluefs_replay_recovery_disable_compact && - should_start_compact_log()) { + _should_start_compact_log_L_N()) { if (cct->_conf->bluefs_compact_log_sync) { - compact_log_sync(); + _compact_log_sync_LN_LD(); } else { - compact_log_async(); + _compact_log_async_LD_NF_D(); } } } @@ -3596,7 +3606,7 @@ int BlueFS::open_for_write( FileWriter **h, bool overwrite) { - std::lock_guard dirl(nodes.lock); + std::lock_guard nl(nodes.lock); dout(10) << __func__ << " " << dirname << "/" << filename << dendl; map::iterator p = nodes.dir_map.find(dirname); DirRef dir; @@ -3743,7 +3753,7 @@ int BlueFS::open_for_read( FileReader **h, bool random) { - std::lock_guard dirl(nodes.lock); + std::lock_guard nl(nodes.lock); dout(10) << __func__ << " " << dirname << "/" << filename << (random ? 
" (random)":" (sequential)") << dendl; map::iterator p = nodes.dir_map.find(dirname); @@ -3772,8 +3782,8 @@ int BlueFS::rename( std::string_view old_dirname, std::string_view old_filename, std::string_view new_dirname, std::string_view new_filename) { - std::lock_guard dirl(nodes.lock); std::lock_guard ll(log.lock); + std::lock_guard nl(nodes.lock); dout(10) << __func__ << " " << old_dirname << "/" << old_filename << " -> " << new_dirname << "/" << new_filename << dendl; map::iterator p = nodes.dir_map.find(old_dirname); @@ -3804,7 +3814,7 @@ int BlueFS::rename( << " already exists, unlinking" << dendl; ceph_assert(q->second != file); log.t.op_dir_unlink(new_dirname, new_filename); - _drop_link(q->second); + _drop_link_D(q->second); } dout(10) << __func__ << " " << new_dirname << "/" << new_filename << " " @@ -3820,8 +3830,8 @@ int BlueFS::rename( int BlueFS::mkdir(std::string_view dirname) { - std::lock_guard dirl(nodes.lock); std::lock_guard ll(log.lock); + std::lock_guard nl(nodes.lock); dout(10) << __func__ << " " << dirname << dendl; map::iterator p = nodes.dir_map.find(dirname); if (p != nodes.dir_map.end()) { @@ -3835,8 +3845,8 @@ int BlueFS::mkdir(std::string_view dirname) int BlueFS::rmdir(std::string_view dirname) { - std::lock_guard dirl(nodes.lock); std::lock_guard ll(log.lock); + std::lock_guard nl(nodes.lock); dout(10) << __func__ << " " << dirname << dendl; auto p = nodes.dir_map.find(dirname); if (p == nodes.dir_map.end()) { @@ -3855,7 +3865,7 @@ int BlueFS::rmdir(std::string_view dirname) bool BlueFS::dir_exists(std::string_view dirname) { - std::lock_guard dirl(nodes.lock); + std::lock_guard nl(nodes.lock); map::iterator p = nodes.dir_map.find(dirname); bool exists = p != nodes.dir_map.end(); dout(10) << __func__ << " " << dirname << " = " << (int)exists << dendl; @@ -3865,7 +3875,7 @@ bool BlueFS::dir_exists(std::string_view dirname) int BlueFS::stat(std::string_view dirname, std::string_view filename, uint64_t *size, utime_t *mtime) { - 
std::lock_guard dirl(nodes.lock); + std::lock_guard nl(nodes.lock); dout(10) << __func__ << " " << dirname << "/" << filename << dendl; map::iterator p = nodes.dir_map.find(dirname); if (p == nodes.dir_map.end()) { @@ -3893,7 +3903,8 @@ int BlueFS::stat(std::string_view dirname, std::string_view filename, int BlueFS::lock_file(std::string_view dirname, std::string_view filename, FileLock **plock) { - std::lock_guard dirl(nodes.lock); + std::lock_guard ll(log.lock); + std::lock_guard nl(nodes.lock); dout(10) << __func__ << " " << dirname << "/" << filename << dendl; map::iterator p = nodes.dir_map.find(dirname); if (p == nodes.dir_map.end()) { @@ -3913,7 +3924,6 @@ int BlueFS::lock_file(std::string_view dirname, std::string_view filename, nodes.file_map[ino_last] = file; dir->file_map[string{filename}] = file; ++file->refs; - std::lock_guard ll(log.lock); log.t.op_file_update(file->fnode); log.t.op_dir_link(dirname, filename, file->fnode.ino); } else { @@ -3932,7 +3942,7 @@ int BlueFS::lock_file(std::string_view dirname, std::string_view filename, int BlueFS::unlock_file(FileLock *fl) { - std::lock_guard dirl(nodes.lock); + std::lock_guard nl(nodes.lock); dout(10) << __func__ << " " << fl << " on " << fl->file->fnode << dendl; ceph_assert(fl->file->locked); fl->file->locked = false; @@ -3946,7 +3956,7 @@ int BlueFS::readdir(std::string_view dirname, vector *ls) if (!dirname.empty() && dirname.back() == '/') { dirname.remove_suffix(1); } - std::lock_guard dirl(nodes.lock); + std::lock_guard nl(nodes.lock); dout(10) << __func__ << " " << dirname << dendl; if (dirname.empty()) { // list dirs @@ -3974,8 +3984,8 @@ int BlueFS::readdir(std::string_view dirname, vector *ls) int BlueFS::unlink(std::string_view dirname, std::string_view filename) { - std::lock_guard dirl(nodes.lock); std::lock_guard ll(log.lock); + std::lock_guard nl(nodes.lock); dout(10) << __func__ << " " << dirname << "/" << filename << dendl; map::iterator p = nodes.dir_map.find(dirname); if (p == 
nodes.dir_map.end()) { @@ -3997,7 +4007,7 @@ int BlueFS::unlink(std::string_view dirname, std::string_view filename) } dir->file_map.erase(string{filename}); log.t.op_dir_unlink(dirname, filename); - _drop_link(file); + _drop_link_D(file); return 0; } @@ -4020,7 +4030,7 @@ bool BlueFS::wal_is_rotational() When we find it, we decode following bytes as extent. We read that whole extent and then check if merged with existing log part gives a proper bluefs transaction. */ -int BlueFS::do_replay_recovery_read(FileReader *log_reader, +int BlueFS::_do_replay_recovery_read(FileReader *log_reader, size_t replay_pos, size_t read_offset, size_t read_len, diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index 7a955d65ff6..cd894a21a4d 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -411,7 +411,7 @@ private: FileRef _get_file(uint64_t ino); - void _drop_link(FileRef f); + void _drop_link_D(FileRef f); unsigned _get_slow_device_id() { return bdev[BDEV_SLOW] ? 
BDEV_SLOW : BDEV_DB; @@ -423,32 +423,32 @@ private: PExtentVector* extents); /* signal replay log to include h->file in nearest log flush */ - int _signal_dirty_to_log(FileWriter *h); - int _flush_range(FileWriter *h, uint64_t offset, uint64_t length); + int _signal_dirty_to_log_D(FileWriter *h); + int _flush_range_F(FileWriter *h, uint64_t offset, uint64_t length); int _flush_data(FileWriter *h, uint64_t offset, uint64_t length, bool buffered); - int _flush(FileWriter *h, bool force, bool *flushed = nullptr); + int _flush_F(FileWriter *h, bool force, bool *flushed = nullptr); int _flush_special(FileWriter *h); int _fsync(FileWriter *h); #ifdef HAVE_LIBAIO void _claim_completed_aios(FileWriter *h, std::list *ls); - void wait_for_aio(FileWriter *h); // safe to call without a lock + void _wait_for_aio(FileWriter *h); // safe to call without a lock #endif int64_t _maybe_extend_log(); void _extend_log(); uint64_t _log_advance_seq(); void _consume_dirty(uint64_t seq); - void clear_dirty_set_stable(uint64_t seq_stable); - void release_pending_allocations(std::vector>& to_release); + void _clear_dirty_set_stable_D(uint64_t seq_stable); + void _release_pending_allocations(std::vector>& to_release); void _flush_and_sync_log_core(int64_t available_runway); - int _flush_and_sync_log_jump(uint64_t jump_to, + int _flush_and_sync_log_jump_D(uint64_t jump_to, int64_t available_runway); - int flush_and_sync_log(uint64_t want_seq = 0); + int _flush_and_sync_log_LD(uint64_t want_seq = 0); - uint64_t estimate_log_size(); - bool should_start_compact_log(); + uint64_t _estimate_log_size_N(); + bool _should_start_compact_log_L_N(); enum { REMOVE_DB = 1, @@ -456,12 +456,12 @@ private: RENAME_SLOW2DB = 4, RENAME_DB2SLOW = 8, }; - void compact_log_dump_metadata(bluefs_transaction_t *t, + void _compact_log_dump_metadata_N(bluefs_transaction_t *t, int flags); - void compact_log_sync(); - void compact_log_async(); + void _compact_log_sync_LN_LD(); + void _compact_log_async_LD_NF_D(); - void 
rewrite_log_and_layout_sync(bool allocate_with_fallback, + void _rewrite_log_and_layout_sync_LN_LD(bool allocate_with_fallback, int super_dev, int log_dev, int new_log_dev, @@ -471,8 +471,8 @@ private: //void _aio_finish(void *priv); void _flush_bdev(FileWriter *h); - void flush_bdev(); // this is safe to call without a lock - void flush_bdev(std::array& dirty_bdevs); // this is safe to call without a lock + void _flush_bdev(); // this is safe to call without a lock + void _flush_bdev(std::array& dirty_bdevs); // this is safe to call without a lock int _preallocate(FileRef f, uint64_t off, uint64_t len); int _truncate(FileWriter *h, uint64_t off); @@ -592,7 +592,7 @@ public: /// sync any uncommitted state to disk void sync_metadata(bool avoid_compact); /// test and compact log, if necessary - void maybe_compact_log(); + void _maybe_compact_log_LN_NF_LD_D(); void set_volume_selector(BlueFSVolumeSelector* s) { vselector.reset(s); @@ -636,11 +636,11 @@ public: void invalidate_cache(FileRef f, uint64_t offset, uint64_t len); int preallocate(FileRef f, uint64_t offset, uint64_t len); int truncate(FileWriter *h, uint64_t offset); - int do_replay_recovery_read(FileReader *log, - size_t log_pos, - size_t read_offset, - size_t read_len, - bufferlist* bl); + int _do_replay_recovery_read(FileReader *log, + size_t log_pos, + size_t read_offset, + size_t read_len, + bufferlist* bl); size_t probe_alloc_avail(int dev, uint64_t alloc_size); @@ -710,5 +710,22 @@ public: void get_paths(const std::string& base, paths& res) const override; }; - +/** + * Directed graph of locks. + * Vertices - Locks. Edges (directed) - locking progression. + * Edge A->B exists if the last taken lock was A and the next taken lock is B. + * + * Column represents last lock taken. + * Row represents next lock taken. 
+ * + * < | L | D | N | F | W + * -------------|---|---|---|---|--- + * log L | < < + * dirty D | + * nodes N | < + * File F | < + * FileWriter W | < < < + * + * Claim: Deadlock is possible IFF graph contains cycles. + */ #endif