From 03362796396a9124fa68cf12707288437836c592 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 29 Dec 2015 13:43:03 -0500 Subject: [PATCH] os/bluestore/BlueFS: delay IOContext dtor until after io completes It's possible for the IO to be in flight when the caller closes the writer handle (although dangerous of them). Queue the IOContext for async cleanup when we sync everything to disk. Signed-off-by: Sage Weil --- src/os/bluestore/BlueFS.cc | 32 +++++++++++++++++++++++++++++--- src/os/bluestore/BlueFS.h | 13 ++++++++++--- src/os/bluestore/BlueRocksEnv.cc | 2 +- 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 3b4ce730578d4..ad2c1dd34aac5 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -176,7 +176,7 @@ int BlueFS::mkfs(uuid_d osd_uuid) // clean up super = bluefs_super_t(); - delete log_writer; + _close_writer(log_writer); log_writer = NULL; block_all.clear(); _stop_alloc(); @@ -251,10 +251,17 @@ int BlueFS::mount() void BlueFS::umount() { dout(1) << __func__ << dendl; + sync_metadata(); - delete log_writer; + _close_writer(log_writer); log_writer = NULL; + // manually clean up it's iocs + for (auto p : ioc_reap_queue) { + delete p; + } + ioc_reap_queue.clear(); + block_all.clear(); _stop_alloc(); file_map.clear(); @@ -791,7 +798,7 @@ void BlueFS::_compact_log() assert(r == 0); } - delete log_writer; + _close_writer(log_writer); log_file->fnode.size = bl.length(); log_writer = new FileWriter(log_file, bdev.size()); @@ -1123,6 +1130,8 @@ void BlueFS::sync_metadata() } dout(10) << __func__ << dendl; utime_t start = ceph_clock_now(NULL); + vector iocv; + iocv.swap(ioc_reap_queue); for (auto p : alloc) { p->commit_start(); } @@ -1130,6 +1139,9 @@ void BlueFS::sync_metadata() for (auto p : alloc) { p->commit_finish(); } + for (auto p : iocv) { + delete p; + } utime_t end = ceph_clock_now(NULL); utime_t dur = end - start; dout(10) << __func__ << " done in " << dur << dendl; @@ -1212,6 +1224,20 @@ int BlueFS::open_for_write( return 0; } +void BlueFS::_close_writer(FileWriter *h) +{ + dout(10) << __func__ << " " << h << dendl; + for (auto i : h->iocv) { + if (i->has_aios()) { + ioc_reap_queue.push_back(i); + } else { + delete i; + } + } + h->iocv.clear(); + delete h; +} + int BlueFS::open_for_read( const string& dirname, const string& filename, diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index 9d3382f00a069..85b6638b9a22f 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -82,9 +82,7 @@ public: } ~FileWriter() { file->num_writers.dec(); - for (auto p : iocv) { - delete p; - } + assert(iocv.empty()); // caller must call BlueFS::close_writer() } void append(const char *buf, size_t len) { @@ -182,6 +180,8 @@ private: vector > block_all; ///< extents in bdev we own vector alloc; ///< allocators for bdevs + vector ioc_reap_queue; ///< iocs from closed writers + void _init_alloc(); void _stop_alloc(); @@ -221,6 +221,8 @@ private: int _write_super(); int _replay(); ///< replay journal + void _close_writer(FileWriter *h); + // always put the super in the second 4k block. FIXME should this be // block size independent? unsigned get_super_offset() { @@ -260,6 +262,11 @@ public: FileReader **h, bool random = false); + void close_writer(FileWriter *h) { + Mutex::Locker l(lock); + _close_writer(h); + } + int rename(const string& old_dir, const string& old_file, const string& new_dir, const string& new_file); diff --git a/src/os/bluestore/BlueRocksEnv.cc b/src/os/bluestore/BlueRocksEnv.cc index a7659dc79f6e3..857472fa93918 100644 --- a/src/os/bluestore/BlueRocksEnv.cc +++ b/src/os/bluestore/BlueRocksEnv.cc @@ -156,7 +156,7 @@ class BlueRocksWritableFile : public rocksdb::WritableFile { public: BlueRocksWritableFile(BlueFS *fs, BlueFS::FileWriter *h) : fs(fs), h(h) {} ~BlueRocksWritableFile() { - delete h; + fs->close_writer(h); } // Indicates if the class makes use of unbuffered I/O -- 2.39.5