From: Sage Weil Date: Thu, 10 Dec 2015 22:15:57 +0000 (-0500) Subject: os/bluestore: support second block.wal device X-Git-Tag: v10.0.3~154^2~146 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3745afb4c82cd2cb78d9dc4baf73f4011e926ca3;p=ceph.git os/bluestore: support second block.wal device Use this device for the bluefs log. Signed-off-by: Sage Weil --- diff --git a/src/common/config_opts.h b/src/common/config_opts.h index d5f2cddcb274..36cbcdb1e500 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -850,6 +850,8 @@ OPTION(bluestore_bluefs_max_free_fs_main_ratio, OPT_FLOAT, .8) OPTION(bluestore_bluefs_min_gift_ratio, OPT_FLOAT, 1) OPTION(bluestore_block_path, OPT_STR, "") OPTION(bluestore_block_size, OPT_U64, 10 * 1024*1024*1024) // 10gb for testing +OPTION(bluestore_block_wal_path, OPT_STR, "") +OPTION(bluestore_block_wal_size, OPT_U64, 128 * 1024*1024) // 128MB for testing OPTION(bluestore_max_dir_size, OPT_U32, 1000000) OPTION(bluestore_min_alloc_size, OPT_U32, 512*1024) OPTION(bluestore_onode_map_size, OPT_U32, 1024) // onodes per collection diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 910c674f47fd..ec7179e70d4f 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -58,6 +58,11 @@ int BlueFS::add_block_device(unsigned id, string path) return 0; } +uint64_t BlueFS::get_block_device_size(unsigned id) +{ + return bdev[id]->get_size(); +} + void BlueFS::add_block_extent(unsigned id, uint64_t offset, uint64_t length) { Mutex::Locker l(lock); @@ -120,7 +125,11 @@ int BlueFS::mkfs(uint64_t super_offset_a, uint64_t super_offset_b) // init log FileRef log_file = new File; log_file->fnode.ino = 1; - _allocate(0, g_conf->bluefs_max_log_runway, &log_file->fnode.extents); + if (bdev.size() >= 2) + log_file->fnode.prefer_bdev = 1; + _allocate(log_file->fnode.prefer_bdev, + g_conf->bluefs_max_log_runway, + &log_file->fnode.extents); log_writer = new FileWriter(log_file, bdev.size()); // initial txn @@ -762,7 +771,8 @@ void BlueFS::_compact_log() vector old_extents; old_extents.swap(log_file->fnode.extents); while (log_file->fnode.get_allocated() < need) { - int r = _allocate(0, need - log_file->fnode.get_allocated(), + int r = _allocate(log_file->fnode.prefer_bdev, + need - log_file->fnode.get_allocated(), &log_file->fnode.extents); assert(r == 0); } @@ -811,7 +821,8 @@ int BlueFS::_flush_log() if (runway < g_conf->bluefs_min_log_runway) { dout(10) << __func__ << " allocating more log runway (" << runway << " remaining" << dendl; - int r = _allocate(0, g_conf->bluefs_max_log_runway, + int r = _allocate(log_writer->file->fnode.prefer_bdev, + g_conf->bluefs_max_log_runway, &log_writer->file->fnode.extents); assert(r == 0); log_t.op_file_update(log_writer->file->fnode); @@ -864,7 +875,9 @@ int BlueFS::_flush_range(FileWriter *h, uint64_t offset, uint64_t length) uint64_t allocated = h->file->fnode.get_allocated(); if (allocated < offset + length) { - int r = _allocate(0, offset + length - allocated, &h->file->fnode.extents); + int r = _allocate(h->file->fnode.prefer_bdev, + offset + length - allocated, + &h->file->fnode.extents); if (r < 0) return r; } @@ -930,7 +943,7 @@ int BlueFS::_flush_range(FileWriter *h, uint64_t offset, uint64_t length) z.zero(); t.append(z); } - bdev[0]->aio_write(p->offset + x_off, t, h->iocv[0]); + bdev[p->bdev]->aio_write(p->offset + x_off, t, h->iocv[p->bdev]); bloff += x_len; length -= x_len; ++p; @@ -1029,6 +1042,12 @@ int BlueFS::_allocate(unsigned id, uint64_t len, vector *ev) uint64_t left = ROUND_UP_TO(len, g_conf->bluefs_alloc_size); int r = alloc[id]->reserve(left); if (r < 0) { + if (id) { + derr << __func__ << " failed to allocate " << left << " on bdev " << id + << ", free " << alloc[id]->get_free() + << "; fallback to bdev 0" << dendl; + return _allocate(0, len, ev); + } derr << __func__ << " failed to allocate " << left << " on bdev " << id << ", free " << alloc[id]->get_free() << dendl; return r; @@ -1064,7 +1083,7 @@ int BlueFS::_preallocate(FileRef f, uint64_t off, uint64_t len) uint64_t allocated = f->fnode.get_allocated(); if (off + len > allocated) { uint64_t want = off + len - allocated; - int r = _allocate(0, want, &f->fnode.extents); + int r = _allocate(f->fnode.prefer_bdev, want, &f->fnode.extents); if (r < 0) return r; log_t.op_file_update(f->fnode); diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index dc22b434d527..c005493822b8 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -259,6 +259,7 @@ public: int compact(); int add_block_device(unsigned bdev, string path); + uint64_t get_block_device_size(unsigned bdev); /// gift more block space void add_block_extent(unsigned bdev, uint64_t offset, uint64_t len); diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 7aa8a6f2aad4..7eb4d756b074 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -937,6 +937,16 @@ int BlueStore::_open_db(bool create) if (create) { bluefs->add_block_extent(0, g_conf->bluestore_bluefs_initial_offset, g_conf->bluestore_bluefs_initial_length); + } + snprintf(bfn, sizeof(bfn), "%s/block.wal", path.c_str()); + struct stat st; + if (::stat(bfn, &st) == 0) { + bluefs->add_block_device(1, bfn); + if (create) { + bluefs->add_block_extent(1, 0, bluefs->get_block_device_size(1)); + } + } + if (create) { bluefs->mkfs(0, 4096); } int r = bluefs->mount(0, 4096); @@ -1244,7 +1254,7 @@ int BlueStore::mkfs() dout(1) << __func__ << " fsid is already set to " << fsid << dendl; } - // block device + // block symlink/file if (g_conf->bluestore_block_path.length()) { int r = ::symlinkat(g_conf->bluestore_block_path.c_str(), path_fd, "block"); if (r < 0) { @@ -1273,6 +1283,37 @@ int BlueStore::mkfs() } } + // block.wal symlink/file + if (g_conf->bluestore_block_wal_path.length()) { + int r = ::symlinkat(g_conf->bluestore_block_wal_path.c_str(), path_fd, + "block.wal"); + if (r < 0) { + r = -errno; + derr << __func__ << " failed to create block.wal symlink to " + << g_conf->bluestore_block_wal_path + << ": " << cpp_strerror(r) << dendl; + goto out_close_fsid; + } + } else if (g_conf->bluestore_block_wal_size) { + struct stat st; + int r = ::fstatat(path_fd, "block.wal", &st, 0); + if (r < 0) + r = -errno; + if (r == -ENOENT) { + int fd = ::openat(path_fd, "block.wal", O_CREAT|O_RDWR, 0644); + if (fd < 0) { + int r = -errno; + derr << __func__ << " faile to create block.wal file: " + << cpp_strerror(r) << dendl; + goto out_close_fsid; + } + int r = ::ftruncate(fd, g_conf->bluestore_block_wal_size); + assert(r == 0); + dout(1) << __func__ << " created block.wal file with size " + << pretty_si_t(g_conf->bluestore_block_wal_size) << "B" << dendl; + } + } + r = _open_bdev(); if (r < 0) goto out_close_fsid; diff --git a/src/os/bluestore/bluefs_types.cc b/src/os/bluestore/bluefs_types.cc index 493e7e0b711b..4102c485cfd0 100644 --- a/src/os/bluestore/bluefs_types.cc +++ b/src/os/bluestore/bluefs_types.cc @@ -127,6 +127,7 @@ void bluefs_fnode_t::encode(bufferlist& bl) const ::encode(ino, bl); ::encode(size, bl); ::encode(mtime, bl); + ::encode(prefer_bdev, bl); ::encode(extents, bl); ENCODE_FINISH(bl); } @@ -137,6 +138,7 @@ void bluefs_fnode_t::decode(bufferlist::iterator& p) ::decode(ino, p); ::decode(size, p); ::decode(mtime, p); + ::decode(prefer_bdev, p); ::decode(extents, p); DECODE_FINISH(p); } @@ -146,6 +148,7 @@ void bluefs_fnode_t::dump(Formatter *f) const f->dump_unsigned("ino", ino); f->dump_unsigned("size", size); f->dump_stream("mtime") << mtime; + f->dump_unsigned("prefer_bdev", prefer_bdev); f->open_array_section("extents"); for (auto& p : extents) f->dump_object("extent", p); @@ -160,6 +163,7 @@ void bluefs_fnode_t::generate_test_instances(list& ls) ls.back()->size = 1048576; ls.back()->mtime = utime_t(123,45); ls.back()->extents.push_back(bluefs_extent_t(0, 1048576, 4096)); + ls.back()->prefer_bdev = 1; } ostream& operator<<(ostream& out, const bluefs_fnode_t& file) @@ -167,6 +171,7 @@ ostream& operator<<(ostream& out, const bluefs_fnode_t& file) return out << "file(" << file.ino << " size " << file.size << " mtime " << file.mtime + << " bdev " << (int)file.prefer_bdev << " extents " << file.extents << ")"; } diff --git a/src/os/bluestore/bluefs_types.h b/src/os/bluestore/bluefs_types.h index cb9db9e2d577..7ae6ff69de27 100644 --- a/src/os/bluestore/bluefs_types.h +++ b/src/os/bluestore/bluefs_types.h @@ -33,9 +33,10 @@ struct bluefs_fnode_t { uint64_t ino; uint64_t size; utime_t mtime; + uint8_t prefer_bdev; vector extents; - bluefs_fnode_t() : ino(0), size(0) {} + bluefs_fnode_t() : ino(0), size(0), prefer_bdev(0) {} uint64_t get_allocated() const { uint64_t r = 0;