Use the optional block.wal device (bdev 1), when present, for the bluefs log.
Signed-off-by: Sage Weil <sage@redhat.com>
OPTION(bluestore_bluefs_min_gift_ratio, OPT_FLOAT, 1)
OPTION(bluestore_block_path, OPT_STR, "")
OPTION(bluestore_block_size, OPT_U64, 10 * 1024*1024*1024) // 10gb for testing
+OPTION(bluestore_block_wal_path, OPT_STR, "")
+OPTION(bluestore_block_wal_size, OPT_U64, 128 * 1024*1024) // 128MB for testing
OPTION(bluestore_max_dir_size, OPT_U32, 1000000)
OPTION(bluestore_min_alloc_size, OPT_U32, 512*1024)
OPTION(bluestore_onode_map_size, OPT_U32, 1024) // onodes per collection
return 0;
}
+/// Return the size of the block device registered in slot @p id, as
+/// reported by that device's get_size().
+///
+/// @param id  index into bdev; the slot must already hold an opened device
+///            (presumably populated via add_block_device() -- confirm).
+/// @return    the value of bdev[id]->get_size().
+///
+/// Asking about an unknown or empty slot is a caller bug, so it trips an
+/// assert here instead of indexing past the end of bdev or dereferencing
+/// an empty entry.
+uint64_t BlueFS::get_block_device_size(unsigned id)
+{
+ assert(id < bdev.size());
+ assert(bdev[id]);
+ return bdev[id]->get_size();
+}
+
void BlueFS::add_block_extent(unsigned id, uint64_t offset, uint64_t length)
{
Mutex::Locker l(lock);
// init log
FileRef log_file = new File;
log_file->fnode.ino = 1;
- _allocate(0, g_conf->bluefs_max_log_runway, &log_file->fnode.extents);
+ if (bdev.size() >= 2)
+ log_file->fnode.prefer_bdev = 1;
+ _allocate(log_file->fnode.prefer_bdev,
+ g_conf->bluefs_max_log_runway,
+ &log_file->fnode.extents);
log_writer = new FileWriter(log_file, bdev.size());
// initial txn
vector<bluefs_extent_t> old_extents;
old_extents.swap(log_file->fnode.extents);
while (log_file->fnode.get_allocated() < need) {
- int r = _allocate(0, need - log_file->fnode.get_allocated(),
+ int r = _allocate(log_file->fnode.prefer_bdev,
+ need - log_file->fnode.get_allocated(),
&log_file->fnode.extents);
assert(r == 0);
}
if (runway < g_conf->bluefs_min_log_runway) {
dout(10) << __func__ << " allocating more log runway ("
<< runway << " remaining" << dendl;
- int r = _allocate(0, g_conf->bluefs_max_log_runway,
+ int r = _allocate(log_writer->file->fnode.prefer_bdev,
+ g_conf->bluefs_max_log_runway,
&log_writer->file->fnode.extents);
assert(r == 0);
log_t.op_file_update(log_writer->file->fnode);
uint64_t allocated = h->file->fnode.get_allocated();
if (allocated < offset + length) {
- int r = _allocate(0, offset + length - allocated, &h->file->fnode.extents);
+ int r = _allocate(h->file->fnode.prefer_bdev,
+ offset + length - allocated,
+ &h->file->fnode.extents);
if (r < 0)
return r;
}
z.zero();
t.append(z);
}
- bdev[0]->aio_write(p->offset + x_off, t, h->iocv[0]);
+ bdev[p->bdev]->aio_write(p->offset + x_off, t, h->iocv[p->bdev]);
bloff += x_len;
length -= x_len;
++p;
uint64_t left = ROUND_UP_TO(len, g_conf->bluefs_alloc_size);
int r = alloc[id]->reserve(left);
if (r < 0) {
+ if (id) {
+ derr << __func__ << " failed to allocate " << left << " on bdev " << id
+ << ", free " << alloc[id]->get_free()
+ << "; fallback to bdev 0" << dendl;
+ return _allocate(0, len, ev);
+ }
derr << __func__ << " failed to allocate " << left << " on bdev " << id
<< ", free " << alloc[id]->get_free() << dendl;
return r;
uint64_t allocated = f->fnode.get_allocated();
if (off + len > allocated) {
uint64_t want = off + len - allocated;
- int r = _allocate(0, want, &f->fnode.extents);
+ int r = _allocate(f->fnode.prefer_bdev, want, &f->fnode.extents);
if (r < 0)
return r;
log_t.op_file_update(f->fnode);
int compact();
int add_block_device(unsigned bdev, string path);
+ uint64_t get_block_device_size(unsigned bdev);
/// gift more block space
void add_block_extent(unsigned bdev, uint64_t offset, uint64_t len);
if (create) {
bluefs->add_block_extent(0, g_conf->bluestore_bluefs_initial_offset,
g_conf->bluestore_bluefs_initial_length);
+ }
+ snprintf(bfn, sizeof(bfn), "%s/block.wal", path.c_str());
+ struct stat st;
+ if (::stat(bfn, &st) == 0) {
+ bluefs->add_block_device(1, bfn);
+ if (create) {
+ bluefs->add_block_extent(1, 0, bluefs->get_block_device_size(1));
+ }
+ }
+ if (create) {
bluefs->mkfs(0, 4096);
}
int r = bluefs->mount(0, 4096);
dout(1) << __func__ << " fsid is already set to " << fsid << dendl;
}
- // block device
+ // block symlink/file
if (g_conf->bluestore_block_path.length()) {
int r = ::symlinkat(g_conf->bluestore_block_path.c_str(), path_fd, "block");
if (r < 0) {
}
}
+ // block.wal symlink/file
+ if (g_conf->bluestore_block_wal_path.length()) {
+ int r = ::symlinkat(g_conf->bluestore_block_wal_path.c_str(), path_fd,
+ "block.wal");
+ if (r < 0) {
+ r = -errno;
+ derr << __func__ << " failed to create block.wal symlink to "
+ << g_conf->bluestore_block_wal_path
+ << ": " << cpp_strerror(r) << dendl;
+ goto out_close_fsid;
+ }
+ } else if (g_conf->bluestore_block_wal_size) {
+ struct stat st;
+ int r = ::fstatat(path_fd, "block.wal", &st, 0);
+ if (r < 0)
+ r = -errno;
+ if (r == -ENOENT) {
+ int fd = ::openat(path_fd, "block.wal", O_CREAT|O_RDWR, 0644);
+ if (fd < 0) {
+ int r = -errno;
+ derr << __func__ << " faile to create block.wal file: "
+ << cpp_strerror(r) << dendl;
+ goto out_close_fsid;
+ }
+ int r = ::ftruncate(fd, g_conf->bluestore_block_wal_size);
+ assert(r == 0);
+ dout(1) << __func__ << " created block.wal file with size "
+ << pretty_si_t(g_conf->bluestore_block_wal_size) << "B" << dendl;
+ }
+ }
+
r = _open_bdev();
if (r < 0)
goto out_close_fsid;
::encode(ino, bl);
::encode(size, bl);
::encode(mtime, bl);
+ ::encode(prefer_bdev, bl);
::encode(extents, bl);
ENCODE_FINISH(bl);
}
::decode(ino, p);
::decode(size, p);
::decode(mtime, p);
+ ::decode(prefer_bdev, p);
::decode(extents, p);
DECODE_FINISH(p);
}
f->dump_unsigned("ino", ino);
f->dump_unsigned("size", size);
f->dump_stream("mtime") << mtime;
+ f->dump_unsigned("prefer_bdev", prefer_bdev);
f->open_array_section("extents");
for (auto& p : extents)
f->dump_object("extent", p);
ls.back()->size = 1048576;
ls.back()->mtime = utime_t(123,45);
ls.back()->extents.push_back(bluefs_extent_t(0, 1048576, 4096));
+ ls.back()->prefer_bdev = 1;
}
ostream& operator<<(ostream& out, const bluefs_fnode_t& file)
return out << "file(" << file.ino
<< " size " << file.size
<< " mtime " << file.mtime
+ << " bdev " << (int)file.prefer_bdev
<< " extents " << file.extents
<< ")";
}
uint64_t ino;
uint64_t size;
utime_t mtime;
+ uint8_t prefer_bdev;
vector<bluefs_extent_t> extents;
- bluefs_fnode_t() : ino(0), size(0) {}
+ bluefs_fnode_t() : ino(0), size(0), prefer_bdev(0) {}
uint64_t get_allocated() const {
uint64_t r = 0;