From 1230e696c4424b1744db9972133a369a3fe2a996 Mon Sep 17 00:00:00 2001 From: Willem Jan Withagen Date: Sun, 11 Feb 2018 22:06:46 +0100 Subject: [PATCH] common: posix_fallocate on ZFS returns EINVAL But even still it would not work on any COW FS. So reorganised the code to have a common routine that in the end will allocate a file on disk if needed FileStore would not build when there was no HAVE_POSIX_FALLOCATE other than on Apple. With ceph_posix_fallocate FileStore will also fallback to manually allocating the required file. Signed-off-by: Willem Jan Withagen --- src/CMakeLists.txt | 1 + src/common/compat.cc | 78 +++++++++++++++++++++++++++++++++ src/include/compat.h | 3 ++ src/os/bluestore/BlueStore.cc | 21 +-------- src/os/filestore/FileJournal.cc | 21 +-------- 5 files changed, 85 insertions(+), 39 deletions(-) create mode 100644 src/common/compat.cc diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2bf655c7d1e..634f6de46b5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -536,6 +536,7 @@ set(libcommon_files dmclock/support/src/run_every.cc osdc/Striper.cc osdc/Objecter.cc + common/compat.cc common/Graylog.cc common/fs_types.cc common/dns_resolve.cc diff --git a/src/common/compat.cc b/src/common/compat.cc new file mode 100644 index 00000000000..18b75874ad0 --- /dev/null +++ b/src/common/compat.cc @@ -0,0 +1,78 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(__linux__) +#include +#endif + +#include "include/compat.h" +#include "common/safe_io.h" + +// The type-value for a ZFS FS in fstatfs. +#define FS_ZFS_TYPE 0xde + +// On FreeBSD, ZFS fallocate always fails since it is considered impossible to +// reserve space on a COW filesystem. 
posix_fallocate() returns EINVAL
+// Linux in this case already emulates the reservation in glibc.
+// Otherwise the range is allocated manually by writing it out; even that is no
+// real guarantee that it is fully allocated on disk, since it could be compressed.
+// To prevent this the written buffer needs to be loaded with random data.
+// Returns 0 on success, otherwise an errno value > 0.
+int manual_fallocate(int fd, off_t offset, off_t len) {
+  // Compare as off_t: storing lseek()'s result in an int truncates big offsets.
+  if (lseek(fd, offset, SEEK_SET) == (off_t)-1)
+    return errno;
+  char data[1024*128];
+  // TODO: compressing filesystems would require random data
+  memset(data, 0x42, sizeof(data));
+  const off_t chunk = sizeof(data);
+  for (off_t off = 0; off < len; off += chunk) {
+    // The last write may be shorter than the buffer.
+    int r = safe_write(fd, data, (len - off < chunk) ? len - off : chunk);
+    // NOTE(review): safe_write presumably returns -errno, not -1; handle both.
+    if (r < 0)
+      return (r == -1) ? errno : -r;
+  }
+  return 0;
+}
+
+// Returns non-zero when fstatfs() reports f_type == FS_ZFS_TYPE for basedir_fd.
+int on_zfs(int basedir_fd) {
+  struct statfs basefs;
+  // On failure basefs is uninitialised, so do not inspect it: report "not ZFS".
+  if (fstatfs(basedir_fd, &basefs) != 0)
+    return 0;
+  return (basefs.f_type == FS_ZFS_TYPE);
+}
+
+// Preallocate len bytes at offset for fd. Returns 0 if OK, otherwise errno > 0.
+int ceph_posix_fallocate(int fd, off_t offset, off_t len) {
+#ifdef HAVE_POSIX_FALLOCATE
+  // posix_fallocate() fails with EINVAL on ZFS, so write the range out instead.
+  return on_zfs(fd) ? manual_fallocate(fd, offset, len)
+                    : posix_fallocate(fd, offset, len);
+#elif defined(__APPLE__)
+  fstore_t store;
+  store.fst_flags = F_ALLOCATECONTIG;
+  store.fst_posmode = F_PEOFPOSMODE;
+  store.fst_offset = offset;
+  store.fst_length = len;
+
+  int ret = fcntl(fd, F_PREALLOCATE, &store);
+  if (ret == -1) {
+    ret = errno;
+  }
+  return ret;
+#else
+  return manual_fallocate(fd, offset, len);
+#endif
+}
+
diff --git a/src/include/compat.h b/src/include/compat.h index d60e4c3c445..06b4b127a7e 100644 --- a/src/include/compat.h +++ b/src/include/compat.h @@ -13,6 +13,7 @@ #define CEPH_COMPAT_H #include "acconfig.h" +#include #if defined(__linux__) #define PROCPREFIX @@ -166,4 +167,6 @@ 0; }) #endif +int ceph_posix_fallocate(int fd, off_t offset, off_t len); + #endif /* !CEPH_COMPAT_H */ diff --git a/src/os/bluestore/BlueStore.cc 
b/src/os/bluestore/BlueStore.cc index d863ccf0f29..130287ef284 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -5310,30 +5310,13 @@ int BlueStore::_setup_block_symlink_or_file( } if (cct->_conf->bluestore_block_preallocate_file) { -#ifdef HAVE_POSIX_FALLOCATE - r = ::posix_fallocate(fd, 0, size); - if (r) { + r = ::ceph_posix_fallocate(fd, 0, size); + if (r > 0) { derr << __func__ << " failed to prefallocate " << name << " file to " << size << ": " << cpp_strerror(r) << dendl; VOID_TEMP_FAILURE_RETRY(::close(fd)); return -r; } -#else - char data[1024*128]; - for (uint64_t off = 0; off < size; off += sizeof(data)) { - if (off + sizeof(data) > size) - r = ::write(fd, data, size - off); - else - r = ::write(fd, data, sizeof(data)); - if (r < 0) { - r = -errno; - derr << __func__ << " failed to prefallocate w/ write " << name << " file to " - << size << ": " << cpp_strerror(r) << dendl; - VOID_TEMP_FAILURE_RETRY(::close(fd)); - return r; - } - } -#endif } dout(1) << __func__ << " resized " << name << " file to " << pretty_si_t(size) << "B" << dendl; diff --git a/src/os/filestore/FileJournal.cc b/src/os/filestore/FileJournal.cc index ba40620c272..5f616e6b5b4 100644 --- a/src/os/filestore/FileJournal.cc +++ b/src/os/filestore/FileJournal.cc @@ -198,32 +198,13 @@ int FileJournal::_open_file(int64_t oldsize, blksize_t blksize, << newsize << " bytes: " << cpp_strerror(err) << dendl; return -err; } -#ifdef HAVE_POSIX_FALLOCATE - ret = ::posix_fallocate(fd, 0, newsize); + ret = ceph_posix_fallocate(fd, 0, newsize); if (ret) { derr << "FileJournal::_open_file : unable to preallocation journal to " << newsize << " bytes: " << cpp_strerror(ret) << dendl; return -ret; } max_size = newsize; -#elif defined(__APPLE__) - fstore_t store; - store.fst_flags = F_ALLOCATECONTIG; - store.fst_posmode = F_PEOFPOSMODE; - store.fst_offset = 0; - store.fst_length = newsize; - - ret = ::fcntl(fd, F_PREALLOCATE, &store); - if (ret == -1) { - ret = -errno; - derr 
<< "FileJournal::_open_file : unable to preallocation journal to " - << newsize << " bytes: " << cpp_strerror(ret) << dendl; - return ret; - } - max_size = newsize; -#else -# error "Journal pre-allocation not supported on platform." -#endif } else { max_size = oldsize; -- 2.39.5