From: Jianpeng Ma Date: Thu, 13 Nov 2014 03:32:57 +0000 (+0800) Subject: FileJournal: Add ssd discard for journal which using ssd disk as journal. X-Git-Tag: v0.90~68 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=9f9eb6799e4fbdb20bb4dce9a15a560635d6af93;p=ceph.git FileJournal: Add ssd discard for journal which using ssd disk as journal. The journal is like a ring buffer. After data has been written to the media disk, the journal can overwrite it. But the SSD doesn't know that this data is no longer in use and can be removed. So add discard to tell the SSD to remove that data. This may not increase performance, but it can increase the lifetime of the SSD. Signed-off-by: Jianpeng Ma --- diff --git a/src/common/blkdev.cc b/src/common/blkdev.cc index 8b19abb2c4f04..50157828240f4 100644 --- a/src/common/blkdev.cc +++ b/src/common/blkdev.cc @@ -1,6 +1,9 @@ #include #include #include +#include +#include +#include #include "include/int_types.h" #ifdef __linux__ @@ -22,6 +25,46 @@ int get_block_device_size(int fd, int64_t *psize) return ret; } +bool block_device_support_discard(const char *devname) +{ + bool can_trim = false; + char *p = strstr((char *)devname, "sd"); + char name[32] = {0}; + + strcpy(name, p); + for (unsigned int i = 0; i < strlen(name); i++) { + if(isdigit(name[i])) { + name[i] = 0; + break; + } + } + + char filename[100] = {0}; + sprintf(filename, "/sys/block/%s/queue/discard_granularity", name); + + FILE *fp = fopen(filename, "r"); + if (fp == NULL) { + can_trim = false; + } else { + char buff[256] = {0}; + if (fgets(buff, sizeof(buff) - 1, fp)) { + if (strcmp(buff, "0")) + can_trim = false; + else + can_trim = true; + } else + can_trim = false; + fclose(fp); + } + return can_trim; +} + +int block_device_discard(int fd, int64_t offset, int64_t len) +{ + uint64_t range[2] = {(uint64_t)offset, (uint64_t)len}; + return ioctl(fd, BLKDISCARD, range); +} + #elif defined(__APPLE__) #include @@ -40,6 +83,15 @@ int get_block_device_size(int fd, int64_t *psize) return ret; } +bool 
block_device_support_discard(const char *devname) +{ + return false; +} + +int block_device_discard(int fd, int64_t offset, int64_t len) +{ + return -EOPNOTSUPP; +} #elif defined(__FreeBSD__) #include @@ -51,6 +103,15 @@ int get_block_device_size(int fd, int64_t *psize) return ret; } +bool block_device_support_discard(const char *devname) +{ + return false; +} + +int block_device_discard(int fd, int64_t offset, int64_t len) +{ + return -EOPNOTSUPP; +} #else # error "Unable to query block device size: unsupported platform, please report." #endif diff --git a/src/common/blkdev.h b/src/common/blkdev.h index a5b6471a51db6..5606205cc3277 100644 --- a/src/common/blkdev.h +++ b/src/common/blkdev.h @@ -2,5 +2,6 @@ #define __CEPH_COMMON_BLKDEV_H extern int get_block_device_size(int fd, int64_t *psize); - +extern bool block_device_support_discard(const char *devname); +extern int block_device_discard(int fd, int64_t offset, int64_t len); #endif diff --git a/src/os/FileJournal.cc b/src/os/FileJournal.cc index 8b1fc7253bcc7..53ed03f291f65 100644 --- a/src/os/FileJournal.cc +++ b/src/os/FileJournal.cc @@ -154,6 +154,8 @@ int FileJournal::_open_block_device() /* block devices have to write in blocks of CEPH_PAGE_SIZE */ block_size = CEPH_PAGE_SIZE; + discard = block_device_support_discard(fn.c_str()); + dout(10) << fn << " support discard: " << (int)discard << dendl; _check_disk_write_cache(); return 0; } @@ -1519,6 +1521,23 @@ void FileJournal::commit_start(uint64_t seq) } } +/* + * Send a discard command to the journal block device. + */ +void FileJournal::do_discard(int64_t offset, int64_t end) +{ + dout(10) << __func__ << "trim(" << offset << ", " << end << dendl; + + offset = ROUND_UP_TO(offset, block_size); + if (offset >= end) + return; + end = ROUND_UP_TO(end - block_size, block_size); + assert(end >= offset); + if (offset < end) + if (block_device_discard(fd, offset, end - offset) < 0) + dout(1) << __func__ << "ioctl(BLKDISCARD) error:" << cpp_strerror(errno) << dendl; +} + void 
FileJournal::committed_thru(uint64_t seq) { Mutex::Locker locker(write_lock); @@ -1551,6 +1570,8 @@ void FileJournal::committed_thru(uint64_t seq) while (!journalq.empty() && journalq.front().first <= seq) { journalq.pop_front(); } + + int64_t old_start = header.start; if (!journalq.empty()) { header.start = journalq.front().second; header.start_seq = journalq.front().first; @@ -1558,6 +1579,17 @@ void FileJournal::committed_thru(uint64_t seq) header.start = write_pos; header.start_seq = seq + 1; } + + if (discard) { + dout(10) << __func__ << " will trim (" << old_start << ", " << header.start << ")" << dendl; + if (old_start < header.start) + do_discard(old_start, header.start - 1); + else { + do_discard(old_start, header.max_size - 1); + do_discard(get_top(), header.start - 1); + } + } + must_write_header = true; print_header(); diff --git a/src/os/FileJournal.h b/src/os/FileJournal.h index d499871aad220..878b9094b6687 100644 --- a/src/os/FileJournal.h +++ b/src/os/FileJournal.h @@ -224,7 +224,8 @@ private: bool directio, aio, force_aio; bool must_write_header; off64_t write_pos; // byte where the next entry to be written will go - off64_t read_pos; // + off64_t read_pos; // + bool discard; //for block journal whether support discard #ifdef HAVE_LIBAIO /// state associated with an in-flight aio request @@ -326,6 +327,8 @@ private: off64_t *out_pos ///< [out] next position to read, will be wrapped ); + void do_discard(int64_t offset, int64_t end); + class Writer : public Thread { FileJournal *journal; public: @@ -365,6 +368,7 @@ private: directio(dio), aio(ai), force_aio(faio), must_write_header(false), write_pos(0), read_pos(0), + discard(false), #ifdef HAVE_LIBAIO aio_lock("FileJournal::aio_lock"), aio_ctx(0),