From: Sage Weil Date: Thu, 19 Mar 2015 18:35:13 +0000 (-0700) Subject: os/chain_xattr: stripe shortish xattrs over small chunks for XFS X-Git-Tag: v9.0.2~92^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=c6cdb4081e366f471b372102905a1192910ab2da;p=ceph.git os/chain_xattr: stripe shortish xattrs over small chunks for XFS XFS has a hard limit of 255 (or 254?) bytes for xattrs to be inlined in the inode due to a single byte for the length in the on-disk format. If we have an xattr that is a bit bigger than that it will get kicked out into a separate extent, but if we stripe it across a few shorter xattrs it can be inlined when the xfs inode is e.g. 2K. Adjust the chain_xattr logic to capture this case. Note that we are doing this unconditionally regardless of fs type, but that is probably okay given that most users use XFS and the cost isn't huge. Reported-by: Ning Yao Signed-off-by: Sage Weil --- diff --git a/src/os/chain_xattr.cc b/src/os/chain_xattr.cc index b50cade0b578..f50a9ccf011d 100644 --- a/src/os/chain_xattr.cc +++ b/src/os/chain_xattr.cc @@ -116,7 +116,8 @@ static int getxattr_len(const char *fn, const char *name) break; total += r; i++; - } while (r == CHAIN_XATTR_MAX_BLOCK_LEN); + } while (r == CHAIN_XATTR_MAX_BLOCK_LEN || + r == CHAIN_XATTR_SHORT_BLOCK_LEN); return total; } @@ -135,7 +136,6 @@ int chain_getxattr(const char *fn, const char *name, void *val, size_t size) do { chunk_size = (size < CHAIN_XATTR_MAX_BLOCK_LEN ? size : CHAIN_XATTR_MAX_BLOCK_LEN); get_raw_xattr_name(name, i, raw_name, sizeof(raw_name)); - size -= chunk_size; r = sys_getxattr(fn, raw_name, (char *)val + pos, chunk_size); if (i && r == -ENODATA) { @@ -147,17 +147,21 @@ int chain_getxattr(const char *fn, const char *name, void *val, size_t size) break; } - if (r > 0) + if (r > 0) { pos += r; + size -= r; + } i++; - } while (size && r == CHAIN_XATTR_MAX_BLOCK_LEN); + } while (size && (r == CHAIN_XATTR_MAX_BLOCK_LEN || + r == CHAIN_XATTR_SHORT_BLOCK_LEN)); if (r >= 0) { ret = pos; /* is there another chunk? that can happen if the last read size span over exactly one block */ - if (chunk_size == CHAIN_XATTR_MAX_BLOCK_LEN) { + if (chunk_size == CHAIN_XATTR_MAX_BLOCK_LEN || + chunk_size == CHAIN_XATTR_SHORT_BLOCK_LEN) { get_raw_xattr_name(name, i, raw_name, sizeof(raw_name)); r = sys_getxattr(fn, raw_name, 0, 0); if (r > 0) { // there's another chunk.. the original buffer was too small @@ -183,7 +187,8 @@ static int chain_fgetxattr_len(int fd, const char *name) break; total += r; i++; - } while (r == CHAIN_XATTR_MAX_BLOCK_LEN); + } while (r == CHAIN_XATTR_MAX_BLOCK_LEN || + r == CHAIN_XATTR_SHORT_BLOCK_LEN); return total; } @@ -202,7 +207,6 @@ int chain_fgetxattr(int fd, const char *name, void *val, size_t size) do { chunk_size = (size < CHAIN_XATTR_MAX_BLOCK_LEN ? size : CHAIN_XATTR_MAX_BLOCK_LEN); get_raw_xattr_name(name, i, raw_name, sizeof(raw_name)); - size -= chunk_size; r = sys_fgetxattr(fd, raw_name, (char *)val + pos, chunk_size); if (i && r == -ENODATA) { @@ -214,17 +218,21 @@ int chain_fgetxattr(int fd, const char *name, void *val, size_t size) break; } - if (r > 0) + if (r > 0) { pos += r; + size -= r; + } i++; - } while (size && r == CHAIN_XATTR_MAX_BLOCK_LEN); + } while (size && (r == CHAIN_XATTR_MAX_BLOCK_LEN || + r == CHAIN_XATTR_SHORT_BLOCK_LEN)); if (r >= 0) { ret = pos; /* is there another chunk? that can happen if the last read size span over exactly one block */ - if (chunk_size == CHAIN_XATTR_MAX_BLOCK_LEN) { + if (chunk_size == CHAIN_XATTR_MAX_BLOCK_LEN || + chunk_size == CHAIN_XATTR_SHORT_BLOCK_LEN) { get_raw_xattr_name(name, i, raw_name, sizeof(raw_name)); r = sys_fgetxattr(fd, raw_name, 0, 0); if (r > 0) { // there's another chunk.. the original buffer was too small @@ -238,14 +246,24 @@ int chain_fgetxattr(int fd, const char *name, void *val, size_t size) // setxattr +static int get_xattr_block_size(size_t size) +{ + if (size <= CHAIN_XATTR_SHORT_LEN_THRESHOLD) + // this may fit in the inode; stripe over short attrs so that XFS + // won't kick it out. + return CHAIN_XATTR_SHORT_BLOCK_LEN; + return CHAIN_XATTR_MAX_BLOCK_LEN; +} + int chain_setxattr(const char *fn, const char *name, const void *val, size_t size) { int i = 0, pos = 0; char raw_name[CHAIN_XATTR_MAX_NAME_LEN * 2 + 16]; int ret = 0; + size_t max_chunk_size = get_xattr_block_size(size); do { - size_t chunk_size = (size < CHAIN_XATTR_MAX_BLOCK_LEN ? size : CHAIN_XATTR_MAX_BLOCK_LEN); + size_t chunk_size = (size < max_chunk_size ? size : max_chunk_size); get_raw_xattr_name(name, i, raw_name, sizeof(raw_name)); size -= chunk_size; @@ -278,9 +296,10 @@ int chain_fsetxattr(int fd, const char *name, const void *val, size_t size) int i = 0, pos = 0; char raw_name[CHAIN_XATTR_MAX_NAME_LEN * 2 + 16]; int ret = 0; + size_t max_chunk_size = get_xattr_block_size(size); do { - size_t chunk_size = (size < CHAIN_XATTR_MAX_BLOCK_LEN ? size : CHAIN_XATTR_MAX_BLOCK_LEN); + size_t chunk_size = (size < max_chunk_size ? size : max_chunk_size); get_raw_xattr_name(name, i, raw_name, sizeof(raw_name)); size -= chunk_size; diff --git a/src/os/chain_xattr.h b/src/os/chain_xattr.h index 7e8312ff32b2..2d77568012ce 100644 --- a/src/os/chain_xattr.h +++ b/src/os/chain_xattr.h @@ -11,6 +11,12 @@ #define CHAIN_XATTR_MAX_NAME_LEN 128 #define CHAIN_XATTR_MAX_BLOCK_LEN 2048 +/* + * XFS will only inline xattrs < 255 bytes, so for xattrs that are + * likely to fit in the inode, stripe over short xattrs. + */ +#define CHAIN_XATTR_SHORT_BLOCK_LEN 250 +#define CHAIN_XATTR_SHORT_LEN_THRESHOLD 1000 // wrappers to hide annoying errno handling. diff --git a/src/test/objectstore/chain_xattr.cc b/src/test/objectstore/chain_xattr.cc index 1617fe9038bd..15190e7b6710 100644 --- a/src/test/objectstore/chain_xattr.cc +++ b/src/test/objectstore/chain_xattr.cc @@ -182,6 +182,40 @@ TEST(chain_xattr, chunk_aligned) { ASSERT_EQ(0, chain_fremovexattr(fd, name2.c_str())); } + for (int len = CHAIN_XATTR_SHORT_BLOCK_LEN - 10; + len < CHAIN_XATTR_SHORT_BLOCK_LEN + 10; + ++len) { + cout << len << std::endl; + const string x(len, 'x'); + char buf[len*2]; + ASSERT_EQ(len, chain_setxattr(file, name.c_str(), x.c_str(), len)); + char attrbuf[4096]; + int l = ceph_os_listxattr(file, attrbuf, sizeof(attrbuf)); + for (char *p = attrbuf; p - attrbuf < l; p += strlen(p) + 1) { + cout << " attr " << p << std::endl; + } + ASSERT_EQ(len, chain_getxattr(file, name.c_str(), buf, len*2)); + } + + { + // test tail path in chain_getxattr + const char *aname = "user.baz"; + char buf[CHAIN_XATTR_SHORT_BLOCK_LEN*3]; + memset(buf, 'x', sizeof(buf)); + ASSERT_EQ(sizeof(buf), chain_setxattr(file, aname, buf, sizeof(buf))); + ASSERT_EQ(-ERANGE, chain_getxattr(file, aname, buf, + CHAIN_XATTR_SHORT_BLOCK_LEN*2)); + } + { + // test tail path in chain_fgetxattr + const char *aname = "user.biz"; + char buf[CHAIN_XATTR_SHORT_BLOCK_LEN*3]; + memset(buf, 'x', sizeof(buf)); + ASSERT_EQ(sizeof(buf), chain_fsetxattr(fd, aname, buf, sizeof(buf))); + ASSERT_EQ(-ERANGE, chain_fgetxattr(fd, aname, buf, + CHAIN_XATTR_SHORT_BLOCK_LEN*2)); + } + ::close(fd); ::unlink(file); }