From c064c4456d33d27417e0b730aa0960f5f33d73c2 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 30 Sep 2009 10:24:28 -0700 Subject: [PATCH] uninline some header types --- src/Makefile.am | 7 ++ src/TODO | 5 + src/client/Client.cc | 18 ++-- src/include/ceph_frag.cc | 21 ++++ src/include/ceph_frag.h | 109 +++++++++++++++++++ src/include/ceph_fs.cc | 79 ++++++++++++++ src/include/ceph_fs.h | 223 +-------------------------------------- src/include/frag.h | 2 + src/include/types.h | 1 + src/kernel/Makefile | 3 +- src/kernel/ceph_frag.c | 1 + src/kernel/ceph_frag.h | 1 + src/kernel/ceph_fs.c | 1 + src/kernel/inode.c | 21 ++++ src/kernel/osdmap.c | 1 - src/kernel/osdmap.h | 20 ++++ src/kernel/super.h | 20 +--- src/kernel/types.h | 1 + src/mds/mdstypes.h | 2 +- src/osd/OSDMap.h | 4 +- src/osdc/Filer.cc | 10 +- src/osdc/Journaler.cc | 10 +- src/osdc/Journaler.h | 2 +- 23 files changed, 299 insertions(+), 263 deletions(-) create mode 100644 src/include/ceph_frag.cc create mode 100644 src/include/ceph_frag.h create mode 100644 src/include/ceph_fs.cc create mode 120000 src/kernel/ceph_frag.c create mode 120000 src/kernel/ceph_frag.h create mode 120000 src/kernel/ceph_fs.c diff --git a/src/Makefile.am b/src/Makefile.am index bdf10bbd96b35..832b71a3c9153 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -301,7 +301,9 @@ libcommon_files = \ common/common_init.cc \ common/buffer.cc \ common/debug.cc \ + include/ceph_fs.cc \ include/ceph_strings.cc \ + include/ceph_frag.cc \ config.cc \ common/lockdep.cc @@ -433,6 +435,7 @@ noinst_HEADERS = \ include/blobhash.h\ include/buffer.h\ include/byteorder.h\ + include/ceph_frag.h\ include/ceph_fs.h\ include/color.h\ include/crc32c.h\ @@ -467,7 +470,11 @@ noinst_HEADERS = \ kernel/buffer.h\ kernel/caps.c\ kernel/ceph_debug.h\ + kernel/ceph_frag.c\ + kernel/ceph_frag.h\ + kernel/ceph_fs.c\ kernel/ceph_fs.h\ + kernel/ceph_strings.c\ kernel/ceph_ver.h\ kernel/crush/crush.c\ kernel/crush/crush.h\ diff --git a/src/TODO b/src/TODO index 
b3606a969ec91..eb7a86f6c8dd0 100644 --- a/src/TODO +++ b/src/TODO @@ -20,6 +20,11 @@ v0.16 - kclient: retry alloc on ENOMEM when reading from connection? - client authentication + +/- uninline frags +/- uninline string hash + + bugs - mislinked directory? - premature filejournal trimming? diff --git a/src/client/Client.cc b/src/client/Client.cc index 2986a037aa8f4..07202eb646032 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -3505,7 +3505,7 @@ int Client::fill_stat(Inode *in, struct stat *st, frag_info_t *dirstat, nest_inf st->st_size = in->size; st->st_blocks = (in->size + 511) >> 9; } - st->st_blksize = MAX(ceph_file_layout_su(in->layout), 4096); + st->st_blksize = MAX(in->layout.fl_stripe_unit, 4096); if (dirstat) *dirstat = in->dirstat; @@ -3566,7 +3566,7 @@ int Client::fill_stat_precise(Inode *in, struct stat_precise *st, frag_info_t *d st->st_size = in->size; st->st_blocks = (in->size + 511) >> 9; } - st->st_blksize = MAX(ceph_file_layout_su(in->layout), 4096); + st->st_blksize = MAX(in->layout.fl_stripe_unit, 4096); if (dirstat) *dirstat = in->dirstat; @@ -4339,7 +4339,7 @@ int Client::_read_async(Fh *f, __u64 off, __u64 len, bufferlist *bl) l = MAX(l, g_conf.client_readahead_min); if (g_conf.client_readahead_max_bytes) l = MIN(l, g_conf.client_readahead_max_bytes); - loff_t p = ceph_file_layout_period(in->layout); + loff_t p = in->layout.fl_stripe_count * in->layout.fl_object_size; if (g_conf.client_readahead_max_periods) l = MIN(l, g_conf.client_readahead_max_periods * p); @@ -4348,7 +4348,7 @@ int Client::_read_async(Fh *f, __u64 off, __u64 len, bufferlist *bl) l -= (off+l) % p; else { // align readahead with stripe unit if we cross su boundary - int su = ceph_file_layout_su(in->layout); + int su = in->layout.fl_stripe_unit; if ((off+l)/su != off/su) l -= (off+l) % su; } @@ -5815,21 +5815,21 @@ int Client::get_file_stripe_unit(int fd) { ceph_file_layout layout; describe_layout(fd, &layout); - return
ceph_file_layout_su(layout); + return layout.fl_stripe_unit; /* file stripe unit, the field ceph_file_layout_su() read */ } int Client::get_file_stripe_width(int fd) { ceph_file_layout layout; describe_layout(fd, &layout); - return ceph_file_layout_stripe_width(layout); + return layout.fl_stripe_unit * layout.fl_stripe_count; } int Client::get_file_stripe_period(int fd) { ceph_file_layout layout; describe_layout(fd, &layout); - return ceph_file_layout_period(layout); + return layout.fl_object_size * layout.fl_stripe_count; } int Client::get_file_replication(int fd) @@ -5841,7 +5841,7 @@ int Client::get_file_replication(int fd) Fh *f = fd_map[fd]; Inode *in = f->inode; - pool = ceph_file_layout_pg_pool(in->layout); + pool = in->layout.fl_pg_pool; return osdmap->get_pg_pool(pool).get_size(); } @@ -5849,7 +5849,7 @@ int Client::get_default_preferred_pg(int fd) { ceph_file_layout layout; describe_layout(fd, &layout); - return ceph_file_layout_pg_preferred(layout); + return layout.fl_pg_preferred; } int Client::get_file_stripe_address(int fd, loff_t offset, string& address) diff --git a/src/include/ceph_frag.cc b/src/include/ceph_frag.cc new file mode 100644 index 0000000000000..ab6cf35c40919 --- /dev/null +++ b/src/include/ceph_frag.cc @@ -0,0 +1,21 @@ +/* + * Ceph 'frag' type + */ +#include "types.h" + +int ceph_frag_compare(__u32 a, __u32 b) +{ + unsigned va = ceph_frag_value(a); + unsigned vb = ceph_frag_value(b); + if (va < vb) + return -1; + if (va > vb) + return 1; + va = ceph_frag_bits(a); + vb = ceph_frag_bits(b); + if (va < vb) + return -1; + if (va > vb) + return 1; + return 0; +} diff --git a/src/include/ceph_frag.h b/src/include/ceph_frag.h new file mode 100644 index 0000000000000..793f50cb7c224 --- /dev/null +++ b/src/include/ceph_frag.h @@ -0,0 +1,109 @@ +#ifndef _FS_CEPH_FRAG_H +#define _FS_CEPH_FRAG_H + +/* + * "Frags" are a way to describe a subset of a 32-bit number space, + * using a mask and a value to match against that mask.
Any given frag + * (subset of the number space) can be partitioned into 2^n sub-frags. + * + * Frags are encoded into a 32-bit word: + * 8 upper bits = "bits" + * 24 lower bits = "value" + * (We could go to 5+27 bits, but who cares.) + * + * We use the _most_ significant bits of the 24 bit value. This makes + * values logically sort. + * + * Unfortunately, because the "bits" field is still in the high bits, we + * can't sort encoded frags numerically. However, it does allow you + * to feed encoded frags as values into frag_contains_value. + */ +static inline __u32 ceph_frag_make(__u32 b, __u32 v) +{ + return (b << 24) | + (v & (0xffffffu << (24-b)) & 0xffffffu); +} +static inline __u32 ceph_frag_bits(__u32 f) +{ + return f >> 24; +} +static inline __u32 ceph_frag_value(__u32 f) +{ + return f & 0xffffffu; +} +static inline __u32 ceph_frag_mask(__u32 f) +{ + return (0xffffffu << (24-ceph_frag_bits(f))) & 0xffffffu; +} +static inline __u32 ceph_frag_mask_shift(__u32 f) +{ + return 24 - ceph_frag_bits(f); +} + +static inline int ceph_frag_contains_value(__u32 f, __u32 v) +{ + return (v & ceph_frag_mask(f)) == ceph_frag_value(f); +} +static inline int ceph_frag_contains_frag(__u32 f, __u32 sub) +{ + /* is sub as specific as us, and contained by us? 
*/ + return ceph_frag_bits(sub) >= ceph_frag_bits(f) && + (ceph_frag_value(sub) & ceph_frag_mask(f)) == ceph_frag_value(f); +} + +static inline __u32 ceph_frag_parent(__u32 f) +{ + return ceph_frag_make(ceph_frag_bits(f) - 1, + ceph_frag_value(f) & (ceph_frag_mask(f) << 1)); +} +static inline int ceph_frag_is_left_child(__u32 f) +{ + return ceph_frag_bits(f) > 0 && + (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 0; +} +static inline int ceph_frag_is_right_child(__u32 f) +{ + return ceph_frag_bits(f) > 0 && + (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) != 0; /* masked result is 0 or the mask bit, never literally 1 */ +} +static inline __u32 ceph_frag_sibling(__u32 f) +{ + return ceph_frag_make(ceph_frag_bits(f), + ceph_frag_value(f) ^ (0x1000000 >> ceph_frag_bits(f))); +} +static inline __u32 ceph_frag_left_child(__u32 f) +{ + return ceph_frag_make(ceph_frag_bits(f)+1, ceph_frag_value(f)); +} +static inline __u32 ceph_frag_right_child(__u32 f) +{ + return ceph_frag_make(ceph_frag_bits(f)+1, + ceph_frag_value(f) | (0x1000000 >> (1+ceph_frag_bits(f)))); +} +static inline __u32 ceph_frag_make_child(__u32 f, int by, int i) +{ + int newbits = ceph_frag_bits(f) + by; + return ceph_frag_make(newbits, + ceph_frag_value(f) | (i << (24 - newbits))); +} +static inline int ceph_frag_is_leftmost(__u32 f) +{ + return ceph_frag_value(f) == 0; +} +static inline int ceph_frag_is_rightmost(__u32 f) +{ + return ceph_frag_value(f) == ceph_frag_mask(f); +} +static inline __u32 ceph_frag_next(__u32 f) +{ + return ceph_frag_make(ceph_frag_bits(f), + ceph_frag_value(f) + (0x1000000 >> ceph_frag_bits(f))); +} + +/* + * comparator to sort frags logically, as when traversing the + * number space in ascending order...
+ */ +int ceph_frag_compare(__u32 a, __u32 b); + +#endif diff --git a/src/include/ceph_fs.cc b/src/include/ceph_fs.cc new file mode 100644 index 0000000000000..fc615ba076dfe --- /dev/null +++ b/src/include/ceph_fs.cc @@ -0,0 +1,79 @@ +/* + * Some non-inline ceph helpers + */ +#include "types.h" + +int ceph_flags_to_mode(int flags) +{ +#ifdef O_DIRECTORY /* fixme */ + if ((flags & O_DIRECTORY) == O_DIRECTORY) + return CEPH_FILE_MODE_PIN; +#endif +#ifdef O_LAZY + if (flags & O_LAZY) + return CEPH_FILE_MODE_LAZY; +#endif + if ((flags & O_APPEND) == O_APPEND) + flags |= O_WRONLY; + + flags &= O_ACCMODE; + if ((flags & O_RDWR) == O_RDWR) + return CEPH_FILE_MODE_RDWR; + if ((flags & O_WRONLY) == O_WRONLY) + return CEPH_FILE_MODE_WR; + return CEPH_FILE_MODE_RD; +} + +int ceph_caps_for_mode(int mode) +{ + switch (mode) { + case CEPH_FILE_MODE_PIN: + return CEPH_CAP_PIN; + case CEPH_FILE_MODE_RD: + return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED | + CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE; + case CEPH_FILE_MODE_RDWR: + return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED | + CEPH_CAP_FILE_EXCL | + CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE | + CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | + CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL | + CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL; + case CEPH_FILE_MODE_WR: + return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED | + CEPH_CAP_FILE_EXCL | + CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | + CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL | + CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL; + } + return 0; +} + +/* Name hashing routines. Initial hash value */ +/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */ +#define ceph_init_name_hash() 0 + +/* partial hash update function. 
Assume roughly 4 bits per character */ +static unsigned long ceph_partial_name_hash(unsigned long c, unsigned long prevhash) +{ + return (prevhash + (c << 4) + (c >> 4)) * 11; +} + +/* + * Finally: cut down the number of bits to a int value (and try to avoid + * losing bits) + */ +static unsigned long ceph_end_name_hash(unsigned long hash) +{ + return (unsigned int) hash; +} + +/* Compute the hash for a name string. */ +unsigned int ceph_full_name_hash(const char *name, unsigned int len) +{ + unsigned long hash = ceph_init_name_hash(); + while (len--) + hash = ceph_partial_name_hash(*name++, hash); + return ceph_end_name_hash(hash); +} + diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index f5c99de531521..f7c8384aa21b4 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -49,125 +49,7 @@ #define CEPH_MAX_MON 31 -/* - * "Frags" are a way to describe a subset of a 32-bit number space, - * using a mask and a value to match against that mask. Any given frag - * (subset of the number space) can be partitioned into 2^n sub-frags. - * - * Frags are encoded into a 32-bit word: - * 8 upper bits = "bits" - * 24 lower bits = "value" - * (We could go to 5+27 bits, but who cares.) - * - * We use the _most_ significant bits of the 24 bit value. This makes - * values logically sort. - * - * Unfortunately, because the "bits" field is still in the high bits, we - * can't sort encoded frags numerically. However, it does allow you - * to feed encoded frags as values into frag_contains_value. 
- */ -static inline __u32 ceph_frag_make(__u32 b, __u32 v) -{ - return (b << 24) | - (v & (0xffffffu << (24-b)) & 0xffffffu); -} -static inline __u32 ceph_frag_bits(__u32 f) -{ - return f >> 24; -} -static inline __u32 ceph_frag_value(__u32 f) -{ - return f & 0xffffffu; -} -static inline __u32 ceph_frag_mask(__u32 f) -{ - return (0xffffffu << (24-ceph_frag_bits(f))) & 0xffffffu; -} -static inline __u32 ceph_frag_mask_shift(__u32 f) -{ - return 24 - ceph_frag_bits(f); -} - -static inline int ceph_frag_contains_value(__u32 f, __u32 v) -{ - return (v & ceph_frag_mask(f)) == ceph_frag_value(f); -} -static inline int ceph_frag_contains_frag(__u32 f, __u32 sub) -{ - /* is sub as specific as us, and contained by us? */ - return ceph_frag_bits(sub) >= ceph_frag_bits(f) && - (ceph_frag_value(sub) & ceph_frag_mask(f)) == ceph_frag_value(f); -} - -static inline __u32 ceph_frag_parent(__u32 f) -{ - return ceph_frag_make(ceph_frag_bits(f) - 1, - ceph_frag_value(f) & (ceph_frag_mask(f) << 1)); -} -static inline int ceph_frag_is_left_child(__u32 f) -{ - return ceph_frag_bits(f) > 0 && - (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 0; -} -static inline int ceph_frag_is_right_child(__u32 f) -{ - return ceph_frag_bits(f) > 0 && - (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 1; -} -static inline __u32 ceph_frag_sibling(__u32 f) -{ - return ceph_frag_make(ceph_frag_bits(f), - ceph_frag_value(f) ^ (0x1000000 >> ceph_frag_bits(f))); -} -static inline __u32 ceph_frag_left_child(__u32 f) -{ - return ceph_frag_make(ceph_frag_bits(f)+1, ceph_frag_value(f)); -} -static inline __u32 ceph_frag_right_child(__u32 f) -{ - return ceph_frag_make(ceph_frag_bits(f)+1, - ceph_frag_value(f) | (0x1000000 >> (1+ceph_frag_bits(f)))); -} -static inline __u32 ceph_frag_make_child(__u32 f, int by, int i) -{ - int newbits = ceph_frag_bits(f) + by; - return ceph_frag_make(newbits, - ceph_frag_value(f) | (i << (24 - newbits))); -} -static inline int ceph_frag_is_leftmost(__u32 f) -{ 
- return ceph_frag_value(f) == 0; -} -static inline int ceph_frag_is_rightmost(__u32 f) -{ - return ceph_frag_value(f) == ceph_frag_mask(f); -} -static inline __u32 ceph_frag_next(__u32 f) -{ - return ceph_frag_make(ceph_frag_bits(f), - ceph_frag_value(f) + (0x1000000 >> ceph_frag_bits(f))); -} - -/* - * comparator to sort frags logically, as when traversing the - * number space in ascending order... - */ -static inline int ceph_frag_compare(__u32 a, __u32 b) -{ - unsigned va = ceph_frag_value(a); - unsigned vb = ceph_frag_value(b); - if (va < vb) - return -1; - if (va > vb) - return 1; - va = ceph_frag_bits(a); - vb = ceph_frag_bits(b); - if (va < vb) - return -1; - if (va > vb) - return 1; - return 0; -} +unsigned int ceph_full_name_hash(const char *name, unsigned int len); /* @@ -190,63 +72,6 @@ struct ceph_file_layout { __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */ } __attribute__ ((packed)); -#define ceph_file_layout_su(l) ((__s32)le32_to_cpu((l).fl_stripe_unit)) -#define ceph_file_layout_stripe_count(l) \ - ((__s32)le32_to_cpu((l).fl_stripe_count)) -#define ceph_file_layout_object_size(l) ((__s32)le32_to_cpu((l).fl_object_size)) -#define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash)) -#define ceph_file_layout_object_su(l) \ - ((__s32)le32_to_cpu((l).fl_object_stripe_unit)) -#define ceph_file_layout_pg_preferred(l) \ - ((__s32)le32_to_cpu((l).fl_pg_preferred)) -#define ceph_file_layout_pg_pool(l) \ - ((__s32)le32_to_cpu((l).fl_pg_pool)) - -#define ceph_file_layout_stripe_width(l) (le32_to_cpu((l).fl_stripe_unit) * \ - le32_to_cpu((l).fl_stripe_count)) - -/* "period" == bytes before i start on a new set of objects */ -#define ceph_file_layout_period(l) (le32_to_cpu((l).fl_object_size) * \ - le32_to_cpu((l).fl_stripe_count)) - - - -/* - * string hash. - * - * taken from Linux, tho we should probably take care to use this one - * in case the upstream hash changes. - */ - -/* Name hashing routines. 
Initial hash value */ -/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */ -#define ceph_init_name_hash() 0 - -/* partial hash update function. Assume roughly 4 bits per character */ -static inline unsigned long -ceph_partial_name_hash(unsigned long c, unsigned long prevhash) -{ - return (prevhash + (c << 4) + (c >> 4)) * 11; -} - -/* - * Finally: cut down the number of bits to a int value (and try to avoid - * losing bits) - */ -static inline unsigned long ceph_end_name_hash(unsigned long hash) -{ - return (unsigned int) hash; -} - -/* Compute the hash for a name string. */ -static inline unsigned int -ceph_full_name_hash(const char *name, unsigned int len) -{ - unsigned long hash = ceph_init_name_hash(); - while (len--) - hash = ceph_partial_name_hash(*name++, hash); - return ceph_end_name_hash(hash); -} @@ -582,26 +407,7 @@ struct ceph_mds_reply_dirfrag { #define CEPH_FILE_MODE_LAZY 4 /* lazy io */ #define CEPH_FILE_MODE_NUM 8 /* bc these are bit fields.. mostly */ -static inline int ceph_flags_to_mode(int flags) -{ -#ifdef O_DIRECTORY /* fixme */ - if ((flags & O_DIRECTORY) == O_DIRECTORY) - return CEPH_FILE_MODE_PIN; -#endif -#ifdef O_LAZY - if (flags & O_LAZY) - return CEPH_FILE_MODE_LAZY; -#endif - if ((flags & O_APPEND) == O_APPEND) - flags |= O_WRONLY; - - flags &= O_ACCMODE; - if ((flags & O_RDWR) == O_RDWR) - return CEPH_FILE_MODE_RDWR; - if ((flags & O_WRONLY) == O_WRONLY) - return CEPH_FILE_MODE_WR; - return CEPH_FILE_MODE_RD; -} +int ceph_flags_to_mode(int flags); /* capability bits */ @@ -681,30 +487,7 @@ static inline int ceph_flags_to_mode(int flags) #define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \ CEPH_LOCK_IXATTR) -static inline int ceph_caps_for_mode(int mode) -{ - switch (mode) { - case CEPH_FILE_MODE_PIN: - return CEPH_CAP_PIN; - case CEPH_FILE_MODE_RD: - return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED | - CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE; - case CEPH_FILE_MODE_RDWR: - return CEPH_CAP_PIN | 
CEPH_CAP_FILE_SHARED | - CEPH_CAP_FILE_EXCL | - CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE | - CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | - CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL | - CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL; - case CEPH_FILE_MODE_WR: - return CEPH_CAP_PIN | CEPH_CAP_FILE_SHARED | - CEPH_CAP_FILE_EXCL | - CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | - CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL | - CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL; - } - return 0; -} +int ceph_caps_for_mode(int mode); enum { CEPH_CAP_OP_GRANT, /* mds->client grant */ diff --git a/src/include/frag.h b/src/include/frag.h index f5d3dc78b32dc..bfae38eda866a 100644 --- a/src/include/frag.h +++ b/src/include/frag.h @@ -21,6 +21,8 @@ #include #include "buffer.h" +#include "ceph_frag.h" + /* * * the goal here is to use a binary split strategy to partition a namespace. diff --git a/src/include/types.h b/src/include/types.h index f166cc5b2000c..9c9604a7ee07a 100644 --- a/src/include/types.h +++ b/src/include/types.h @@ -24,6 +24,7 @@ #include #include "ceph_fs.h" +#include "ceph_frag.h" #define _BACKWARD_BACKWARD_WARNING_H /* make gcc 4.3 shut up about hash_*. 
*/ diff --git a/src/kernel/Makefile b/src/kernel/Makefile index a15e8a95566fd..d7493d61614dd 100644 --- a/src/kernel/Makefile +++ b/src/kernel/Makefile @@ -12,7 +12,8 @@ ceph-objs := super.o inode.o dir.o file.o addr.o ioctl.o \ mds_client.o mdsmap.o \ mon_client.o \ osd_client.o osdmap.o crush/crush.o crush/mapper.o \ - debugfs.o ceph_strings.o + debugfs.o \ + ceph_fs.o ceph_strings.o ceph_frag.o else #Otherwise we were called directly from the command diff --git a/src/kernel/ceph_frag.c b/src/kernel/ceph_frag.c new file mode 120000 index 0000000000000..31c753435ab09 --- /dev/null +++ b/src/kernel/ceph_frag.c @@ -0,0 +1 @@ +../include/ceph_frag.cc \ No newline at end of file diff --git a/src/kernel/ceph_frag.h b/src/kernel/ceph_frag.h new file mode 120000 index 0000000000000..0b670e2a89440 --- /dev/null +++ b/src/kernel/ceph_frag.h @@ -0,0 +1 @@ +../include/ceph_frag.h \ No newline at end of file diff --git a/src/kernel/ceph_fs.c b/src/kernel/ceph_fs.c new file mode 120000 index 0000000000000..7a945bbedb2a4 --- /dev/null +++ b/src/kernel/ceph_fs.c @@ -0,0 +1 @@ +../include/ceph_fs.cc \ No newline at end of file diff --git a/src/kernel/inode.c b/src/kernel/inode.c index 91c5039a5ff20..c7402f0f9747c 100644 --- a/src/kernel/inode.c +++ b/src/kernel/inode.c @@ -138,6 +138,27 @@ static struct ceph_inode_frag *__get_or_create_frag(struct ceph_inode_info *ci, return frag; } +/* + * find a specific frag @f + */ +struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, u32 f) +{ + struct rb_node *n = ci->i_fragtree.rb_node; + + while (n) { + struct ceph_inode_frag *frag = + rb_entry(n, struct ceph_inode_frag, node); + int c = ceph_frag_compare(f, frag->frag); + if (c < 0) + n = n->rb_left; + else if (c > 0) + n = n->rb_right; + else + return frag; + } + return NULL; +} + /* * Choose frag containing the given value @v. 
If @pfrag is * specified, copy the frag delegation info to the caller if diff --git a/src/kernel/osdmap.c b/src/kernel/osdmap.c index a811d479eca9a..ad7f937376db2 100644 --- a/src/kernel/osdmap.c +++ b/src/kernel/osdmap.c @@ -8,7 +8,6 @@ #include "decode.h" #include "ceph_debug.h" - char *ceph_osdmap_state_str(char *str, int len, int state) { int flag = 0; diff --git a/src/kernel/osdmap.h b/src/kernel/osdmap.h index c76eecbf46dd4..cdf6f1aecf0a9 100644 --- a/src/kernel/osdmap.h +++ b/src/kernel/osdmap.h @@ -53,6 +53,26 @@ struct ceph_osdmap { struct crush_map *crush; }; +/* file layout helpers */ +#define ceph_file_layout_su(l) ((__s32)le32_to_cpu((l).fl_stripe_unit)) +#define ceph_file_layout_stripe_count(l) \ + ((__s32)le32_to_cpu((l).fl_stripe_count)) +#define ceph_file_layout_object_size(l) ((__s32)le32_to_cpu((l).fl_object_size)) +#define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash)) +#define ceph_file_layout_object_su(l) \ + ((__s32)le32_to_cpu((l).fl_object_stripe_unit)) +#define ceph_file_layout_pg_preferred(l) \ + ((__s32)le32_to_cpu((l).fl_pg_preferred)) +#define ceph_file_layout_pg_pool(l) \ + ((__s32)le32_to_cpu((l).fl_pg_pool)) + +#define ceph_file_layout_stripe_width(l) (le32_to_cpu((l).fl_stripe_unit) * \ + le32_to_cpu((l).fl_stripe_count)) + +/* "period" == bytes before i start on a new set of objects */ +#define ceph_file_layout_period(l) (le32_to_cpu((l).fl_object_size) * \ + le32_to_cpu((l).fl_stripe_count)) + static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd) { return (osd < map->max_osd) && (map->osd_state[osd] & CEPH_OSD_UP); diff --git a/src/kernel/super.h b/src/kernel/super.h index bb5bbed7dbea4..669a9a4748fe6 100644 --- a/src/kernel/super.h +++ b/src/kernel/super.h @@ -398,24 +398,8 @@ static inline bool ceph_i_test(struct inode *inode, unsigned mask) /* find a specific frag @f */ -static inline struct ceph_inode_frag * -__ceph_find_frag(struct ceph_inode_info *ci, u32 f) -{ - struct rb_node *n = 
ci->i_fragtree.rb_node; - - while (n) { - struct ceph_inode_frag *frag = - rb_entry(n, struct ceph_inode_frag, node); - int c = ceph_frag_compare(f, frag->frag); - if (c < 0) - n = n->rb_left; - else if (c > 0) - n = n->rb_right; - else - return frag; - } - return NULL; -} +extern struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, + u32 f); /* * choose fragment for value @v. copy frag content to pfrag, if leaf diff --git a/src/kernel/types.h b/src/kernel/types.h index c36318348f866..8a514568cab2a 100644 --- a/src/kernel/types.h +++ b/src/kernel/types.h @@ -8,6 +8,7 @@ #include #include "ceph_fs.h" +#include "ceph_frag.h" /* * Identify inodes by both their ino AND snapshot id (a u64). diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index 1e2c1a55d57e0..5598089964ddd 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -376,7 +376,7 @@ struct inode_t { bool is_truncating() const { return truncate_size != -1ull; } int64_t get_layout_size_increment() { - return ceph_file_layout_period(layout); + return layout.fl_object_size * layout.fl_stripe_count; } __u64 get_max_size() const { diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h index 6f7f6cb6153d1..a35d92a29f823 100644 --- a/src/osd/OSDMap.h +++ b/src/osd/OSDMap.h @@ -631,8 +631,8 @@ private: // oid -> pg ceph_object_layout file_to_object_layout(object_t oid, ceph_file_layout& layout) { return make_object_layout(oid, layout.fl_pg_pool, - ceph_file_layout_pg_preferred(layout), - ceph_file_layout_object_su(layout)); + layout.fl_pg_preferred, + layout.fl_object_stripe_unit); } ceph_object_layout make_object_layout(object_t oid, int pg_pool, int preferred=-1, int object_stripe_unit = 0) { diff --git a/src/osdc/Filer.cc b/src/osdc/Filer.cc index db60396d86793..50b8b3ab90125 100644 --- a/src/osdc/Filer.cc +++ b/src/osdc/Filer.cc @@ -64,7 +64,7 @@ int Filer::probe(inodeno_t ino, Probe *probe = new Probe(ino, *layout, snapid, start_from, end, pmtime, flags, fwd, onfinish); // period (bytes before 
we jump unto a new set of object(s)) - __u64 period = ceph_file_layout_period(*layout); + __u64 period = layout->fl_stripe_count * layout->fl_object_size; // start with 1+ periods. probe->probing_len = period; @@ -183,7 +183,7 @@ void Filer::_probed(Probe *probe, const object_t& oid, __u64 size, utime_t mtime // keep probing! dout(10) << "_probed probing further" << dendl; - __u64 period = ceph_file_layout_period(probe->layout); + __u64 period = probe->layout.fl_stripe_count * probe->layout.fl_object_size; if (probe->fwd) { probe->probing_off += probe->probing_len; assert(probe->probing_off % period == 0); @@ -225,9 +225,9 @@ void Filer::file_to_extents(inodeno_t ino, ceph_file_layout *layout, */ map< object_t, ObjectExtent > object_extents; - __u32 object_size = ceph_file_layout_object_size(*layout); - __u32 su = ceph_file_layout_su(*layout); - __u32 stripe_count = ceph_file_layout_stripe_count(*layout); + __u32 object_size = layout->fl_object_size; + __u32 su = layout->fl_stripe_unit; /* file stripe unit (ceph_file_layout_su), not fl_object_stripe_unit */ + __u32 stripe_count = layout->fl_stripe_count; assert(object_size >= su); __u64 stripes_per_object = object_size / su; dout(20) << " stripes_per_object " << stripes_per_object << dendl; diff --git a/src/osdc/Journaler.cc b/src/osdc/Journaler.cc index c860a582a3723..9dc2c5b06e921 100644 --- a/src/osdc/Journaler.cc +++ b/src/osdc/Journaler.cc @@ -35,7 +35,7 @@ void Journaler::create(ceph_file_layout *l) write_pos = flush_pos = ack_pos = safe_pos = read_pos = requested_pos = received_pos = - expire_pos = trimming_pos = trimmed_pos = ceph_file_layout_period(layout); + expire_pos = trimming_pos = trimmed_pos = layout.fl_stripe_count * layout.fl_object_size; } void Journaler::set_layout(ceph_file_layout *l) @@ -48,7 +48,7 @@ void Journaler::set_layout(ceph_file_layout *l) // prefetch intelligently.
// (watch out, this is big if you use big objects or weird striping) - fetch_len = ceph_file_layout_period(layout) * g_conf.journaler_prefetch_periods; + fetch_len = layout.fl_stripe_count * layout.fl_object_size * g_conf.journaler_prefetch_periods; prefetch_from = fetch_len / 2; } @@ -315,7 +315,7 @@ __s64 Journaler::append_entry(bufferlist& bl) if (!g_conf.journaler_allow_split_entries) { // will we span a stripe boundary? - int p = ceph_file_layout_su(layout); + int p = layout.fl_stripe_unit; if (write_pos / p != (write_pos + (__s64)(bl.length() + sizeof(s))) / p) { // yes. // move write_pos forward. @@ -357,7 +357,7 @@ __s64 Journaler::append_entry(bufferlist& bl) write_pos += sizeof(s) + s; // flush previous object? - int su = ceph_file_layout_su(layout); + int su = layout.fl_stripe_unit; int write_off = write_pos % su; int write_obj = write_pos / su; int flush_obj = flush_pos / su; @@ -783,7 +783,7 @@ public: void Journaler::trim() { __s64 trim_to = last_committed.expire_pos; - trim_to -= trim_to % ceph_file_layout_period(layout); + trim_to -= trim_to % (layout.fl_stripe_count * layout.fl_object_size); dout(10) << "trim last_commited head was " << last_committed << ", can trim to " << trim_to << dendl; diff --git a/src/osdc/Journaler.h b/src/osdc/Journaler.h index 0fa129558ffb7..c2f665401a8cb 100644 --- a/src/osdc/Journaler.h +++ b/src/osdc/Journaler.h @@ -243,7 +243,7 @@ public: __s64 get_expire_pos() const { return expire_pos; } __s64 get_trimmed_pos() const { return trimmed_pos; } - __s64 get_layout_period() const { return ceph_file_layout_period(layout); } + __s64 get_layout_period() const { return layout.fl_stripe_count * layout.fl_object_size; } ceph_file_layout& get_layout() { return layout; } // write -- 2.39.5