From: Sage Weil Date: Sat, 7 Nov 2009 05:36:43 +0000 (-0800) Subject: osd: make object hash a pg_pool parameter X-Git-Tag: v0.18~128^2~10 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=5779a1333a1a8cb6cc32bb6deab01d099c5f0fd5;p=ceph.git osd: make object hash a pg_pool parameter --- diff --git a/src/Makefile.am b/src/Makefile.am index 0f9b2ce8c72c..1f1cb1561dc4 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -309,6 +309,7 @@ libcommon_files = \ common/buffer.cc \ common/debug.cc \ include/ceph_fs.cc \ + include/ceph_hash.cc \ include/ceph_strings.cc \ include/ceph_frag.cc \ config.cc \ @@ -447,6 +448,7 @@ noinst_HEADERS = \ include/byteorder.h\ include/ceph_frag.h\ include/ceph_fs.h\ + include/ceph_hash.h\ include/color.h\ include/crc32c.h\ include/cstring.h\ diff --git a/src/include/ceph_fs.cc b/src/include/ceph_fs.cc index b3ecf1b07521..79d76bc4303f 100644 --- a/src/include/ceph_fs.cc +++ b/src/include/ceph_fs.cc @@ -72,80 +72,3 @@ int ceph_caps_for_mode(int mode) } return 0; } - -/* - * Robert Jenkin's hash function. - * http://burtleburtle.net/bob/hash/evahash.html - * This is in the public domain. 
- */ -#define mix(a, b, c) \ - do { \ - a = a - b; a = a - c; a = a ^ (c >> 13); \ - b = b - c; b = b - a; b = b ^ (a << 8); \ - c = c - a; c = c - b; c = c ^ (b >> 13); \ - a = a - b; a = a - c; a = a ^ (c >> 12); \ - b = b - c; b = b - a; b = b ^ (a << 16); \ - c = c - a; c = c - b; c = c ^ (b >> 5); \ - a = a - b; a = a - c; a = a ^ (c >> 3); \ - b = b - c; b = b - a; b = b ^ (a << 10); \ - c = c - a; c = c - b; c = c ^ (b >> 15); \ - } while (0) - -unsigned int ceph_full_name_hash(const char *str, unsigned int length) -{ - const unsigned char *k = (const unsigned char *)str; - __u32 a, b, c; /* the internal state */ - __u32 len; /* how many key bytes still need mixing */ - - /* Set up the internal state */ - len = length; - a = 0x9e3779b9; /* the golden ratio; an arbitrary value */ - b = a; - c = 0; /* variable initialization of internal state */ - - /* handle most of the key */ - while (len >= 12) { - a = a + (k[0] + ((__u32)k[1] << 8) + ((__u32)k[2] << 16) + - ((__u32)k[3] << 24)); - b = b + (k[4] + ((__u32)k[5] << 8) + ((__u32)k[6] << 16) + - ((__u32)k[7] << 24)); - c = c + (k[8] + ((__u32)k[9] << 8) + ((__u32)k[10] << 16) + - ((__u32)k[11] << 24)); - mix(a, b, c); - k = k + 12; - len = len - 12; - } - - /* handle the last 11 bytes */ - c = c + length; - switch (len) { /* all the case statements fall through */ - case 11: - c = c + ((__u32)k[10] << 24); - case 10: - c = c + ((__u32)k[9] << 16); - case 9: - c = c + ((__u32)k[8] << 8); - /* the first byte of c is reserved for the length */ - case 8: - b = b + ((__u32)k[7] << 24); - case 7: - b = b + ((__u32)k[6] << 16); - case 6: - b = b + ((__u32)k[5] << 8); - case 5: - b = b + k[4]; - case 4: - a = a + ((__u32)k[3] << 24); - case 3: - a = a + ((__u32)k[2] << 16); - case 2: - a = a + ((__u32)k[1] << 8); - case 1: - a = a + k[0]; - /* case 0: nothing left to add */ - } - mix(a, b, c); - - return c; -} - diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 25fc537f4140..36becb024788 100644 --- 
a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -49,8 +49,6 @@ #define CEPH_MAX_MON 31 -unsigned int ceph_full_name_hash(const char *name, unsigned int len); - /* * ceph_file_layout - describe data layout for a file/inode diff --git a/src/include/ceph_hash.cc b/src/include/ceph_hash.cc new file mode 100644 index 000000000000..ac8be54631fe --- /dev/null +++ b/src/include/ceph_hash.cc @@ -0,0 +1,127 @@ + +#include "types.h" +#include "ceph_hash.h" + +/* + * Robert Jenkins' hash function. + * http://burtleburtle.net/bob/hash/evahash.html + * This is in the public domain. + */ +#define mix(a, b, c) \ + do { \ + a = a - b; a = a - c; a = a ^ (c >> 13); \ + b = b - c; b = b - a; b = b ^ (a << 8); \ + c = c - a; c = c - b; c = c ^ (b >> 13); \ + a = a - b; a = a - c; a = a ^ (c >> 12); \ + b = b - c; b = b - a; b = b ^ (a << 16); \ + c = c - a; c = c - b; c = c ^ (b >> 5); \ + a = a - b; a = a - c; a = a ^ (c >> 3); \ + b = b - c; b = b - a; b = b ^ (a << 10); \ + c = c - a; c = c - b; c = c ^ (b >> 15); \ + } while (0) + +unsigned ceph_str_hash_rjenkins(const char *str, unsigned length) +{ + const unsigned char *k = (const unsigned char *)str; + __u32 a, b, c; /* the internal state */ + __u32 len; /* how many key bytes still need mixing */ + + /* Set up the internal state */ + len = length; + a = 0x9e3779b9; /* the golden ratio; an arbitrary value */ + b = a; + c = 0; /* variable initialization of internal state */ + + /* handle most of the key */ + while (len >= 12) { + a = a + (k[0] + ((__u32)k[1] << 8) + ((__u32)k[2] << 16) + + ((__u32)k[3] << 24)); + b = b + (k[4] + ((__u32)k[5] << 8) + ((__u32)k[6] << 16) + + ((__u32)k[7] << 24)); + c = c + (k[8] + ((__u32)k[9] << 8) + ((__u32)k[10] << 16) + + ((__u32)k[11] << 24)); + mix(a, b, c); + k = k + 12; + len = len - 12; + } + + /* handle the last 11 bytes */ + c = c + length; + switch (len) { /* all the case statements fall through */ + case 11: + c = c + ((__u32)k[10] << 24); + case 10: + c = c + ((__u32)k[9] << 16); + case 9: + c = c + 
((__u32)k[8] << 8); + /* the first byte of c is reserved for the length */ + case 8: + b = b + ((__u32)k[7] << 24); + case 7: + b = b + ((__u32)k[6] << 16); + case 6: + b = b + ((__u32)k[5] << 8); + case 5: + b = b + k[4]; + case 4: + a = a + ((__u32)k[3] << 24); + case 3: + a = a + ((__u32)k[2] << 16); + case 2: + a = a + ((__u32)k[1] << 8); + case 1: + a = a + k[0]; + /* case 0: nothing left to add */ + } + mix(a, b, c); + + return c; +} + +/* + * linux dcache hash + */ +unsigned ceph_str_hash_linux(const char *str, unsigned length) +{ + unsigned long hash = 0; + unsigned char c; + + while (length-- > 0) { + c = *str++; + hash = (hash + (c << 4) + (c >> 4)) * 11; + } + return hash; +} + + +/* + * Hash the len bytes at s with the algorithm selected by type + * (CEPH_STR_HASH_*). An unrecognized type returns (unsigned)-1, which + * is also a possible hash value. + */ +unsigned ceph_str_hash(int type, const char *s, unsigned len) +{ + switch (type) { + case CEPH_STR_HASH_LINUX: + return ceph_str_hash_linux(s, len); + case CEPH_STR_HASH_RJENKINS: + return ceph_str_hash_rjenkins(s, len); + default: + return -1; + } +} + +/* + * Printable name for a CEPH_STR_HASH_* type; "unknown" otherwise. + */ +const char *ceph_str_hash_name(int type) +{ + switch (type) { + case CEPH_STR_HASH_LINUX: + return "linux"; + case CEPH_STR_HASH_RJENKINS: + return "rjenkins"; + default: + return "unknown"; + } +} diff --git a/src/include/ceph_hash.h b/src/include/ceph_hash.h new file mode 100644 index 000000000000..5ac470c433c9 --- /dev/null +++ b/src/include/ceph_hash.h @@ -0,0 +1,13 @@ +#ifndef _FS_CEPH_HASH_H +#define _FS_CEPH_HASH_H + +#define CEPH_STR_HASH_LINUX 0x1 /* linux dcache hash */ +#define CEPH_STR_HASH_RJENKINS 0x2 /* robert jenkins' */ + +extern unsigned ceph_str_hash_linux(const char *s, unsigned len); +extern unsigned ceph_str_hash_rjenkins(const char *s, unsigned len); + +extern unsigned ceph_str_hash(int type, const char *s, unsigned len); +extern const char *ceph_str_hash_name(int type); + +#endif diff --git a/src/include/nstring.h b/src/include/nstring.h index d18e79108d56..0a39959a703e 100644 --- a/src/include/nstring.h +++ b/src/include/nstring.h @@ -9,6 +9,8 @@ typedef tstring nstring; typedef cstring nstring; #endif 
+#include "ceph_hash.h" + static inline bool operator==(const nstring &l, const char *s) { return strcmp(l.c_str(), s) == 0; } @@ -43,7 +45,7 @@ namespace __gnu_cxx { { //static hash H; //return H(x.c_str()); - return ceph_full_name_hash(x.c_str(), x.length()); + return ceph_str_hash_linux(x.c_str(), x.length()); } }; } diff --git a/src/include/object.h b/src/include/object.h index a3227b7c55a9..f0bb910959c7 100644 --- a/src/include/object.h +++ b/src/include/object.h @@ -69,7 +69,7 @@ namespace __gnu_cxx { size_t operator()(const object_t& r) const { //static hash H; //return H(r.name); - return ceph_full_name_hash(r.name.c_str(), r.name.length()); + return ceph_str_hash_linux(r.name.c_str(), r.name.length()); } }; } diff --git a/src/include/rados.h b/src/include/rados.h index 85bdef78d142..fb23ff9297c9 100644 --- a/src/include/rados.h +++ b/src/include/rados.h @@ -84,6 +84,7 @@ struct ceph_pg_pool { __u8 type; /* CEPH_PG_TYPE_* */ __u8 size; /* number of osds in each pg */ __u8 crush_ruleset; /* crush placement rule */ + __u8 object_hash; /* hash mapping object name to ps */ __le32 pg_num, pgp_num; /* number of pg's */ __le32 lpg_num, lpgp_num; /* number of localized pg's */ __le32 last_change; /* most recent epoch changed */ diff --git a/src/mds/Anchor.h b/src/mds/Anchor.h index ceaa54ea13c1..ce7f95f3671b 100644 --- a/src/mds/Anchor.h +++ b/src/mds/Anchor.h @@ -42,7 +42,7 @@ public: ino(i), dirino(di), dn_hash(hash), nref(nr), updated(u) { } Anchor(inodeno_t i, inodeno_t di, const nstring &dname, int nr, version_t u) : ino(i), dirino(di), - dn_hash(ceph_full_name_hash(dname.data(), dname.length())), + dn_hash(ceph_str_hash_linux(dname.data(), dname.length())), nref(nr), updated(u) { } void encode(bufferlist &bl) const { diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index afe9e3fb0609..cd9fb7004137 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -225,7 +225,7 @@ frag_t CInode::pick_dirfrag(const nstring& dn) if (dirfragtree.empty()) return 
frag_t(); // avoid the string hash if we can. - __u32 h = ceph_full_name_hash(dn.data(), dn.length()); + __u32 h = ceph_str_hash_linux(dn.data(), dn.length()); return dirfragtree[h]; } diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index 78893686823d..b25e6e4e2cff 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -106,6 +106,7 @@ void OSDMap::build_simple(epoch_t e, ceph_fsid_t &fsid, pools[pool].v.type = CEPH_PG_TYPE_REP; pools[pool].v.size = 2; pools[pool].v.crush_ruleset = p->first; + pools[pool].v.object_hash = CEPH_STR_HASH_RJENKINS; pools[pool].v.pg_num = num_osd << pg_bits; pools[pool].v.pgp_num = num_osd << pg_bits; pools[pool].v.lpg_num = lpg_bits ? (1 << (lpg_bits-1)) : 0; diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h index 9a0cf99a4de0..776faa8b362f 100644 --- a/src/osd/OSDMap.h +++ b/src/osd/OSDMap.h @@ -637,8 +637,9 @@ private: ceph_object_layout make_object_layout(object_t oid, int pg_pool, int preferred=-1, int object_stripe_unit = 0) { // calculate ps (placement seed) - static hash H; - ps_t ps = H(oid); + // borrow the pool entry: a by-value local copies the whole pg_pool_t whenever get_pg_pool() returns a reference + const pg_pool_t& pool = get_pg_pool(pg_pool); + ps_t ps = ceph_str_hash(pool.v.object_hash, oid.name.c_str(), oid.name.length()); // mix in preferred osd, so we don't get the same peers for all of the placement pgs (e.g. 
0.0p*) if (preferred >= 0) diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index a8d38bce5ebb..f7edcb888f9f 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -555,6 +555,10 @@ struct pg_pool_t { unsigned get_type() const { return v.type; } unsigned get_size() const { return v.size; } int get_crush_ruleset() const { return v.crush_ruleset; } + int get_object_hash() const { return v.object_hash; } + const char *get_object_hash_name() const { + return ceph_str_hash_name(get_object_hash()); + } epoch_t get_last_change() const { return v.last_change; } epoch_t get_snap_epoch() const { return v.snap_epoch; } snapid_t get_snap_seq() const { return snapid_t(v.snap_seq); } @@ -686,6 +690,7 @@ inline ostream& operator<<(ostream& out, const pg_pool_t& p) { } out << " pg_size " << p.get_size() << " crush_ruleset " << p.get_crush_ruleset() + << " object_hash " << p.get_object_hash_name() << " pg_num " << p.get_pg_num() << " pgp_num " << p.get_pgp_num() << " lpg_num " << p.get_lpg_num()