From c982780d57a5cb27bd2d887550435206180ec7e0 Mon Sep 17 00:00:00 2001 From: sage Date: Wed, 21 Sep 2005 07:43:46 +0000 Subject: [PATCH] *** empty log message *** git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@500 29311d96-e01e-0410-9327-a35deaab8ce9 --- ceph/crush/BinaryTree.h | 56 ++++++---- ceph/crush/Bucket.h | 13 ++- ceph/crush/Hash.h | 48 +++++++- ceph/crush/crush.h | 238 ++++++++++++++++++++++------------------ ceph/test/testcrush.cc | 196 +++++++++++++++++++++++---------- ceph/test/testtree.cc | 3 +- 6 files changed, 363 insertions(+), 191 deletions(-) diff --git a/ceph/crush/BinaryTree.h b/ceph/crush/BinaryTree.h index 62195e1f54a6a..523a781712c2d 100644 --- a/ceph/crush/BinaryTree.h +++ b/ceph/crush/BinaryTree.h @@ -3,8 +3,9 @@ #include #include -#include -#include +//#include +#include +//#include using namespace std; namespace crush { @@ -13,21 +14,31 @@ namespace crush { private: // tree def int root_node; // 0 for empty tree. - map node_nested; // all existing nodes in this map - map node_weight; // and this one - set node_complete; // only nodes with all possible children + int alloc; + vector node_nested; // all existing nodes in this map + vector node_weight; // and this one + vector node_complete; // only nodes with all possible children public: - BinaryTree() : root_node(0) {} + BinaryTree() : root_node(0), alloc(0) {} // accessors bool empty() const { return root_node == 0; } - bool exists(int n) const { return node_nested.count(n); } - int nested(int n) const { return exists(n) ? ((map)node_nested)[n]:0; } - float weight(int n) const { return exists(n) ? ((map)node_weight)[n]:0; } - bool complete(int n) const { return node_complete.count(n); } + bool exists(int n) const { return n < alloc && node_nested[n]; } + int nested(int n) const { return exists(n) ? node_nested[n]:0; } + float weight(int n) const { return exists(n) ? node_weight[n]:0; } + bool complete(int n) const { return exists(n) ? node_complete[n]:false; } int root() const { return root_node; } + int realloc(int n) { + while (alloc <= n) { + node_nested.push_back(0); + node_weight.push_back(0); + node_complete.push_back(0); + alloc++; + } + } + // tree navigation bool terminal(int n) const { return n & 1; } // odd nodes are leaves. int height(int n) const { @@ -67,21 +78,21 @@ namespace crush { assert(exists(n)); // erase node - node_nested.erase(n); - node_weight.erase(n); + node_nested[n] = 0; + node_weight[n] = 0; // adjust parents (!complete, -weight) int p = n; while (p != root_node) { p = parent(p); - node_complete.erase(p); + node_complete[p] = 0; node_weight[p] = weight(left(p)) + weight(right(p)); node_nested[p]--; if (nested(p) == 0) { - node_weight.erase(p); - node_nested.erase(p); + node_weight[p] = 0; + node_nested[p] = 0; } } @@ -90,8 +101,8 @@ namespace crush { (nested(left(root_node)) == 0 || nested(right(root_node)) == 0)) { // root now one child.. - node_weight.erase(root_node); - node_nested.erase(root_node); + node_weight[root_node] = 0; + node_nested[root_node] = 0; if (nested(left(root_node)) == 0) root_node = right(root_node); else @@ -101,8 +112,8 @@ namespace crush { if (terminal(root_node) && nested(root_node) == 0) { // empty! - node_weight.erase(root_node); - node_nested.erase(root_node); + node_weight[root_node] = 0; + node_nested[root_node] = 0; root_node = 0; } @@ -119,6 +130,7 @@ namespace crush { if (complete(root_node)) { // add new root int newroot = parent(root_node); + realloc(newroot); node_weight[newroot] = node_weight[root_node]; node_nested[newroot] = nested(root_node); @@ -151,20 +163,22 @@ namespace crush { // create at n //cout << "creating " << n << endl; + realloc(n); node_weight[n] = w; node_nested[n] = 1; - node_complete.insert(n); + node_complete[n] = 1; // ancestors: create, adjust weight, complete as appropriate int p = n; while (p != root_node) { p = parent(p); + realloc(p); // complete? if (!complete(p) && complete(left(p)) && complete(right(p))) - node_complete.insert(p); + node_complete[p] = 1; // weight (and implicitly create) node_weight[p] += w; diff --git a/ceph/crush/Bucket.h b/ceph/crush/Bucket.h index e07e6c98d518a..91ccd5b3b1ba4 100644 --- a/ceph/crush/Bucket.h +++ b/ceph/crush/Bucket.h @@ -19,10 +19,9 @@ namespace crush { float weight; public: - Bucket(int _id, - int _type, + Bucket(int _type, float _weight) : - id(_id), + id(0), type(_type), weight(_weight) { } @@ -31,6 +30,8 @@ namespace crush { float get_weight() const { return weight; } virtual int get_size() const = 0; + void set_id(int i) { id = i; } + void set_weight(float w) { weight = w; } virtual bool is_uniform() const = 0; @@ -50,9 +51,9 @@ namespace crush { vector primes; public: - UniformBucket(int _id, int _type, int _item_type, + UniformBucket(int _type, int _item_type, float _item_weight, vector& _items) : - Bucket(_id, _type, _item_weight*_items.size()), + Bucket(_type, _item_weight*_items.size()), item_type(_item_type), item_weight(_item_weight) { items = _items; @@ -108,7 +109,7 @@ namespace crush { map node_map; // node id -> item public: - MixedBucket(int _id, int _type) : Bucket(_id, _type, 0) { + MixedBucket(int _type) : Bucket(_type, 0) { } //float get_item_weight(int i) { return item_weight[i]; } diff --git a/ceph/crush/Hash.h b/ceph/crush/Hash.h index 796d7112f7b7c..ed6d885a6f2cd 100644 --- a/ceph/crush/Hash.h +++ b/ceph/crush/Hash.h @@ -2,6 +2,18 @@ #include using namespace __gnu_cxx; +#define hashmix(a,b,c) \ + a=a-b; a=a-c; a=a^(c>>13); \ + b=b-c; b=b-a; b=b^(a<<8); \ + c=c-a; c=c-b; c=c^(b>>13); \ + a=a-b; a=a-c; a=a^(c>>12); \ + b=b-c; b=b-a; b=b^(a<<16); \ + c=c-a; c=c-b; c=c^(b>>5); \ + a=a-b; a=a-c; a=a^(c>>3); \ + b=b-c; b=b-a; b=b^(a<<10); \ + c=c-a; c=c-b; c=c^(b>>15); + + namespace crush { class Hash { @@ -15,6 +27,37 @@ namespace crush { if (0) return (n ^ 0xdead1234) * (884811920 * 3 + 1); + // Robert jenkins' 96 bit mix + // sucks + if (0) { + int c = n; + int a = 12378912; + int b = 2982827; + a=a-b; a=a-c; a=a^(c>>13); + b=b-c; b=b-a; b=b^(a<<8); + c=c-a; c=c-b; c=c^(b>>13); + a=a-b; a=a-c; a=a^(c>>12); + b=b-c; b=b-a; b=b^(a<<16); + c=c-a; c=c-b; c=c^(b>>5); + a=a-b; a=a-c; a=a^(c>>3); + b=b-c; b=b-a; b=b^(a<<10); + c=c-a; c=c-b; c=c^(b>>15); + return c; + } + // robert jenkins 32-bit + // sucks + if (0) { + n += (n << 12); + n ^= (n >> 22); + n += (n << 4); + n ^= (n >> 9); + n += (n << 10); + n ^= (n >> 2); + n += (n << 7); + n ^= (n >> 12); + return n; + } + // djb2 if (0) { unsigned int hash = 5381; @@ -25,14 +68,17 @@ namespace crush { return hash; } - // JS + // JS (+ mixing -sage) // a little better than RS if (1) { unsigned int hash = 1315423911; + int a = 231232; + int b = 1232; for(unsigned int i = 0; i < 4; i++) { hash ^= ((hash << 5) + (n&255) + (hash >> 2)); + hashmix(a, b, hash); n = n >> 8; } diff --git a/ceph/crush/crush.h b/ceph/crush/crush.h index 90c519e7adea7..04a9809e86e3d 100644 --- a/ceph/crush/crush.h +++ b/ceph/crush/crush.h @@ -4,6 +4,7 @@ #include #include #include +#include using namespace std; ostream& operator<<(ostream& out, vector& v) @@ -65,35 +66,149 @@ namespace crush { class Crush { protected: map buckets; - map rules; + int bucketno; + Hash h; - set failed; public: + set failed; map overload; - + + vector collisions; + vector bumps; public: - Crush(int seed=123) : h(seed), collisions(20), bumps(20) {} + Crush(int seed=123) : bucketno(-1), h(seed), collisions(20), bumps(20) {} + ~Crush() { + // hose buckets + for (map::iterator it = buckets.begin(); + it != buckets.end(); + it++) { + delete it->second; + } + } - vector collisions; - vector bumps; - void add_bucket( Bucket *b ) { - buckets[b->get_id()] = b; - } - void add_rule( int id, Rule& r ) { - rules[id] = r; + + + int add_bucket( Bucket *b ) { + b->set_id(bucketno); + buckets[bucketno] = b; + int n = bucketno; + bucketno--; + return n; } + /* + * choose numrep distinct items of type type + */ + + static const int maxdepth = 20; + + void choose(int x, + int numrep, + int type, + Bucket *inbucket, + vector& outvec) { + + // for each replica + for (int rep=0; rep add_r(maxdepth); // re-choosing; initially zero + int out = -1; // my result + + // keep trying until we get a non-failed, non-colliding item + for (int attempt=0; ; attempt++) { + + // start with the input bucket + Bucket *in = inbucket; + int depth = 0; + + // choose through intervening buckets + while (1) { + // r may be twiddled to (try to) avoid past collisions + int r = rep; + if (in->is_uniform()) { + // uniform bucket; be careful! + if (numrep >= in->get_size()) { + // uniform bucket is too small; just walk thru elements + r += add_r[depth]; + } else { + // make sure numrep is not a multple of bucket size + int add = numrep*add_r[depth]; + if (in->get_size() % numrep == 0) { + add += add/in->get_size(); // shift seq once per pass through the bucket + } + r += add; + } + } else { + // mixed bucket; just make a distinct-ish r sequence + r += numrep * add_r[depth]; + } + + // choose + out = in->choose_r(x, r, h); + + // did we get the type we want? + if (in->is_uniform() && + ((UniformBucket*)in)->get_item_type() == type) + break; + + int itemtype = 0; // 0 is terminal type + if (buckets.count(out)) { // another bucket + in = buckets[out]; + itemtype = in->get_type(); + } + if (itemtype == type) break; // this is what we want! + depth++; + } + + // ok choice? + int bad_localness = 0; // 1 -> no locality in replacement, >1 more locality + if (type == 0 && failed.count(out)) + bad_localness = 1; // no locality + + if (overload.count(out)) { + float f = (float)(h(x, out) % 1000) / 1000.0; + if (f > overload[out]) + bad_localness = 1; // no locality + } + + for (int prep=0; prep& d) } } + +Bucket *make_bucket(Crush& c, vector& wid, int h, int& ndisks, int& nbuckets) +{ + if (h == 0) { + // uniform + Hash hash(123); + vector disks; + for (int i=0; imake_primes(hash); + c.add_bucket(b); + //cout << h << " uniformbucket with " << wid[h] << " disks" << endl; + return b; + } else { + // mixed + MixedBucket *b = new MixedBucket(nbuckets--, h+1); + for (int i=0; iadd_item(n->get_id(), n->get_weight()); + } + c.add_bucket(b); + //cout << h << " mixedbucket with " << wid[h] << endl; + return b; + } +} + +int make_hierarchy(Crush& c, vector& wid, int& ndisks, int& nbuckets) +{ + Bucket *b = make_bucket(c, wid, wid.size()-1, ndisks, nbuckets); + return b->get_id(); +} + + + int main() { Hash h(73232313); @@ -42,6 +77,8 @@ int main() // buckets vector disks; + int root = -1; + int nbuckets = -1; int ndisks = 0; if (0) { @@ -76,7 +113,7 @@ int main() //b.add_item(-3, ub3.get_weight()); } - if (1) { + if (0) { int bucket = -1; MixedBucket *root = new MixedBucket(bucket--, 2); @@ -84,27 +121,47 @@ int main() MixedBucket *b = new MixedBucket(bucket--, 1); int n = 5; - for (int j=0; jadd_item(disks[k], 10); + if (1) { + // add n buckets of n disks + for (int j=0; jadd_item(disks[k], 10); + + //b->add_item(disks[j], 10); + c.add_bucket(d); + b->add_item(d->get_id(), d->get_weight()); + } - //b->add_item(disks[j], 10); - c.add_bucket(d); - b->add_item(d->get_id(), d->get_weight()); - } + c.add_bucket(b); + root->add_item(b->get_id(), b->get_weight()); + } else { + // add n*n disks + make_disks(n*n, ndisks, disks); + for (int k=0; kadd_item(disks[k], 10); - c.add_bucket(b); - root->add_item(b->get_id(), b->get_weight()); + c.add_bucket(b); + root->add_item(b->get_id(), b->get_weight()); + } } c.add_bucket(root); } + if (1) { + vector wid; + for (int d=0; d<5; d++) + wid.push_back(10); + root = make_hierarchy(c, wid, ndisks, nbuckets); + } + + // rule int numrep = 1; @@ -113,7 +170,6 @@ int main() if (0) { rule.steps.push_back(RuleStep(CRUSH_RULE_TAKE, -100)); rule.steps.push_back(RuleStep(CRUSH_RULE_CHOOSE, numrep, 0)); - c.add_rule(numrep, rule); } if (1) { /* @@ -121,60 +177,90 @@ int main() rule.steps.push_back(RuleStep(CRUSH_RULE_CHOOSE, 2, 0)); rule.steps.push_back(RuleStep(CRUSH_RULE_EMIT)); */ - rule.steps.push_back(RuleStep(CRUSH_RULE_TAKE, -1)); + rule.steps.push_back(RuleStep(CRUSH_RULE_TAKE, root)); rule.steps.push_back(RuleStep(CRUSH_RULE_CHOOSE, 1, 0)); rule.steps.push_back(RuleStep(CRUSH_RULE_EMIT)); - c.add_rule(numrep, rule); } - c.overload[10] = .1; + //c.overload[10] = .1; + + int pg_per = 100; + int numpg = pg_per*ndisks/numrep; vector ocount(ndisks); + cout << ndisks << " disks, " << 1-nbuckets << " buckets" << endl; + cout << pg_per << " pgs per disk" << endl; + cout << numpg << " logical pgs" << endl; + cout << "numrep is " << numrep << endl; + - vector v(numrep); - int numo = 10000*ndisks/numrep; - cout << "nrep is " << numrep << endl; - cout << "placing " << numo << " logical, " << numo*numrep << " total" << endl; - for (int x=1; x v(numrep); + + for (int z=0; z