From 07c9de83b87a6f993e73c1f08ae0a172194287f8 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 28 Oct 2011 14:32:30 -0700 Subject: [PATCH] osdmaptool: build initial map from ceph.conf This builds the initial osd and crush maps from what is in the ceph.conf, taking advantage of host or rack tags that are present there. If there are >1 hosts, separate replicas across hosts. If there are >2 racks, separate across racks. Semi-arbitrary, but should capture most use cases. Signed-off-by: Sage Weil --- src/osd/OSDMap.cc | 149 ++++++++++++++++++++++++++++++++++++++++++++++ src/osd/OSDMap.h | 5 ++ src/osdmaptool.cc | 23 ++++--- 3 files changed, 170 insertions(+), 7 deletions(-) diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index b2231bda0f533..dbcd2f282fbac 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -919,6 +919,155 @@ void OSDMap::build_simple(CephContext *cct, epoch_t e, ceph_fsid_t &fsid, } } +void OSDMap::build_simple_from_conf(CephContext *cct, epoch_t e, ceph_fsid_t &fsid, + int pg_bits, int pgp_bits, int lpg_bits) +{ + ldout(cct, 10) << "build_simple_from_conf with " + << pg_bits << " pg bits per osd, " + << lpg_bits << " lpg bits" << dendl; + epoch = e; + set_fsid(fsid); + created = modified = ceph_clock_now(cct); + + const md_config_t *conf = cct->_conf; + + // count osds + int nosd = 1; + + vector sections; + conf->get_all_sections(sections); + for (vector::iterator i = sections.begin(); i != sections.end(); ++i) { + if (i->find("osd.") != 0) + continue; + + const char *begin = i->c_str() + 4; + char *end = (char*)begin; + int o = strtol(begin, &end, 10); + if (*end != '\0') + continue; + + if (o > nosd) + nosd = o; + } + + set_max_osd(nosd); + + // pgp_num <= pg_num + if (pgp_bits > pg_bits) + pgp_bits = pg_bits; + + // crush map + map rulesets; + rulesets[CEPH_DATA_RULE] = "data"; + rulesets[CEPH_METADATA_RULE] = "metadata"; + rulesets[CEPH_RBD_RULE] = "rbd"; + + for (map::iterator p = rulesets.begin(); p != rulesets.end(); p++) { + int64_t
pool = ++pool_max; + pools[pool].type = pg_pool_t::TYPE_REP; + pools[pool].size = cct->_conf->osd_pool_default_size; + pools[pool].crush_ruleset = p->first; + pools[pool].object_hash = CEPH_STR_HASH_RJENKINS; + pools[pool].pg_num = nosd << pg_bits; + pools[pool].pgp_num = nosd << pgp_bits; + pools[pool].lpg_num = lpg_bits ? (1 << (lpg_bits-1)) : 0; + pools[pool].lpgp_num = lpg_bits ? (1 << (lpg_bits-1)) : 0; + pools[pool].last_change = epoch; + if (p->first == CEPH_DATA_RULE) + pools[pool].crash_replay_interval = cct->_conf->osd_default_data_pool_replay_window; + pool_name[pool] = p->second; + } + + build_simple_crush_map_from_conf(cct, crush, rulesets); + + for (int i=0; i& rulesets) +{ + crush.create(); + + crush.set_type_name(0, "osd"); + crush.set_type_name(1, "host"); + crush.set_type_name(2, "rack"); + crush.set_type_name(3, "pool"); + + const md_config_t *conf = cct->_conf; + int minrep = conf->osd_min_rep; + int maxrep = conf->osd_max_rep; + + set hosts, racks; + + // root + int rootid = crush.add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_DEFAULT, 3 /* pool */, 0, NULL, NULL); + crush.set_item_name(rootid, "root"); + + // add osds + vector sections; + conf->get_all_sections(sections); + for (vector::iterator i = sections.begin(); i != sections.end(); ++i) { + if (i->find("osd.") != 0) + continue; + + const char *begin = i->c_str() + 4; + char *end = (char*)begin; + int o = strtol(begin, &end, 10); + if (*end != '\0') + continue; + + string host; + string rack; + vector sections; + sections.push_back("osd"); + sections.push_back(*i); + conf->get_val_from_conf_file(sections, "host", host, false); + conf->get_val_from_conf_file(sections, "rack", rack, false); + + if (host.length() == 0) + host = "unknownhost"; + if (rack.length() == 0) + rack = "unknownrack"; + + hosts.insert(host); + racks.insert(rack); + + map loc; + loc["host"] = host; + loc["rack"] = rack; + loc["pool"] = "root"; + + dout(0) << " adding osd." 
<< o << " at " << loc << dendl; + crush.insert_item(o, 1.0, *i, loc); + } + + // rules + for (map::iterator p = rulesets.begin(); p != rulesets.end(); p++) { + int ruleset = p->first; + crush_rule *rule = crush_make_rule(3, ruleset, pg_pool_t::TYPE_REP, minrep, maxrep); + crush_rule_set_step(rule, 0, CRUSH_RULE_TAKE, rootid, 0); + + if (racks.size() > 3) { + // spread replicas across hosts + crush_rule_set_step(rule, 1, CRUSH_RULE_CHOOSE_LEAF_FIRSTN, CRUSH_CHOOSE_N, 2); + } else if (hosts.size() > 1) { + // spread replicas across hosts + crush_rule_set_step(rule, 1, CRUSH_RULE_CHOOSE_LEAF_FIRSTN, CRUSH_CHOOSE_N, 1); + } else { + // just spread across osds + crush_rule_set_step(rule, 1, CRUSH_RULE_CHOOSE_FIRSTN, CRUSH_CHOOSE_N, 0); + } + crush_rule_set_step(rule, 2, CRUSH_RULE_EMIT, 0, 0); + int rno = crush_add_rule(crush.crush, rule, -1); + crush.set_rule_name(rno, p->second); + } + +} + + void OSDMap::build_simple_crush_map(CephContext *cct, CrushWrapper& crush, map& rulesets, int nosd, int ndom) { diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h index 184d9bf71d849..971691f927e50 100644 --- a/src/osd/OSDMap.h +++ b/src/osd/OSDMap.h @@ -638,8 +638,13 @@ public: void build_simple(CephContext *cct, epoch_t e, ceph_fsid_t &fsid, int num_osd, int num_dom, int pg_bits, int pgp_bits, int lpg_bits); + void build_simple_from_conf(CephContext *cct, epoch_t e, ceph_fsid_t &fsid, + int pg_bits, int pgp_bits, int lpg_bits); static void build_simple_crush_map(CephContext *cct, CrushWrapper& crush, map& poolsets, int num_osd, int num_dom=0); + static void build_simple_crush_map_from_conf(CephContext *cct, CrushWrapper& crush, + map& rulesets); + private: void print_osd_line(int cur, ostream& out) const; diff --git a/src/osdmaptool.cc b/src/osdmaptool.cc index 35d7259cbdb1f..34b3cc4448b74 100644 --- a/src/osdmaptool.cc +++ b/src/osdmaptool.cc @@ -56,6 +56,7 @@ int main(int argc, const char **argv) bool print_json = false; bool tree = false; bool createsimple = false; + bool 
create_from_conf = false; int num_osd = 0, num_dom = 0; int pg_bits = g_conf->osd_pg_bits; int pgp_bits = g_conf->osd_pgp_bits; @@ -85,6 +86,13 @@ int main(int argc, const char **argv) exit(EXIT_FAILURE); } createsimple = true; + } else if (ceph_argparse_withint(args, i, &num_dom, &err, "--num_dom", (char*)NULL)) { + if (!err.str().empty()) { + cerr << err.str() << std::endl; + exit(EXIT_FAILURE); + } + } else if (ceph_argparse_flag(args, i, "--create-from-conf", (char*)NULL)) { + create_from_conf = true; } else if (ceph_argparse_flag(args, i, "--clobber", (char*)NULL)) { clobber = true; } else if (ceph_argparse_withint(args, i, &pg_bits, &err, "--pg_bits", (char*)NULL)) { @@ -102,11 +110,6 @@ int main(int argc, const char **argv) cerr << err.str() << std::endl; exit(EXIT_FAILURE); } - } else if (ceph_argparse_withint(args, i, &num_dom, &err, "--num_dom", (char*)NULL)) { - if (!err.str().empty()) { - cerr << err.str() << std::endl; - exit(EXIT_FAILURE); - } } else if (ceph_argparse_witharg(args, i, &val, "--export_crush", (char*)NULL)) { export_crush = val; } else if (ceph_argparse_witharg(args, i, &val, "--import_crush", (char*)NULL)) { @@ -138,7 +141,7 @@ int main(int argc, const char **argv) int r = 0; struct stat st; - if (!createsimple && !clobber) { + if (!createsimple && !create_from_conf && !clobber) { std::string error; r = bl.read_file(fn.c_str(), &error); if (r == 0) { @@ -155,7 +158,7 @@ int main(int argc, const char **argv) return -1; } } - else if (createsimple && !clobber && ::stat(fn.c_str(), &st) == 0) { + else if ((createsimple || create_from_conf) && !clobber && ::stat(fn.c_str(), &st) == 0) { cerr << me << ": " << fn << " exists, --clobber to overwrite" << std::endl; return -1; } @@ -170,6 +173,12 @@ int main(int argc, const char **argv) osdmap.build_simple(g_ceph_context, 0, fsid, num_osd, num_dom, pg_bits, pgp_bits, lpg_bits); modified = true; } + if (create_from_conf) { + ceph_fsid_t fsid; + memset(&fsid, 0, sizeof(ceph_fsid_t)); + 
osdmap.build_simple_from_conf(g_ceph_context, 0, fsid, pg_bits, pgp_bits, lpg_bits); + modified = true; + } if (!import_crush.empty()) { bufferlist cbl; -- 2.39.5