]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd: update crush_location on startup from ceph-osd
author Sage Weil <sage@redhat.com>
Fri, 22 Apr 2016 15:07:05 +0000 (11:07 -0400)
committer Sage Weil <sage@redhat.com>
Mon, 9 May 2016 12:54:44 +0000 (08:54 -0400)
Update the crush location from ceph-osd instead of relying on
kludgey bash in ceph-osd-prestart.sh.  Among other things, this
lets us get accurate statfs information from the ObjectStore
implementation instead of relying on 'df'.

Fixes: http://tracker.ceph.com/issues/15213
Signed-off-by: Sage Weil <sage@redhat.com>
src/ceph-osd-prestart.sh
src/common/config_opts.h
src/osd/OSD.cc
src/osd/OSD.h

index c9a777d9661c2d9391ff768621a87ae2f2e517c4..c5b99d53c695f85038377c0e44b7d30066d2b65c 100644 (file)
@@ -20,32 +20,6 @@ fi
 data="/var/lib/ceph/osd/${cluster:-ceph}-$id"
 journal="$data/journal"
 
-update="$(ceph-conf --cluster=${cluster:-ceph} --name=osd.$id --lookup osd_crush_update_on_start || :)"
-
-if [ "${update:-1}" = "1" -o "${update:-1}" = "true" ]; then
-    # update location in crush
-    hook="$(ceph-conf --cluster=${cluster:-ceph} --name=osd.$id --lookup osd_crush_location_hook || :)"
-    if [ -z "$hook" ]; then
-        hook="/usr/bin/ceph-crush-location"
-    fi
-    location="$($hook --cluster ${cluster:-ceph} --id $id --type osd)"
-    weight="$(ceph-conf --cluster=${cluster:-ceph} --name=osd.$id --lookup osd_crush_initial_weight || :)"
-    if [ -e $data/block ]; then
-        defaultweight=`blockdev --getsize64 $data/block | awk '{ d= $1/1099511627776 ; r = sprintf("%.4f", d); print r }'`
-    else
-        defaultweight=`df -P -k $data/ | tail -1 | awk '{ d= $2/1073741824 ; r = sprintf("%.4f", d); print r }'`
-    fi
-    ceph \
-        --cluster="${cluster:-ceph}" \
-        --name="osd.$id" \
-        --keyring="$data/keyring" \
-        osd crush create-or-move \
-        -- \
-        "$id" \
-        "${weight:-${defaultweight:-1}}" \
-        $location
-fi
-
 if [ -L "$journal" -a ! -e "$journal" ]; then
     udevadm settle --timeout=5 || :
     if [ -L "$journal" -a ! -e "$journal" ]; then
index 0ec74a5f8acb9a267e26fd7f496b3370be4d19b4..a6227e9a56a0e07cec5c3cf8013d066138e70cb2 100644 (file)
@@ -612,9 +612,9 @@ OPTION(osd_pg_op_threshold_ratio, OPT_U64, 2)             // the expected maximu
 OPTION(osd_pg_bits, OPT_INT, 6)  // bits per osd
 OPTION(osd_pgp_bits, OPT_INT, 6)  // bits per osd
 OPTION(osd_crush_chooseleaf_type, OPT_INT, 1) // 1 = host
-// This parameter is not consumed by ceph C code but the upstart scripts.
-// OPTION(osd_crush_initial_weight, OPT_DOUBLE, 0) // the initial weight is for newly added osds.
 OPTION(osd_pool_use_gmt_hitset, OPT_BOOL, true) // try to use gmt for hitset archive names if all osds in cluster support it.
+OPTION(osd_crush_update_on_start, OPT_BOOL, true)
+OPTION(osd_crush_initial_weight, OPT_DOUBLE, 0) // the initial weight is for newly added osds.
 OPTION(osd_pool_default_crush_rule, OPT_INT, -1) // deprecated for osd_pool_default_crush_replicated_ruleset
 OPTION(osd_pool_default_crush_replicated_ruleset, OPT_INT, CEPH_DEFAULT_CRUSH_REPLICATED_RULESET)
 OPTION(osd_pool_erasure_code_stripe_width, OPT_U32, OSD_POOL_ERASURE_CODE_STRIPE_WIDTH) // in bytes
index 7caef4ef946e1a59250d8c58ca7ec015a5fe4924..bc12f78356948bbfd33aeb36b5341d397b636aa5 100644 (file)
@@ -2207,6 +2207,12 @@ int OSD::init()
     }
   }
 
+  r = update_crush_location();
+  if (r < 0) {
+    osd_lock.Lock();
+    goto monout;
+  }
+
   osd_lock.Lock();
   if (is_stopping())
     return 0;
@@ -2752,6 +2758,61 @@ int OSD::shutdown()
   return r;
 }
 
+int OSD::update_crush_location()
+{
+  if (!g_conf->osd_crush_update_on_start) {
+    dout(10) << __func__ << " osd_crush_update_on_start = false" << dendl;
+    return 0;
+  }
+
+  char weight[32];
+  if (g_conf->osd_crush_initial_weight) {
+    snprintf(weight, sizeof(weight), "%.4lf", g_conf->osd_crush_initial_weight);
+  } else {
+    struct statfs st;
+    int r = store->statfs(&st);
+    if (r < 0) {
+      derr << "statfs: " << cpp_strerror(r) << dendl;
+      return r;
+    }
+    snprintf(weight, sizeof(weight), "%.4lf",
+            MAX((double).00001,
+                (double)(st.f_blocks * st.f_bsize) /
+                (double)(1ull << 40 /* TB */)));
+  }
+
+  std::multimap<string,string> loc = cct->crush_location.get_location();
+  dout(10) << __func__ << " crush location is " << loc << dendl;
+
+  string cmd =
+    string("{\"prefix\": \"osd crush create-or-move\", ") +
+    string("\"id\": ") + stringify(whoami) + string(", ") +
+    string("\"weight\":") + weight + string(", ") +
+    string("\"args\": [");
+  for (multimap<string,string>::iterator p = loc.begin(); p != loc.end(); ++p) {
+    if (p != loc.begin())
+      cmd += ", ";
+    cmd += "\"" + p->first + "=" + p->second + "\"";
+  }
+  cmd += "]}";
+
+  dout(10) << __func__ << " cmd: " << cmd << dendl;
+  vector<string> vcmd{cmd};
+  bufferlist inbl;
+
+  C_SaferCond w;
+  string outs;
+  int r = monc->start_mon_command(vcmd, inbl, NULL, &outs, &w);
+  if (r == 0)
+    r = w.wait();
+  if (r < 0) {
+    derr << __func__ << " fail: '" << outs << "': " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  return 0;
+}
+
 void OSD::write_superblock(ObjectStore::Transaction& t)
 {
   dout(10) << "write_superblock " << superblock << dendl;
index cdf2afd45dc231d3aa1bb1da98956d05d282df42..7c6fac7fca7afea4202b96e240d88a0802978f1a 100644 (file)
@@ -2404,6 +2404,8 @@ protected:
   }
 
 private:
+  int update_crush_location();
+
   static int write_meta(ObjectStore *store,
                        uuid_d& cluster_fsid, uuid_d& osd_fsid, int whoami);