From 56c7cc8c59339164c052a432cd59a55aeca0dbab Mon Sep 17 00:00:00 2001
From: Ali Maredia
Date: Fri, 8 Sep 2023 11:16:02 -0400
Subject: [PATCH] rgw: misc labeled op counters work

Highlights of this commit include:

- splitting the rgw perf counters cache into two caches for bucket labeled
  and user labeled op counters
- add config overrides to verify suite for CI
- add tenant label for op counters
- misc cleanup
- add docs for rgw metrics

Signed-off-by: Ali Maredia
---
 doc/dev/perf_counters.rst             |    4 +
 doc/radosgw/index.rst                 |    1 +
 doc/radosgw/metrics.rst               |  204 +++
 qa/suites/rgw/verify/overrides.yaml   |    2 +
 src/common/options/rgw.yaml.in        |   62 +-
 src/common/perf_counters.cc           |    7 +-
 src/common/perf_counters_cache.cc     |    7 +-
 src/exporter/DaemonMetricCollector.cc |    5 +-
 src/rgw/rgw_file.cc                   |    7 +-
 src/rgw/rgw_file_int.h                |    2 +
 src/rgw/rgw_op.cc                     |   62 +-
 src/rgw/rgw_perf_counters.cc          |  123 +-
 src/rgw/rgw_perf_counters.h           |   29 +-
 src/rgw/vstart.sh.swift               | 1930 +++++++++++++++++++++++++
 src/test/test_perf_counters_cache.cc  |   54 +-
 15 files changed, 2376 insertions(+), 123 deletions(-)
 create mode 100644 doc/radosgw/metrics.rst
 create mode 100755 src/rgw/vstart.sh.swift

diff --git a/doc/dev/perf_counters.rst b/doc/dev/perf_counters.rst
index a64d14d33bd0e..1bcc6120e4af9 100644
--- a/doc/dev/perf_counters.rst
+++ b/doc/dev/perf_counters.rst
@@ -1,3 +1,5 @@
+.. _Perf Counters:
+
 ===============
  Perf counters
 ===============
@@ -200,6 +202,8 @@ The actual dump is similar to the schema, except that average values are grouped
     }
 }
 
+.. _Labeled Perf Counters:
+
 Labeled Perf Counters
 ---------------------
 
diff --git a/doc/radosgw/index.rst b/doc/radosgw/index.rst
index 70443620237af..ed67413646d80 100644
--- a/doc/radosgw/index.rst
+++ b/doc/radosgw/index.rst
@@ -84,4 +84,5 @@ Storage Cluster with one API and then retrieve that data with the other API.
    Lua Scripting
    D3N Data Cache
    Cloud Transition
+   Metrics

diff --git a/doc/radosgw/metrics.rst b/doc/radosgw/metrics.rst
new file mode 100644
index 0000000000000..75ef782fa6aa9
--- /dev/null
+++ b/doc/radosgw/metrics.rst
@@ -0,0 +1,204 @@
+=======
+Metrics
+=======
+
+The Ceph Object Gateway uses :ref:`Perf Counters` to track metrics. The counters can be labeled (:ref:`Labeled Perf Counters`). When counters are labeled, they are stored in Ceph Object Gateway-specific caches.
+
+These metrics can be sent to the time series database Prometheus to visualize a cluster-wide view of usage data (for example, the number of S3 put operations on a specific bucket) over time.
+
+.. contents::
+
+Op Metrics
+==========
+
+The following metrics related to S3 or Swift operations are tracked per Ceph Object Gateway.
+
+.. 
list-table:: Radosgw Op Metrics
+   :widths: 25 25 75
+   :header-rows: 1
+
+   * - Name
+     - Type
+     - Description
+   * - put_ops
+     - Counter
+     - Number of put operations
+   * - put_b
+     - Counter
+     - Number of bytes put
+   * - put_initial_lat
+     - Gauge
+     - Total latency of put operations
+   * - get_ops
+     - Counter
+     - Number of get operations
+   * - get_b
+     - Counter
+     - Number of bytes from get requests
+   * - get_initial_lat
+     - Gauge
+     - Total latency of get operations
+   * - del_obj_ops
+     - Counter
+     - Number of delete object operations
+   * - del_obj_bytes
+     - Counter
+     - Number of bytes deleted
+   * - del_obj_lat
+     - Gauge
+     - Total latency of delete object operations
+   * - del_bucket_ops
+     - Counter
+     - Number of delete bucket operations
+   * - del_bucket_lat
+     - Gauge
+     - Total latency of delete bucket operations
+   * - copy_obj_ops
+     - Counter
+     - Number of copy object operations
+   * - copy_obj_bytes
+     - Counter
+     - Number of bytes copied
+   * - copy_obj_lat
+     - Gauge
+     - Total latency of copy object operations
+   * - list_object_ops
+     - Counter
+     - Number of list object operations
+   * - list_object_lat
+     - Gauge
+     - Total latency of list object operations
+   * - list_bucket_ops
+     - Counter
+     - Number of list bucket operations
+   * - list_bucket_lat
+     - Gauge
+     - Total latency of list bucket operations
+
+More information about op metrics can be seen in the ``rgw_op`` section of the output of the ``counter schema`` command.
+To view op metrics in the Ceph Object Gateway, go to the ``rgw_op`` section of the output of the ``counter dump`` command::
+
+    "rgw_op": [
+        {
+            "labels": {},
+            "counters": {
+                "put_ops": 2,
+                "put_b": 5327,
+                "put_initial_lat": {
+                    "avgcount": 2,
+                    "sum": 2.818064835,
+                    "avgtime": 1.409032417
+                },
+                "get_ops": 5,
+                "get_b": 5325,
+                "get_initial_lat": {
+                    "avgcount": 2,
+                    "sum": 0.003000069,
+                    "avgtime": 0.001500034
+                },
+                ...
+                "list_buckets_ops": 1,
+                "list_buckets_lat": {
+                    "avgcount": 1,
+                    "sum": 0.002300000,
+                    "avgtime": 0.002300000
+                }
+            }
+        },
+    ]
+
+Op Metrics Labels
+-----------------
+
+Op metrics can also be tracked per-user or per-bucket. These metrics are exported to Prometheus with labels like Bucket = {name} or User = {userid}::
+
+    "rgw_op": [
+        ...
+        {
+            "labels": {
+                "Bucket": "bucket1"
+            },
+            "counters": {
+                "put_ops": 2,
+                "put_b": 5327,
+                "put_initial_lat": {
+                    "avgcount": 2,
+                    "sum": 2.818064835,
+                    "avgtime": 1.409032417
+                },
+                "get_ops": 5,
+                "get_b": 5325,
+                "get_initial_lat": {
+                    "avgcount": 2,
+                    "sum": 0.003000069,
+                    "avgtime": 0.001500034
+                },
+                ...
+                "list_buckets_ops": 1,
+                "list_buckets_lat": {
+                    "avgcount": 1,
+                    "sum": 0.002300000,
+                    "avgtime": 0.002300000
+                }
+            }
+        },
+        ...
+    ]
+
+:ref:`rgw-multitenancy` allows buckets and users of the same name to exist simultaneously under different tenants. If a user or bucket lies under a tenant, a label for the tenant in the form Tenant = {tenantid} is added to the metric.
+
+In a large system with many users and buckets, it may not be tractable to export all metrics to Prometheus. For that reason, the collection of these labeled metrics is disabled by default.
+
+Once enabled, the working set of tracked users and buckets is constrained to limit memory and database usage. As a result, the collection of these labeled metrics will not always be reliable.
+
+
+User & Bucket Counter Caches
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To track op metrics by user, the Ceph Object Gateway config value ``rgw_user_counters_cache`` must be set to ``true``.
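+
+For example, a minimal sketch of enabling the user cache (and the bucket cache
+described below) on a running cluster and then inspecting the labeled counters.
+The admin socket path shown here is an assumption and will vary with your
+deployment, and the gateway must be restarted for the caches to be created::
+
+    ceph config set client.rgw rgw_user_counters_cache true
+    ceph config set client.rgw rgw_bucket_counters_cache true
+    # restart the Ceph Object Gateway, then, after some S3 or Swift traffic:
+    ceph daemon /var/run/ceph/ceph-client.rgw.8000.asok counter dump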
+
+To track op metrics by bucket, the Ceph Object Gateway config value ``rgw_bucket_counters_cache`` must be set to ``true``.
+
+These config values are set in Ceph via the command ``ceph config set client.rgw rgw_{user,bucket}_counters_cache true``.
+
+Since the op metrics are labeled perf counters, they live in memory. If the Ceph Object Gateway is restarted or crashes, all counters in the Ceph Object Gateway, whether in a cache or not, are lost.
+
+User & Bucket Counter Cache Size & Eviction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Both ``rgw_user_counters_cache_size`` and ``rgw_bucket_counters_cache_size`` can be used to set the number of entries in each cache.
+
+Counters are evicted from a cache once the number of counters in the cache is greater than the cache size config variable. The counters that are evicted are the least recently used (LRU).
+
+For example, if the number of buckets exceeded ``rgw_bucket_counters_cache_size`` by 1 and the counters labeled ``bucket1`` were the least recently updated, the counters for ``bucket1`` would be evicted from the cache. If S3 operations tracked by the op metrics were done on ``bucket1`` after eviction, all of the metrics in the cache for ``bucket1`` would start at 0.
+
+Cache sizing can depend on a number of factors. These factors include:
+
+#. Number of users in the cluster
+#. Number of buckets in the cluster
+#. Memory usage of the Ceph Object Gateway
+#. Disk and memory usage of Prometheus
+
+To help calculate the Ceph Object Gateway's memory usage of a cache, it should be noted that each cache entry, encompassing all of the op metrics, is 1360 bytes. This is an estimate and subject to change if metrics are added or removed from the op metrics list.
+
+Sending Metrics to Prometheus
+=============================
+
+To get metrics from a Ceph Object Gateway into the time series database Prometheus, the ceph-exporter daemon must be running and configured to scrape the Ceph Object Gateway's admin socket.
+
+The ceph-exporter daemon scrapes the Ceph Object Gateway's admin socket at a regular interval, defined by the config variable ``exporter_stats_period``.
+
+Prometheus has a configurable interval at which it scrapes the exporter (see: https://prometheus.io/docs/prometheus/latest/configuration/configuration/).
+
+Config Reference
+================
+The following rgw op metrics related settings can be set via ``ceph config set client.rgw CONFIG_VARIABLE VALUE``.
+
+.. confval:: rgw_user_counters_cache
+.. confval:: rgw_user_counters_cache_size
+.. confval:: rgw_bucket_counters_cache
+.. confval:: rgw_bucket_counters_cache_size
+
+The following notable ceph-exporter related settings can be set via ``ceph config set global CONFIG_VARIABLE VALUE``.
+
+.. 
confval:: exporter_stats_period diff --git a/qa/suites/rgw/verify/overrides.yaml b/qa/suites/rgw/verify/overrides.yaml index eac70f30c9f39..a881ce5cbe281 100644 --- a/qa/suites/rgw/verify/overrides.yaml +++ b/qa/suites/rgw/verify/overrides.yaml @@ -9,6 +9,8 @@ overrides: rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= testkey-2=aWIKTWFrZWZpbGUKbWFuCm91dApzcmMKVGVzdGluZwo= rgw crypt require ssl: false rgw torrent flag: true + rgw user counters cache: true + rgw bucket counters cache: true rgw: compression type: random storage classes: LUKEWARM, FROZEN diff --git a/src/common/options/rgw.yaml.in b/src/common/options/rgw.yaml.in index f2f85b648f8bf..1dfb96228944a 100644 --- a/src/common/options/rgw.yaml.in +++ b/src/common/options/rgw.yaml.in @@ -3829,26 +3829,8 @@ options: other form of policies that Amazon does, so if you are mirroring policies between RGW and AWS, you may wish to set this to false. default: true -- name: rgw_perf_counters_cache - type: bool - level: dev - default: false - desc: enable rgw labeled perf counters cache - long desc: If set to true, rgw creates labeled perf counters and stores them - in an rgw specific labeled perf counters cache. - see_also: - - rgw_perf_counters_cache_size - services: - - rgw - with_legacy: true -- name: rgw_perf_counters_cache_size - type: uint - level: advanced - desc: Number of labeled perf counters the rgw perf counters cache can store - default: 10000 services: - rgw - with_legacy: true - name: rgw_d4n_host type: str level: advanced @@ -3914,3 +3896,47 @@ options: services: - rgw with_legacy: true +- name: rgw_user_counters_cache + type: bool + level: dev + default: false + desc: enable a rgw perf counters cache for counters with user label + long desc: If set to true, rgw creates perf counters with a label for the user and stores them + in a perf counters cache. This perf counters cache contains only perf counters labeled by user. + see_also: + - rgw_user_counters_cache_size + services: + - rgw + with_legacy: true +- name: rgw_user_counters_cache_size + type: uint + level: advanced + desc: Number of labeled perf counters the user perf counters cache can store + default: 10000 + services: + - rgw + see_also: + - rgw_user_counters_cache + with_legacy: true +- name: rgw_bucket_counters_cache + type: bool + level: dev + default: false + desc: enable a rgw perf counters cache for counters with bucket label + long desc: If set to true, rgw creates perf counters with a label for the bucket and stores them + in a perf counters cache. This perf counters cache contains only perf counters labeled by bucket. 
+ see_also: + - rgw_bucket_counters_cache_size + services: + - rgw + with_legacy: true +- name: rgw_bucket_counters_cache_size + type: uint + level: advanced + desc: Number of labeled perf counters the bucket perf counters cache can store + default: 10000 + services: + - rgw + see_also: + - rgw_bucket_counters_cache + with_legacy: true diff --git a/src/common/perf_counters.cc b/src/common/perf_counters.cc index 81bf3284bdaeb..b5e361b505cd3 100644 --- a/src/common/perf_counters.cc +++ b/src/common/perf_counters.cc @@ -135,12 +135,7 @@ void PerfCountersCollectionImpl::dump_formatted_generic( const std::string &counter) const { f->open_object_section("perfcounter_collection"); - // close out all of counters collection immediately if collection is empty - if (m_loggers.empty()) { - f->close_section(); // all of counters collection - return; - } - + if (dump_labeled) { std::string prev_key_name; for (auto l = m_loggers.begin(); l != m_loggers.end(); ++l) { diff --git a/src/common/perf_counters_cache.cc b/src/common/perf_counters_cache.cc index e0810508ce7f7..946b5f5b8edb4 100644 --- a/src/common/perf_counters_cache.cc +++ b/src/common/perf_counters_cache.cc @@ -5,17 +5,16 @@ namespace ceph::perf_counters { void PerfCountersCache::check_key(const std::string &key) { std::string_view key_name = ceph::perf_counters::key_name(key); - // return false for empty key name + // don't accept an empty key name assert(key_name != ""); - // if there are no labels key name is not valid + // if there are no labels, key name is not valid auto key_labels = ceph::perf_counters::key_labels(key); assert(key_labels.begin() != key_labels.end()); - // don't accept keys where any labels have an empty label name + // don't accept keys where any labels in the key have an empty key name for (auto key_label : key_labels) { assert(key_label.first != ""); - assert(key_label.second != ""); } } diff --git a/src/exporter/DaemonMetricCollector.cc b/src/exporter/DaemonMetricCollector.cc index 0ce5419a77647..23a0dd550f1c9 100644 --- a/src/exporter/DaemonMetricCollector.cc +++ b/src/exporter/DaemonMetricCollector.cc @@ -160,8 +160,9 @@ void DaemonMetricCollector::dump_asok_metrics() { labels.insert(multisite_labels_and_name.first.begin(), multisite_labels_and_name.first.end()); counter_name = multisite_labels_and_name.second; } - if (counters_values.find(counter_name_init) != counters_values.end()) { - auto perf_values = counters_values.at(counter_name_init); + auto counters_values_itr = counters_values.find(counter_name_init); + if (counters_values_itr != counters_values.end()) { + auto perf_values = counters_values_itr->value(); dump_asok_metric(counter_group, perf_values, counter_name, labels); } } diff --git a/src/rgw/rgw_file.cc b/src/rgw/rgw_file.cc index 92aa66f060eb4..ca149ad836b89 100644 --- a/src/rgw/rgw_file.cc +++ b/src/rgw/rgw_file.cc @@ -1838,7 +1838,8 @@ namespace rgw { ceph_assert(! dlo_manifest); ceph_assert(! 
slo_info); - rgw::op_counters::global_op_counters->inc(l_rgw_op_put); + counters = rgw::op_counters::get(state); + rgw::op_counters::inc(counters, l_rgw_op_put, 1); op_ret = -EINVAL; if (state->object->empty()) { @@ -1944,7 +1945,7 @@ namespace rgw { real_time appx_t = real_clock::now(); state->obj_size = bytes_written; - rgw::op_counters::global_op_counters->inc(l_rgw_op_put_b, state->obj_size); + rgw::op_counters::inc(counters, l_rgw_op_put_b, state->obj_size); // flush data in filters op_ret = filter->process({}, state->obj_size); @@ -2027,7 +2028,7 @@ namespace rgw { } done: - rgw::op_counters::global_op_counters->tinc(l_rgw_op_put_lat, state->time_elapsed()); + rgw::op_counters::tinc(counters, l_rgw_op_put_lat, state->time_elapsed()); return op_ret; } /* exec_finish */ diff --git a/src/rgw/rgw_file_int.h b/src/rgw/rgw_file_int.h index 6ecd4b2447da7..91c858e5b3bdd 100644 --- a/src/rgw/rgw_file_int.h +++ b/src/rgw/rgw_file_int.h @@ -36,6 +36,7 @@ #include "rgw_putobj_processor.h" #include "rgw_aio_throttle.h" #include "rgw_compression.h" +#include "rgw_perf_counters.h" /* XXX @@ -2485,6 +2486,7 @@ public: off_t real_ofs; size_t bytes_written; bool eio; + rgw::op_counters::CountersContainer counters; RGWWriteRequest(rgw::sal::Driver* driver, const RGWProcessEnv& penv, std::unique_ptr _user, diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index cd347502e76c5..29a1f36c0273d 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -1692,8 +1692,8 @@ int RGWGetObj::read_user_manifest_part(rgw::sal::Bucket* bucket, return 0; } - auto labeled_counters = rgw::op_counters::get({{"Bucket", s->bucket_name}, {"User", s->user->get_id().id}}); - rgw::op_counters::inc(labeled_counters, l_rgw_op_get_b, cur_end - cur_ofs); + auto counters = rgw::op_counters::get(s); + rgw::op_counters::inc(counters, l_rgw_op_get_b, cur_end - cur_ofs); filter->fixup_range(cur_ofs, cur_end); op_ret = read_op->iterate(this, cur_ofs, cur_end, filter, s->yield); if (op_ret >= 0) @@ -1766,7 +1766,8 @@ static int iterate_user_manifest_parts(const DoutPrefixProvider *dpp, found_end = true; } - rgw::op_counters::global_op_counters->tinc(l_rgw_op_get_lat, + rgw::op_counters::CountersContainer counters; + rgw::op_counters::tinc(counters, l_rgw_op_get_lat, (ceph_clock_now() - start_time)); if (found_start && !handled_end) { @@ -1862,8 +1863,9 @@ static int iterate_slo_parts(const DoutPrefixProvider *dpp, found_end = true; } - rgw::op_counters::global_op_counters->tinc(l_rgw_op_get_lat, - (ceph_clock_now() - start_time)); + rgw::op_counters::CountersContainer counters; + rgw::op_counters::tinc(counters, l_rgw_op_get_lat, + (ceph_clock_now() - start_time)); if (found_start) { if (cb) { @@ -2210,8 +2212,8 @@ void RGWGetObj::execute(optional_yield y) std::unique_ptr run_lua; map::iterator attr_iter; - auto labeled_counters = rgw::op_counters::get({{"Bucket", s->bucket_name}, {"User", s->user->get_id().id}}); - rgw::op_counters::inc(labeled_counters, l_rgw_op_get, 1); + auto counters = rgw::op_counters::get(s); + rgw::op_counters::inc(counters, l_rgw_op_get, 1); std::unique_ptr read_op(s->object->get_read_op()); @@ -2409,14 +2411,14 @@ void RGWGetObj::execute(optional_yield y) return; } - rgw::op_counters::inc(labeled_counters, l_rgw_op_get_b, end-ofs); + rgw::op_counters::inc(counters, l_rgw_op_get_b, end-ofs); op_ret = read_op->iterate(this, ofs_x, end_x, filter, s->yield); if (op_ret >= 0) op_ret = filter->flush(); - rgw::op_counters::tinc(labeled_counters, l_rgw_op_get_lat, s->time_elapsed()); + rgw::op_counters::tinc(counters, 
l_rgw_op_get_lat, s->time_elapsed()); if (op_ret < 0) { goto done_err; @@ -2493,8 +2495,8 @@ void RGWListBuckets::execute(optional_yield y) const uint64_t max_buckets = s->cct->_conf->rgw_list_buckets_max_chunk; - auto labeled_counters = rgw::op_counters::get({{"User", s->user->get_id().id}}); - rgw::op_counters::inc(labeled_counters, l_rgw_op_list_buckets, 1); + auto counters = rgw::op_counters::get(s); + rgw::op_counters::inc(counters, l_rgw_op_list_buckets, 1); auto g = make_scope_guard([this, &started] { if (!started) { @@ -2573,7 +2575,7 @@ void RGWListBuckets::execute(optional_yield y) handle_listing_chunk(listing.buckets); } while (!marker.empty() && !done); - rgw::op_counters::tinc(labeled_counters, l_rgw_op_list_buckets_lat, s->time_elapsed()); + rgw::op_counters::tinc(counters, l_rgw_op_list_buckets_lat, s->time_elapsed()); } void RGWGetUsage::execute(optional_yield y) @@ -3059,9 +3061,9 @@ void RGWListBucket::execute(optional_yield y) common_prefixes = std::move(results.common_prefixes); } - auto labeled_counters = rgw::op_counters::get({{"Bucket", s->bucket_name}, {"User", s->user->get_id().id}}); - rgw::op_counters::inc(labeled_counters, l_rgw_op_list_obj, 1); - rgw::op_counters::tinc(labeled_counters, l_rgw_op_list_obj_lat, s->time_elapsed()); + auto counters = rgw::op_counters::get(s); + rgw::op_counters::inc(counters, l_rgw_op_list_obj, 1); + rgw::op_counters::tinc(counters, l_rgw_op_list_obj_lat, s->time_elapsed()); } int RGWGetBucketLogging::verify_permission(optional_yield y) @@ -3595,9 +3597,9 @@ void RGWDeleteBucket::execute(optional_yield y) op_ret = 0; } - auto labeled_counters = rgw::op_counters::get({{"Bucket", s->bucket_name}, {"User", s->user->get_id().id}}); - rgw::op_counters::inc(labeled_counters, l_rgw_op_del_bucket, 1); - rgw::op_counters::tinc(labeled_counters, l_rgw_op_del_bucket_lat, s->time_elapsed()); + auto counters = rgw::op_counters::get(s); + rgw::op_counters::inc(counters, l_rgw_op_del_bucket, 1); + rgw::op_counters::tinc(counters, l_rgw_op_del_bucket_lat, s->time_elapsed()); return; } @@ -4025,14 +4027,14 @@ void RGWPutObj::execute(optional_yield y) off_t fst; off_t lst; - auto labeled_counters = rgw::op_counters::get({{"Bucket", s->bucket_name}, {"User", s->user->get_id().id}}); + auto counters = rgw::op_counters::get(s); bool need_calc_md5 = (dlo_manifest == NULL) && (slo_info == NULL); - rgw::op_counters::inc(labeled_counters, l_rgw_op_put, 1); + rgw::op_counters::inc(counters, l_rgw_op_put, 1); // report latency on return auto put_lat = make_scope_guard([&] { - rgw::op_counters::tinc(labeled_counters, l_rgw_op_put_lat, s->time_elapsed()); + rgw::op_counters::tinc(counters, l_rgw_op_put_lat, s->time_elapsed()); }); op_ret = -EINVAL; @@ -4307,7 +4309,7 @@ void RGWPutObj::execute(optional_yield y) s->obj_size = ofs; s->object->set_obj_size(ofs); - rgw::op_counters::inc(labeled_counters, l_rgw_op_put_b, s->obj_size); + rgw::op_counters::inc(counters, l_rgw_op_put_b, s->obj_size); op_ret = do_aws4_auth_completion(); if (op_ret < 0) { @@ -5264,10 +5266,10 @@ void RGWDeleteObj::execute(optional_yield y) op_ret = 0; } - auto labeled_counters = rgw::op_counters::get({{"Bucket", s->bucket_name}, {"User", s->user->get_id().id}}); - rgw::op_counters::inc(labeled_counters, l_rgw_op_del_obj, 1); - rgw::op_counters::inc(labeled_counters, l_rgw_op_del_obj_b, obj_size); - rgw::op_counters::tinc(labeled_counters, l_rgw_op_del_obj_lat, s->time_elapsed()); + auto counters = rgw::op_counters::get(s); + rgw::op_counters::inc(counters, l_rgw_op_del_obj, 1); + 
rgw::op_counters::inc(counters, l_rgw_op_del_obj_b, obj_size); + rgw::op_counters::tinc(counters, l_rgw_op_del_obj_lat, s->time_elapsed()); // send request to notification manager int ret = res->publish_commit(this, obj_size, ceph::real_clock::now(), etag, version_id); @@ -5729,10 +5731,10 @@ void RGWCopyObj::execute(optional_yield y) // too late to rollback operation, hence op_ret is not set here } - auto labeled_counters = rgw::op_counters::get({{"Bucket", s->bucket_name}, {"User", s->user->get_id().id}}); - rgw::op_counters::inc(labeled_counters, l_rgw_op_copy_obj, 1); - rgw::op_counters::inc(labeled_counters, l_rgw_op_copy_obj_b, obj_size); - rgw::op_counters::tinc(labeled_counters, l_rgw_op_copy_obj_lat, s->time_elapsed()); + auto counters = rgw::op_counters::get(s); + rgw::op_counters::inc(counters, l_rgw_op_copy_obj, 1); + rgw::op_counters::inc(counters, l_rgw_op_copy_obj_b, obj_size); + rgw::op_counters::tinc(counters, l_rgw_op_copy_obj_lat, s->time_elapsed()); } int RGWGetACLs::verify_permission(optional_yield y) diff --git a/src/rgw/rgw_perf_counters.cc b/src/rgw/rgw_perf_counters.cc index aca56a60946d2..f2d245c27d2ed 100644 --- a/src/rgw/rgw_perf_counters.cc +++ b/src/rgw/rgw_perf_counters.cc @@ -5,12 +5,14 @@ #include "common/perf_counters.h" #include "common/perf_counters_key.h" #include "common/ceph_context.h" +#include "rgw_sal.h" + +using namespace ceph::perf_counters; +using namespace rgw::op_counters; PerfCounters *perfcounter = NULL; -ceph::perf_counters::PerfCountersCache *perf_counters_cache = NULL; -std::string rgw_op_counters_key = "rgw_op"; -static void add_rgw_frontend_counters(PerfCountersBuilder *pcb) { +void add_rgw_frontend_counters(PerfCountersBuilder *pcb) { // RGW emits comparatively few metrics, so let's be generous // and mark them all USEFUL to get transmission to ceph-mgr by default. 
pcb->set_prio_default(PerfCountersBuilder::PRIO_USEFUL); @@ -58,7 +60,7 @@ static void add_rgw_frontend_counters(PerfCountersBuilder *pcb) { pcb->add_u64(l_rgw_lua_current_vms, "lua_current_vms", "Number of Lua VMs currently being executed"); } -static void add_rgw_op_counters(PerfCountersBuilder *lpcb) { +void add_rgw_op_counters(PerfCountersBuilder *lpcb) { // description must match general rgw counters description above lpcb->set_prio_default(PerfCountersBuilder::PRIO_USEFUL); @@ -88,23 +90,6 @@ static void add_rgw_op_counters(PerfCountersBuilder *lpcb) { lpcb->add_time_avg(l_rgw_op_list_buckets_lat, "list_buckets_lat", "List buckets latency"); } -std::shared_ptr create_rgw_counters(const std::string& name, CephContext *cct) { - std::string_view key = ceph::perf_counters::key_name(name); - if (rgw_op_counters_key.compare(key) == 0) { - PerfCountersBuilder pcb(cct, name, l_rgw_op_first, l_rgw_op_last); - add_rgw_op_counters(&pcb); - std::shared_ptr new_counters(pcb.create_perf_counters()); - cct->get_perfcounters_collection()->add(new_counters.get()); - return new_counters; - } else { - PerfCountersBuilder pcb(cct, name, l_rgw_first, l_rgw_last); - add_rgw_frontend_counters(&pcb); - std::shared_ptr new_counters(pcb.create_perf_counters()); - cct->get_perfcounters_collection()->add(new_counters.get()); - return new_counters; - } -} - void frontend_counters_init(CephContext *cct) { PerfCountersBuilder pcb(cct, "rgw", l_rgw_first, l_rgw_last); add_rgw_frontend_counters(&pcb); @@ -115,7 +100,20 @@ void frontend_counters_init(CephContext *cct) { namespace rgw::op_counters { +ceph::perf_counters::PerfCountersCache *user_counters_cache = NULL; +ceph::perf_counters::PerfCountersCache *bucket_counters_cache = NULL; PerfCounters *global_op_counters = NULL; +const std::string rgw_op_counters_key = "rgw_op"; + +std::shared_ptr create_rgw_op_counters(const std::string& name, CephContext *cct) { + std::string_view key = ceph::perf_counters::key_name(name); + ceph_assert(rgw_op_counters_key == key); + PerfCountersBuilder pcb(cct, name, l_rgw_op_first, l_rgw_op_last); + add_rgw_op_counters(&pcb); + std::shared_ptr new_counters(pcb.create_perf_counters()); + cct->get_perfcounters_collection()->add(new_counters.get()); + return new_counters; +} void global_op_counters_init(CephContext *cct) { PerfCountersBuilder pcb(cct, rgw_op_counters_key, l_rgw_op_first, l_rgw_op_last); @@ -125,30 +123,67 @@ void global_op_counters_init(CephContext *cct) { global_op_counters = new_counters; } -void inc(std::shared_ptr labeled_counters, int idx, uint64_t v) { - if (labeled_counters) { - PerfCounters *counter = labeled_counters.get(); - counter->inc(idx, v); +CountersContainer get(req_state *s) { + CountersContainer counters; + std::string key; + + if (user_counters_cache && !s->user->get_id().id.empty()) { + if (s->user->get_tenant().empty()) { + key = std::move(ceph::perf_counters::key_create(rgw_op_counters_key, {{"User", s->user->get_id().id}})); + } else { + key = std::move(ceph::perf_counters::key_create(rgw_op_counters_key, {{"User", s->user->get_id().id}, {"Tenant", s->user->get_tenant()}})); + } + counters.user_counters = user_counters_cache->get(key); + } + + if (bucket_counters_cache && !s->bucket_name.empty()) { + if (s->bucket_tenant.empty()) { + key = std::move(ceph::perf_counters::key_create(rgw_op_counters_key, {{"Bucket", s->bucket_name}})); + } else { + key = std::move(ceph::perf_counters::key_create(rgw_op_counters_key, {{"Bucket", s->bucket_name}, {"Tenant", s->bucket_tenant}})); + } + 
counters.bucket_counters = bucket_counters_cache->get(key); + } + + return counters; +} + +void inc(const CountersContainer &counters, int idx, uint64_t v) { + if (counters.user_counters) { + PerfCounters *user_counters = counters.user_counters.get(); + user_counters->inc(idx, v); + } + if (counters.bucket_counters) { + PerfCounters *bucket_counters = counters.bucket_counters.get(); + bucket_counters->inc(idx, v); } if (global_op_counters) { global_op_counters->inc(idx, v); } } -void tinc(std::shared_ptr labeled_counters, int idx, utime_t amt) { - if (labeled_counters) { - PerfCounters *counter = labeled_counters.get(); - counter->tinc(idx, amt); +void tinc(const CountersContainer &counters, int idx, utime_t amt) { + if (counters.user_counters) { + PerfCounters *user_counters = counters.user_counters.get(); + user_counters->tinc(idx, amt); + } + if (counters.bucket_counters) { + PerfCounters *bucket_counters = counters.bucket_counters.get(); + bucket_counters->tinc(idx, amt); } if (global_op_counters) { global_op_counters->tinc(idx, amt); } } -void tinc(std::shared_ptr labeled_counters, int idx, ceph::timespan amt) { - if (labeled_counters) { - PerfCounters *counter = labeled_counters.get(); - counter->tinc(idx, amt); +void tinc(const CountersContainer &counters, int idx, ceph::timespan amt) { + if (counters.user_counters) { + PerfCounters *user_counters = counters.user_counters.get(); + user_counters->tinc(idx, amt); + } + if (counters.bucket_counters) { + PerfCounters *bucket_counters = counters.bucket_counters.get(); + bucket_counters->tinc(idx, amt); } if (global_op_counters) { global_op_counters->tinc(idx, amt); @@ -161,13 +196,19 @@ int rgw_perf_start(CephContext *cct) { frontend_counters_init(cct); - bool cache_enabled = cct->_conf.get_val("rgw_perf_counters_cache"); - if (cache_enabled) { - uint64_t target_size = cct->_conf.get_val("rgw_perf_counters_cache_size"); - perf_counters_cache = new ceph::perf_counters::PerfCountersCache(cct, target_size, create_rgw_counters); + bool user_counters_cache_enabled = cct->_conf.get_val("rgw_user_counters_cache"); + if (user_counters_cache_enabled) { + uint64_t target_size = cct->_conf.get_val("rgw_user_counters_cache_size"); + user_counters_cache = new PerfCountersCache(cct, target_size, create_rgw_op_counters); + } + + bool bucket_counters_cache_enabled = cct->_conf.get_val("rgw_bucket_counters_cache"); + if (bucket_counters_cache_enabled) { + uint64_t target_size = cct->_conf.get_val("rgw_bucket_counters_cache_size"); + bucket_counters_cache = new PerfCountersCache(cct, target_size, create_rgw_op_counters); } - rgw::op_counters::global_op_counters_init(cct); + global_op_counters_init(cct); return 0; } @@ -176,5 +217,9 @@ void rgw_perf_stop(CephContext *cct) ceph_assert(perfcounter); cct->get_perfcounters_collection()->remove(perfcounter); delete perfcounter; - delete perf_counters_cache; + ceph_assert(global_op_counters); + cct->get_perfcounters_collection()->remove(global_op_counters); + delete global_op_counters; + delete user_counters_cache; + delete bucket_counters_cache; } diff --git a/src/rgw/rgw_perf_counters.h b/src/rgw/rgw_perf_counters.h index 49f7e4d4218b4..e9068f4c9ff21 100644 --- a/src/rgw/rgw_perf_counters.h +++ b/src/rgw/rgw_perf_counters.h @@ -4,17 +4,13 @@ #pragma once #include "include/common_fwd.h" +#include "rgw_common.h" #include "common/perf_counters_cache.h" #include "common/perf_counters_key.h" extern PerfCounters *perfcounter; -extern ceph::perf_counters::PerfCountersCache *perf_counters_cache; -extern std::string 
rgw_op_counters_key; - extern int rgw_perf_start(CephContext *cct); extern void rgw_perf_stop(CephContext *cct); -extern void frontend_counters_init(CephContext *cct); -extern std::shared_ptr create_rgw_counters(const std::string& name, CephContext *cct); enum { l_rgw_first = 15000, @@ -89,24 +85,17 @@ enum { namespace rgw::op_counters { -extern PerfCounters *global_op_counters; - -void global_op_counters_init(CephContext *cct); +struct CountersContainer { + std::shared_ptr user_counters; + std::shared_ptr bucket_counters; +}; -template -std::shared_ptr get(ceph::perf_counters::label_pair (&&labels)[Count]) { - if (perf_counters_cache) { - std::string key = ceph::perf_counters::key_create(rgw_op_counters_key, std::move(labels)); - return perf_counters_cache->get(key); - } else { - return std::shared_ptr(nullptr); - } -} +CountersContainer get(req_state *s); -void inc(std::shared_ptr labeled_counters, int idx, uint64_t v); +void inc(const CountersContainer &counters, int idx, uint64_t v); -void tinc(std::shared_ptr labeled_counters, int idx, utime_t); +void tinc(const CountersContainer &counters, int idx, utime_t); -void tinc(std::shared_ptr labeled_counters, int idx, ceph::timespan amt); +void tinc(const CountersContainer &counters, int idx, ceph::timespan amt); } // namespace rgw::op_counters diff --git a/src/rgw/vstart.sh.swift b/src/rgw/vstart.sh.swift new file mode 100755 index 0000000000000..46e46da0e4a5a --- /dev/null +++ b/src/rgw/vstart.sh.swift @@ -0,0 +1,1930 @@ +#!/usr/bin/env bash +# -*- mode:sh; tab-width:4; sh-basic-offset:4; indent-tabs-mode:nil -*- +# vim: softtabstop=4 shiftwidth=4 expandtab + +# abort on failure +set -e + +quoted_print() { + for s in "$@"; do + if [[ "$s" =~ \ ]]; then + printf -- "'%s' " "$s" + else + printf -- "$s " + fi + done + printf '\n' +} + +debug() { + "$@" >&2 +} + +prunb() { + debug quoted_print "$@" '&' + PATH=$CEPH_BIN:$PATH "$@" & +} + +prun() { + debug quoted_print "$@" + PATH=$CEPH_BIN:$PATH "$@" +} + + +if [ -n "$VSTART_DEST" ]; then + SRC_PATH=`dirname $0` + SRC_PATH=`(cd $SRC_PATH; pwd)` + + CEPH_DIR=$SRC_PATH + CEPH_BIN=${CEPH_BIN:-${PWD}/bin} + CEPH_LIB=${CEPH_LIB:-${PWD}/lib} + + CEPH_CONF_PATH=$VSTART_DEST + CEPH_DEV_DIR=$VSTART_DEST/dev + CEPH_OUT_DIR=$VSTART_DEST/out + CEPH_ASOK_DIR=$VSTART_DEST/asok + CEPH_OUT_CLIENT_DIR=${CEPH_OUT_CLIENT_DIR:-$CEPH_OUT_DIR} +fi + +get_cmake_variable() { + local variable=$1 + grep "${variable}:" CMakeCache.txt | cut -d "=" -f 2 +} + +# for running out of the CMake build directory +if [ -e CMakeCache.txt ]; then + # Out of tree build, learn source location from CMakeCache.txt + CEPH_ROOT=$(get_cmake_variable ceph_SOURCE_DIR) + CEPH_BUILD_DIR=`pwd` + [ -z "$MGR_PYTHON_PATH" ] && MGR_PYTHON_PATH=$CEPH_ROOT/src/pybind/mgr +fi + +# use CEPH_BUILD_ROOT to vstart from a 'make install' +if [ -n "$CEPH_BUILD_ROOT" ]; then + [ -z "$CEPH_BIN" ] && CEPH_BIN=$CEPH_BUILD_ROOT/bin + [ -z "$CEPH_LIB" ] && CEPH_LIB=$CEPH_BUILD_ROOT/lib + [ -z "$CEPH_EXT_LIB" ] && CEPH_EXT_LIB=$CEPH_BUILD_ROOT/external/lib + [ -z "$EC_PATH" ] && EC_PATH=$CEPH_LIB/erasure-code + [ -z "$OBJCLASS_PATH" ] && OBJCLASS_PATH=$CEPH_LIB/rados-classes + # make install should install python extensions into PYTHONPATH +elif [ -n "$CEPH_ROOT" ]; then + [ -z "$CEPHFS_SHELL" ] && CEPHFS_SHELL=$CEPH_ROOT/src/tools/cephfs/shell/cephfs-shell + [ -z "$PYBIND" ] && PYBIND=$CEPH_ROOT/src/pybind + [ -z "$CEPH_BIN" ] && CEPH_BIN=$CEPH_BUILD_DIR/bin + [ -z "$CEPH_ADM" ] && CEPH_ADM=$CEPH_BIN/ceph + [ -z "$INIT_CEPH" ] && INIT_CEPH=$CEPH_BIN/init-ceph + [ 
-z "$CEPH_LIB" ] && CEPH_LIB=$CEPH_BUILD_DIR/lib + [ -z "$CEPH_EXT_LIB" ] && CEPH_EXT_LIB=$CEPH_BUILD_DIR/external/lib + [ -z "$OBJCLASS_PATH" ] && OBJCLASS_PATH=$CEPH_LIB + [ -z "$EC_PATH" ] && EC_PATH=$CEPH_LIB + [ -z "$CEPH_PYTHON_COMMON" ] && CEPH_PYTHON_COMMON=$CEPH_ROOT/src/python-common +fi + +if [ -z "${CEPH_VSTART_WRAPPER}" ]; then + PATH=$(pwd):$PATH +fi + +[ -z "$PYBIND" ] && PYBIND=./pybind + +[ -n "$CEPH_PYTHON_COMMON" ] && CEPH_PYTHON_COMMON="$CEPH_PYTHON_COMMON:" +CYTHON_PYTHONPATH="$CEPH_LIB/cython_modules/lib.3" +export PYTHONPATH=$PYBIND:$CYTHON_PYTHONPATH:$CEPH_PYTHON_COMMON$PYTHONPATH + +export LD_LIBRARY_PATH=$CEPH_LIB:$CEPH_EXT_LIB:$LD_LIBRARY_PATH +export DYLD_LIBRARY_PATH=$CEPH_LIB:$CEPH_EXT_LIB:$DYLD_LIBRARY_PATH +# Suppress logging for regular use that indicated that we are using a +# development version. vstart.sh is only used during testing and +# development +export CEPH_DEV=1 + +[ -z "$CEPH_NUM_MON" ] && CEPH_NUM_MON="$MON" +[ -z "$CEPH_NUM_OSD" ] && CEPH_NUM_OSD="$OSD" +[ -z "$CEPH_NUM_MDS" ] && CEPH_NUM_MDS="$MDS" +[ -z "$CEPH_NUM_MGR" ] && CEPH_NUM_MGR="$MGR" +[ -z "$CEPH_NUM_FS" ] && CEPH_NUM_FS="$FS" +[ -z "$CEPH_NUM_RGW" ] && CEPH_NUM_RGW="$RGW" +[ -z "$GANESHA_DAEMON_NUM" ] && GANESHA_DAEMON_NUM="$NFS" + +# if none of the CEPH_NUM_* number is specified, kill the existing +# cluster. +if [ -z "$CEPH_NUM_MON" -a \ + -z "$CEPH_NUM_OSD" -a \ + -z "$CEPH_NUM_MDS" -a \ + -z "$CEPH_NUM_MGR" -a \ + -z "$GANESHA_DAEMON_NUM" ]; then + kill_all=1 +else + kill_all=0 +fi + +[ -z "$CEPH_NUM_MON" ] && CEPH_NUM_MON=3 +[ -z "$CEPH_NUM_OSD" ] && CEPH_NUM_OSD=3 +[ -z "$CEPH_NUM_MDS" ] && CEPH_NUM_MDS=3 +[ -z "$CEPH_NUM_MGR" ] && CEPH_NUM_MGR=1 +[ -z "$CEPH_NUM_FS" ] && CEPH_NUM_FS=1 +[ -z "$CEPH_MAX_MDS" ] && CEPH_MAX_MDS=1 +[ -z "$CEPH_NUM_RGW" ] && CEPH_NUM_RGW=0 +[ -z "$GANESHA_DAEMON_NUM" ] && GANESHA_DAEMON_NUM=0 + +[ -z "$CEPH_DIR" ] && CEPH_DIR="$PWD" +[ -z "$CEPH_DEV_DIR" ] && CEPH_DEV_DIR="$CEPH_DIR/dev" +[ -z "$CEPH_OUT_DIR" ] && CEPH_OUT_DIR="$CEPH_DIR/out" +[ -z "$CEPH_ASOK_DIR" ] && CEPH_ASOK_DIR="$CEPH_DIR/asok" +[ -z "$CEPH_RGW_PORT" ] && CEPH_RGW_PORT=8000 +[ -z "$CEPH_CONF_PATH" ] && CEPH_CONF_PATH=$CEPH_DIR +CEPH_OUT_CLIENT_DIR=${CEPH_OUT_CLIENT_DIR:-$CEPH_OUT_DIR} + +if [ $CEPH_NUM_OSD -gt 3 ]; then + OSD_POOL_DEFAULT_SIZE=3 +else + OSD_POOL_DEFAULT_SIZE=$CEPH_NUM_OSD +fi + +extra_conf="" +new=0 +standby=0 +debug=0 +trace=0 +ip="" +nodaemon=0 +redirect=0 +smallmds=0 +short=0 +crimson=0 +ec=0 +cephadm=0 +parallel=true +restart=1 +hitset="" +overwrite_conf=0 +cephx=1 #turn cephx on by default +gssapi_authx=0 +cache="" +if [ `uname` = FreeBSD ]; then + objectstore="memstore" +else + objectstore="bluestore" +fi +ceph_osd=ceph-osd +rgw_frontend="beast prefix=/swift" +rgw_compression="" +lockdep=${LOCKDEP:-1} +spdk_enabled=0 # disable SPDK by default +pmem_enabled=0 +zoned_enabled=0 +io_uring_enabled=0 +with_jaeger=0 + +with_mgr_dashboard=true +if [[ "$(get_cmake_variable WITH_MGR_DASHBOARD_FRONTEND)" != "ON" ]] || + [[ "$(get_cmake_variable WITH_RBD)" != "ON" ]]; then + debug echo "ceph-mgr dashboard not built - disabling." 
+ with_mgr_dashboard=false +fi +with_mgr_restful=false + +kstore_path= +declare -a block_devs +declare -a bluestore_db_devs +declare -a bluestore_wal_devs +declare -a secondary_block_devs +secondary_block_devs_type="SSD" + +VSTART_SEC="client.vstart.sh" + +MON_ADDR="" +DASH_URLS="" +RESTFUL_URLS="" + +conf_fn="$CEPH_CONF_PATH/ceph.conf" +keyring_fn="$CEPH_CONF_PATH/keyring" +monmap_fn="/tmp/ceph_monmap.$$" +inc_osd_num=0 + +msgr="21" + +read -r -d '' usage <: bind to specific ip + -n, --new + --valgrind[_{osd,mds,mon,rgw}] 'toolname args...' + --nodaemon: use ceph-run as wrapper for mon/osd/mds + --redirect-output: only useful with nodaemon, directs output to log file + --smallmds: limit mds cache memory limit + -m ip:port specify monitor address + -k keep old configuration files (default) + -x enable cephx (on by default) + -X disable cephx + -g --gssapi enable Kerberos/GSSApi authentication + -G disable Kerberos/GSSApi authentication + --hitset : enable hitset tracking + -e : create an erasure pool + -o config add extra config parameters to all sections + --rgw_port specify ceph rgw http listen port + --rgw_frontend specify the rgw frontend configuration + --rgw_arrow_flight start arrow flight frontend + --rgw_compression specify the rgw compression plugin + --seastore use seastore as crimson osd backend + -b, --bluestore use bluestore as the osd objectstore backend (default) + -K, --kstore use kstore as the osd objectstore backend + --cyanstore use cyanstore as the osd objectstore backend + --memstore use memstore as the osd objectstore backend + --cache : enable cache tiering on pool + --short: short object names only; necessary for ext4 dev + --nolockdep disable lockdep + --multimds allow multimds with maximum active count + --without-dashboard: do not run using mgr dashboard + --bluestore-spdk: enable SPDK and with a comma-delimited list of PCI-IDs of NVME device (e.g, 0000:81:00.0) + --bluestore-pmem: enable PMEM and with path to a file mapped to PMEM + --msgr1: use msgr1 only + --msgr2: use msgr2 only + --msgr21: use msgr2 and msgr1 + --crimson: use crimson-osd instead of ceph-osd + --crimson-foreground: use crimson-osd, but run it in the foreground + --osd-args: specify any extra osd specific options + --bluestore-devs: comma-separated list of blockdevs to use for bluestore + --bluestore-db-devs: comma-separated list of db-devs to use for bluestore + --bluestore-wal-devs: comma-separated list of wal-devs to use for bluestore + --bluestore-zoned: blockdevs listed by --bluestore-devs are zoned devices (HM-SMR HDD or ZNS SSD) + --bluestore-io-uring: enable io_uring backend + --inc-osd: append some more osds into existing vcluster + --cephadm: enable cephadm orchestrator with ~/.ssh/id_rsa[.pub] + --no-parallel: dont start all OSDs in parallel + --no-restart: dont restart process when using ceph-run + --jaeger: use jaegertracing for tracing + --seastore-devs: comma-separated list of blockdevs to use for seastore + --seastore-secondary-devs: comma-separated list of secondary blockdevs to use for seastore + --seastore-secondary-devs-type: device type of all secondary blockdevs. HDD, SSD(default), ZNS or RANDOM_BLOCK_SSD + --crimson-smp: number of cores to use for crimson +\n +EOF + +usage_exit() { + printf "$usage" + exit +} + +parse_block_devs() { + local opt_name=$1 + shift + local devs=$1 + shift + local dev + IFS=',' read -r -a block_devs <<< "$devs" + for dev in "${block_devs[@]}"; do + if [ ! -b $dev ] || [ ! 
-w $dev ]; then + echo "All $opt_name must refer to writable block devices" + exit 1 + fi + done +} + +parse_bluestore_db_devs() { + local opt_name=$1 + shift + local devs=$1 + shift + local dev + IFS=',' read -r -a bluestore_db_devs <<< "$devs" + for dev in "${bluestore_db_devs[@]}"; do + if [ ! -b $dev ] || [ ! -w $dev ]; then + echo "All $opt_name must refer to writable block devices" + exit 1 + fi + done +} + +parse_bluestore_wal_devs() { + local opt_name=$1 + shift + local devs=$1 + shift + local dev + IFS=',' read -r -a bluestore_wal_devs <<< "$devs" + for dev in "${bluestore_wal_devs[@]}"; do + if [ ! -b $dev ] || [ ! -w $dev ]; then + echo "All $opt_name must refer to writable block devices" + exit 1 + fi + done +} + +parse_secondary_devs() { + local opt_name=$1 + shift + local devs=$1 + shift + local dev + IFS=',' read -r -a secondary_block_devs <<< "$devs" + for dev in "${secondary_block_devs[@]}"; do + if [ ! -b $dev ] || [ ! -w $dev ]; then + echo "All $opt_name must refer to writable block devices" + exit 1 + fi + done +} + +crimson_smp=1 +while [ $# -ge 1 ]; do +case $1 in + -d | --debug) + debug=1 + ;; + -t | --trace) + trace=1 + ;; + -s | --standby_mds) + standby=1 + ;; + -l | --localhost) + ip="127.0.0.1" + ;; + -i) + [ -z "$2" ] && usage_exit + ip="$2" + shift + ;; + -e) + ec=1 + ;; + --new | -n) + new=1 + ;; + --inc-osd) + new=0 + kill_all=0 + inc_osd_num=$2 + if [ "$inc_osd_num" == "" ]; then + inc_osd_num=1 + else + shift + fi + ;; + --short) + short=1 + ;; + --crimson) + crimson=1 + ceph_osd=crimson-osd + nodaemon=1 + msgr=2 + ;; + --crimson-foreground) + crimson=1 + ceph_osd=crimson-osd + nodaemon=0 + msgr=2 + ;; + --osd-args) + extra_osd_args="$2" + shift + ;; + --msgr1) + msgr="1" + ;; + --msgr2) + msgr="2" + ;; + --msgr21) + msgr="21" + ;; + --cephadm) + cephadm=1 + ;; + --no-parallel) + parallel=false + ;; + --no-restart) + restart=0 + ;; + --valgrind) + [ -z "$2" ] && usage_exit + valgrind=$2 + shift + ;; + --valgrind_args) + valgrind_args="$2" + shift + ;; + --valgrind_mds) + [ -z "$2" ] && usage_exit + valgrind_mds=$2 + shift + ;; + --valgrind_osd) + [ -z "$2" ] && usage_exit + valgrind_osd=$2 + shift + ;; + --valgrind_mon) + [ -z "$2" ] && usage_exit + valgrind_mon=$2 + shift + ;; + --valgrind_mgr) + [ -z "$2" ] && usage_exit + valgrind_mgr=$2 + shift + ;; + --valgrind_rgw) + [ -z "$2" ] && usage_exit + valgrind_rgw=$2 + shift + ;; + --nodaemon) + nodaemon=1 + ;; + --redirect-output) + redirect=1 + ;; + --smallmds) + smallmds=1 + ;; + --rgw_port) + CEPH_RGW_PORT=$2 + shift + ;; + --rgw_frontend) + rgw_frontend=$2 + shift + ;; + --rgw_arrow_flight) + rgw_flight_frontend="yes" + ;; + --rgw_compression) + rgw_compression=$2 + shift + ;; + --kstore_path) + kstore_path=$2 + shift + ;; + -m) + [ -z "$2" ] && usage_exit + MON_ADDR=$2 + shift + ;; + -x) + cephx=1 # this is on be default, flag exists for historical consistency + ;; + -X) + cephx=0 + ;; + + -g | --gssapi) + gssapi_authx=1 + ;; + -G) + gssapi_authx=0 + ;; + + -k) + if [ ! -r $conf_fn ]; then + echo "cannot use old configuration: $conf_fn not readable." 
>&2 + exit + fi + new=0 + ;; + --memstore) + objectstore="memstore" + ;; + --cyanstore) + objectstore="cyanstore" + ;; + --seastore) + objectstore="seastore" + ;; + -b | --bluestore) + objectstore="bluestore" + ;; + -K | --kstore) + objectstore="kstore" + ;; + --hitset) + hitset="$hitset $2 $3" + shift + shift + ;; + -o) + extra_conf+=$'\n'"$2" + shift + ;; + --cache) + if [ -z "$cache" ]; then + cache="$2" + else + cache="$cache $2" + fi + shift + ;; + --nolockdep) + lockdep=0 + ;; + --multimds) + CEPH_MAX_MDS="$2" + shift + ;; + --without-dashboard) + with_mgr_dashboard=false + ;; + --with-restful) + with_mgr_restful=true + ;; + --seastore-devs) + parse_block_devs --seastore-devs "$2" + shift + ;; + --seastore-secondary-devs) + parse_secondary_devs --seastore-devs "$2" + shift + ;; + --seastore-secondary-devs-type) + secondary_block_devs_type="$2" + shift + ;; + --crimson-smp) + crimson_smp=$2 + shift + ;; + --bluestore-spdk) + [ -z "$2" ] && usage_exit + IFS=',' read -r -a bluestore_spdk_dev <<< "$2" + spdk_enabled=1 + shift + ;; + --bluestore-pmem) + [ -z "$2" ] && usage_exit + bluestore_pmem_file="$2" + pmem_enabled=1 + shift + ;; + --bluestore-devs) + parse_block_devs --bluestore-devs "$2" + shift + ;; + --bluestore-db-devs) + parse_bluestore_db_devs --bluestore-db-devs "$2" + shift + ;; + --bluestore-wal-devs) + parse_bluestore_wal_devs --bluestore-wal-devs "$2" + shift + ;; + --bluestore-zoned) + zoned_enabled=1 + ;; + --bluestore-io-uring) + io_uring_enabled=1 + shift + ;; + --jaeger) + with_jaeger=1 + echo "with_jaeger $with_jaeger" + ;; + *) + usage_exit +esac +shift +done + +if [ $kill_all -eq 1 ]; then + $SUDO $INIT_CEPH stop +fi + +if [ "$new" -eq 0 ]; then + if [ -z "$CEPH_ASOK_DIR" ]; then + CEPH_ASOK_DIR=`dirname $($CEPH_BIN/ceph-conf -c $conf_fn --show-config-value admin_socket)` + fi + mkdir -p $CEPH_ASOK_DIR + MON=`$CEPH_BIN/ceph-conf -c $conf_fn --name $VSTART_SEC --lookup num_mon 2>/dev/null` && \ + CEPH_NUM_MON="$MON" + OSD=`$CEPH_BIN/ceph-conf -c $conf_fn --name $VSTART_SEC --lookup num_osd 2>/dev/null` && \ + CEPH_NUM_OSD="$OSD" + MDS=`$CEPH_BIN/ceph-conf -c $conf_fn --name $VSTART_SEC --lookup num_mds 2>/dev/null` && \ + CEPH_NUM_MDS="$MDS" + MGR=`$CEPH_BIN/ceph-conf -c $conf_fn --name $VSTART_SEC --lookup num_mgr 2>/dev/null` && \ + CEPH_NUM_MGR="$MGR" + RGW=`$CEPH_BIN/ceph-conf -c $conf_fn --name $VSTART_SEC --lookup num_rgw 2>/dev/null` && \ + CEPH_NUM_RGW="$RGW" + NFS=`$CEPH_BIN/ceph-conf -c $conf_fn --name $VSTART_SEC --lookup num_ganesha 2>/dev/null` && \ + GANESHA_DAEMON_NUM="$NFS" +else + # only delete if -n + if [ -e "$conf_fn" ]; then + asok_dir=`dirname $($CEPH_BIN/ceph-conf -c $conf_fn --show-config-value admin_socket)` + rm -- "$conf_fn" + if [ $asok_dir != /var/run/ceph ]; then + [ -d $asok_dir ] && rm -f $asok_dir/* && rmdir $asok_dir + fi + fi + if [ -z "$CEPH_ASOK_DIR" ]; then + CEPH_ASOK_DIR=`mktemp -u -d "${TMPDIR:-/tmp}/ceph-asok.XXXXXX"` + fi +fi + +ARGS="-c $conf_fn" + +run() { + type=$1 + shift + num=$1 + shift + eval "valg=\$valgrind_$type" + [ -z "$valg" ] && valg="$valgrind" + + if [ -n "$valg" ]; then + prunb valgrind --tool="$valg" $valgrind_args "$@" -f + sleep 1 + else + if [ "$nodaemon" -eq 0 ]; then + prun "$@" + else + if [ "$restart" -eq 0 ]; then + set -- '--no-restart' "$@" + fi + if [ "$redirect" -eq 0 ]; then + prunb ${CEPH_ROOT}/src/ceph-run "$@" -f + else + ( prunb ${CEPH_ROOT}/src/ceph-run "$@" -f ) >$CEPH_OUT_DIR/$type.$num.stdout 2>&1 + fi + fi + fi +} + +wconf() { + if [ "$new" -eq 1 -o "$overwrite_conf" -eq 1 ]; then + 
cat >> "$conf_fn" + fi +} + + +do_rgw_conf() { + + if [ $CEPH_NUM_RGW -eq 0 ]; then + return 0 + fi + + # setup each rgw on a sequential port, starting at $CEPH_RGW_PORT. + # individual rgw's ids will be their ports. + current_port=$CEPH_RGW_PORT + # allow only first rgw to start arrow_flight server/port + local flight_conf=$rgw_flight_frontend + for n in $(seq 1 $CEPH_NUM_RGW); do + wconf << EOF +[client.rgw.${current_port}] + rgw frontends = $rgw_frontend port=${current_port}${flight_conf:+,arrow_flight} + admin socket = ${CEPH_OUT_DIR}/radosgw.${current_port}.asok + debug rgw_flight = 20 + rgw keystone accepted admin roles = admin + rgw keystone accepted roles = admin,Member + rgw keystone admin domain = Default + rgw keystone admin password = ADMIN + rgw keystone admin project = admin + rgw keystone admin user = admin + rgw keystone api version = 3 + rgw keystone implicit tenants = true + rgw swift account in url = true + rgw swift enforce content length = true + rgw swift versioning enabled = true +EOF + current_port=$((current_port + 1)) + unset flight_conf +done + +} + +format_conf() { + local opts=$1 + local indent=" " + local opt + local formatted + while read -r opt; do + if [ -z "$formatted" ]; then + formatted="${opt}" + else + formatted+=$'\n'${indent}${opt} + fi + done <<< "$opts" + echo "$formatted" +} + +prepare_conf() { + local DAEMONOPTS=" + log file = $CEPH_OUT_DIR/\$name.log + admin socket = $CEPH_ASOK_DIR/\$name.asok + chdir = \"\" + pid file = $CEPH_OUT_DIR/\$name.pid + heartbeat file = $CEPH_OUT_DIR/\$name.heartbeat +" + + local mgr_modules="iostat nfs" + if $with_mgr_dashboard; then + mgr_modules+=" dashboard" + fi + if $with_mgr_restful; then + mgr_modules+=" restful" + fi + + local msgr_conf='' + if [ $msgr -eq 21 ]; then + msgr_conf="ms bind msgr2 = true + ms bind msgr1 = true" + fi + if [ $msgr -eq 2 ]; then + msgr_conf="ms bind msgr2 = true + ms bind msgr1 = false" + fi + if [ $msgr -eq 1 ]; then + msgr_conf="ms bind msgr2 = false + ms bind msgr1 = true" + fi + + wconf < $logrotate_conf_path + fi +} + +start_mon() { + local MONS="" + local count=0 + for f in a b c d e f g h i j k l m n o p q r s t u v w x y z + do + [ $count -eq $CEPH_NUM_MON ] && break; + count=$(($count + 1)) + if [ -z "$MONS" ]; then + MONS="$f" + else + MONS="$MONS $f" + fi + done + + if [ "$new" -eq 1 ]; then + if [ `echo $IP | grep '^127\\.'` ]; then + echo + echo "NOTE: hostname resolves to loopback; remote hosts will not be able to" + echo " connect. either adjust /etc/hosts, or edit this script to use your" + echo " machine's real IP." + echo + fi + + prun $SUDO "$CEPH_BIN/ceph-authtool" --create-keyring --gen-key --name=mon. 
"$keyring_fn" --cap mon 'allow *' + prun $SUDO "$CEPH_BIN/ceph-authtool" --gen-key --name=client.admin \ + --cap mon 'allow *' \ + --cap osd 'allow *' \ + --cap mds 'allow *' \ + --cap mgr 'allow *' \ + "$keyring_fn" + + # build a fresh fs monmap, mon fs + local params=() + local count=0 + local mon_host="" + for f in $MONS + do + if [ $msgr -eq 1 ]; then + A="v1:$IP:$(($CEPH_PORT+$count+1))" + fi + if [ $msgr -eq 2 ]; then + A="v2:$IP:$(($CEPH_PORT+$count+1))" + fi + if [ $msgr -eq 21 ]; then + A="[v2:$IP:$(($CEPH_PORT+$count)),v1:$IP:$(($CEPH_PORT+$count+1))]" + fi + params+=("--addv" "$f" "$A") + mon_host="$mon_host $A" + wconf < /dev/null; then + for f in $CEPH_DEV_DIR/osd$osd/*; do btrfs sub delete $f &> /dev/null || true; done + fi + if [ -n "$kstore_path" ]; then + ln -s $kstore_path $CEPH_DEV_DIR/osd$osd + else + mkdir -p $CEPH_DEV_DIR/osd$osd + if [ -n "${block_devs[$osd]}" ]; then + dd if=/dev/zero of=${block_devs[$osd]} bs=1M count=1 + ln -s ${block_devs[$osd]} $CEPH_DEV_DIR/osd$osd/block + fi + if [ -n "${bluestore_db_devs[$osd]}" ]; then + dd if=/dev/zero of=${bluestore_db_devs[$osd]} bs=1M count=1 + ln -s ${bluestore_db_devs[$osd]} $CEPH_DEV_DIR/osd$osd/block.db + fi + if [ -n "${bluestore_wal_devs[$osd]}" ]; then + dd if=/dev/zero of=${bluestore_wal_devs[$osd]} bs=1M count=1 + ln -s ${bluestore_wal_devs[$osd]} $CEPH_DEV_DIR/osd$osd/block.wal + fi + if [ -n "${secondary_block_devs[$osd]}" ]; then + dd if=/dev/zero of=${secondary_block_devs[$osd]} bs=1M count=1 + mkdir -p $CEPH_DEV_DIR/osd$osd/block.${secondary_block_devs_type}.1 + ln -s ${secondary_block_devs[$osd]} $CEPH_DEV_DIR/osd$osd/block.${secondary_block_devs_type}.1/block + fi + fi + if [ "$objectstore" == "bluestore" ]; then + wconf < $CEPH_DEV_DIR/osd$osd/new.json + ceph_adm osd new $uuid -i $CEPH_DEV_DIR/osd$osd/new.json + rm $CEPH_DEV_DIR/osd$osd/new.json + prun $SUDO $CEPH_BIN/$ceph_osd $extra_osd_args -i $osd $ARGS --mkfs --key $OSD_SECRET --osd-uuid $uuid $extra_seastar_args \ + 2>&1 | tee $CEPH_OUT_DIR/osd-mkfs.$osd.log + + local key_fn=$CEPH_DEV_DIR/osd$osd/keyring + cat > $key_fn< /dev/null; then + secret_file=`mktemp` + ceph_adm restful create-key admin -o $secret_file + RESTFUL_SECRET=`cat $secret_file` + rm $secret_file + else + debug echo MGR Restful is not working, perhaps the package is not installed? + fi +} + +start_mgr() { + local mgr=0 + local ssl=${DASHBOARD_SSL:-1} + # avoid monitors on nearby ports (which test/*.sh use extensively) + MGR_PORT=$(($CEPH_PORT + 1000)) + PROMETHEUS_PORT=9283 + for name in x y z a b c d e f g h i j k l m n o p + do + [ $mgr -eq $CEPH_NUM_MGR ] && break + mgr=$(($mgr + 1)) + if [ "$new" -eq 1 ]; then + mkdir -p $CEPH_DEV_DIR/mgr.$name + key_fn=$CEPH_DEV_DIR/mgr.$name/keyring + $SUDO $CEPH_BIN/ceph-authtool --create-keyring --gen-key --name=mgr.$name $key_fn + ceph_adm -i $key_fn auth add mgr.$name mon 'allow profile mgr' mds 'allow *' osd 'allow *' + + wconf < "${DASHBOARD_ADMIN_SECRET_FILE}" + ceph_adm dashboard ac-user-create admin -i "${DASHBOARD_ADMIN_SECRET_FILE}" \ + administrator --force-password + if [ "$ssl" != "0" ]; then + if ! ceph_adm dashboard create-self-signed-cert; then + debug echo dashboard module not working correctly! 
+ fi + fi + fi + if $with_mgr_restful; then + create_mgr_restful_secret + fi + fi + + if [ "$cephadm" -eq 1 ]; then + debug echo Enabling cephadm orchestrator + if [ "$new" -eq 1 ]; then + digest=$(curl -s \ + https://hub.docker.com/v2/repositories/ceph/daemon-base/tags/latest-master-devel \ + | jq -r '.images[0].digest') + ceph_adm config set global container_image "docker.io/ceph/daemon-base@$digest" + fi + ceph_adm config-key set mgr/cephadm/ssh_identity_key -i ~/.ssh/id_rsa + ceph_adm config-key set mgr/cephadm/ssh_identity_pub -i ~/.ssh/id_rsa.pub + ceph_adm mgr module enable cephadm + ceph_adm orch set backend cephadm + ceph_adm orch host add "$(hostname)" + ceph_adm orch apply crash '*' + ceph_adm config set mgr mgr/cephadm/allow_ptrace true + fi +} + +start_mds() { + local mds=0 + for name in a b c d e f g h i j k l m n o p + do + [ $mds -eq $CEPH_NUM_MDS ] && break + mds=$(($mds + 1)) + + if [ "$new" -eq 1 ]; then + prun mkdir -p "$CEPH_DEV_DIR/mds.$name" + key_fn=$CEPH_DEV_DIR/mds.$name/keyring + wconf < m #--debug_ms 20 + #$CEPH_BIN/ceph-mds -d $ARGS --mds_thrash_fragments 0 --mds_thrash_exports 0 #--debug_ms 20 + #ceph_adm mds set max_mds 2 + done + + if [ $new -eq 1 ]; then + if [ "$CEPH_NUM_FS" -gt "0" ] ; then + sleep 5 # time for MDS to come up as standby to avoid health warnings on fs creation + if [ "$CEPH_NUM_FS" -gt "1" ] ; then + ceph_adm fs flag set enable_multiple true --yes-i-really-mean-it + fi + + # wait for volume module to load + while ! ceph_adm fs volume ls ; do sleep 1 ; done + local fs=0 + for name in a b c d e f g h i j k l m n o p + do + ceph_adm fs volume create ${name} + ceph_adm fs authorize ${name} "client.fs_${name}" / rwp >> "$keyring_fn" + fs=$(($fs + 1)) + [ $fs -eq $CEPH_NUM_FS ] && break + done + fi + fi + +} + +# Ganesha Daemons requires nfs-ganesha nfs-ganesha-ceph nfs-ganesha-rados-grace +# nfs-ganesha-rados-urls (version 3.3 and above) packages installed. On +# Fedora>=31 these packages can be installed directly with 'dnf'. 
For CentOS>=8 +# the packages are available at +# https://wiki.centos.org/SpecialInterestGroup/Storage +# Similarly for Ubuntu>=16.04 follow the instructions on +# https://launchpad.net/~nfs-ganesha + +start_ganesha() { + cluster_id="vstart" + GANESHA_PORT=$(($CEPH_PORT + 4000)) + local ganesha=0 + test_user="$cluster_id" + pool_name=".nfs" + namespace=$cluster_id + url="rados://$pool_name/$namespace/conf-nfs.$test_user" + + prun ceph_adm auth get-or-create client.$test_user \ + mon "allow r" \ + osd "allow rw pool=$pool_name namespace=$namespace, allow rw tag cephfs data=a" \ + mds "allow rw path=/" \ + >> "$keyring_fn" + + ceph_adm mgr module enable test_orchestrator + ceph_adm orch set backend test_orchestrator + ceph_adm test_orchestrator load_data -i $CEPH_ROOT/src/pybind/mgr/test_orchestrator/dummy_data.json + prun ceph_adm nfs cluster create $cluster_id + prun ceph_adm nfs export create cephfs --fsname "a" --cluster-id $cluster_id --pseudo-path "/cephfs" + + for name in a b c d e f g h i j k l m n o p + do + [ $ganesha -eq $GANESHA_DAEMON_NUM ] && break + + port=$(($GANESHA_PORT + ganesha)) + ganesha=$(($ganesha + 1)) + ganesha_dir="$CEPH_DEV_DIR/ganesha.$name" + prun rm -rf $ganesha_dir + prun mkdir -p $ganesha_dir + + echo "NFS_CORE_PARAM { + Enable_NLM = false; + Enable_RQUOTA = false; + Protocols = 4; + NFS_Port = $port; + } + + MDCACHE { + Dir_Chunk = 0; + } + + NFSv4 { + RecoveryBackend = rados_cluster; + Minor_Versions = 1, 2; + } + + RADOS_KV { + pool = '$pool_name'; + namespace = $namespace; + UserId = $test_user; + nodeid = $name; + } + + RADOS_URLS { + Userid = $test_user; + watch_url = '$url'; + } + + %url $url" > "$ganesha_dir/ganesha-$name.conf" + wconf <.+:${CEPH_PORT}\s+" 1>/dev/null 2>&1 || break + done +fi + +[ -z "$INIT_CEPH" ] && INIT_CEPH=$CEPH_BIN/init-ceph + +# sudo if btrfs +[ -d $CEPH_DEV_DIR/osd0/. ] && [ -e $CEPH_DEV_DIR/sudo ] && SUDO="sudo" + +if [ $inc_osd_num -eq 0 ]; then + prun $SUDO rm -f core* +fi + +[ -d $CEPH_ASOK_DIR ] || mkdir -p $CEPH_ASOK_DIR +[ -d $CEPH_OUT_DIR ] || mkdir -p $CEPH_OUT_DIR +[ -d $CEPH_DEV_DIR ] || mkdir -p $CEPH_DEV_DIR +[ -d $CEPH_OUT_CLIENT_DIR ] || mkdir -p $CEPH_OUT_CLIENT_DIR +if [ $inc_osd_num -eq 0 ]; then + $SUDO find "$CEPH_OUT_DIR" -type f -delete +fi +[ -d gmon ] && $SUDO rm -rf gmon/* + +[ "$cephx" -eq 1 ] && [ "$new" -eq 1 ] && [ -e $keyring_fn ] && rm $keyring_fn + + +# figure machine's ip +HOSTNAME=`hostname -s` +if [ -n "$ip" ]; then + IP="$ip" +else + echo hostname $HOSTNAME + if [ -x "$(which ip 2>/dev/null)" ]; then + IP_CMD="ip addr" + else + IP_CMD="ifconfig" + fi + # filter out IPv4 and localhost addresses + IP="$($IP_CMD | sed -En 's/127.0.0.1//;s/.*inet (addr:)?(([0-9]*\.){3}[0-9]*).*/\2/p' | head -n1)" + # if nothing left, try using localhost address, it might work + if [ -z "$IP" ]; then IP="127.0.0.1"; fi +fi +echo "ip $IP" +echo "port $CEPH_PORT" + + +[ -z $CEPH_ADM ] && CEPH_ADM=$CEPH_BIN/ceph + +ceph_adm() { + if [ "$cephx" -eq 1 ]; then + prun $SUDO "$CEPH_ADM" -c "$conf_fn" -k "$keyring_fn" "$@" + else + prun $SUDO "$CEPH_ADM" -c "$conf_fn" "$@" + fi +} + +if [ $inc_osd_num -gt 0 ]; then + start_osd + exit +fi + +if [ "$new" -eq 1 ]; then + prepare_conf +fi + +if [ $CEPH_NUM_MON -gt 0 ]; then + start_mon + + debug echo Populating config ... + cat <> "$keyring_fn" +fi + +# Don't set max_mds until all the daemons are started, otherwise +# the intended standbys might end up in active roles. 
+if [ "$CEPH_MAX_MDS" -gt 1 ]; then + sleep 5 # wait for daemons to make it into FSMap before increasing max_mds +fi +fs=0 +for name in a b c d e f g h i j k l m n o p +do + [ $fs -eq $CEPH_NUM_FS ] && break + fs=$(($fs + 1)) + if [ "$CEPH_MAX_MDS" -gt 1 ]; then + ceph_adm fs set "${name}" max_mds "$CEPH_MAX_MDS" + fi +done + +# mgr + +if [ "$ec" -eq 1 ]; then + ceph_adm < "$CEPH_OUT_DIR/$rgw_python_file" + prun python $CEPH_OUT_DIR/$rgw_python_file +} + +do_rgw_create_users() +{ + # Create S3 user + s3_akey='0555b35654ad1656d804' + s3_skey='h7GhxuBLTrlhVUyxSPUKUV8r/2EI4ngqJxD7iBdBYLhwluN30JaT3Q==' + debug echo "setting up user testid" + $CEPH_BIN/radosgw-admin user create --uid testid --access-key $s3_akey --secret $s3_skey --display-name 'M. Tester' --email tester@ceph.com -c $conf_fn > /dev/null + + # Create S3-test users + # See: https://github.com/ceph/s3-tests + debug echo "setting up s3-test users" + $CEPH_BIN/radosgw-admin user create \ + --uid 0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef \ + --access-key ABCDEFGHIJKLMNOPQRST \ + --secret abcdefghijklmnopqrstuvwxyzabcdefghijklmn \ + --display-name youruseridhere \ + --email s3@example.com --caps="user-policy=*" -c $conf_fn > /dev/null + $CEPH_BIN/radosgw-admin user create \ + --uid 56789abcdef0123456789abcdef0123456789abcdef0123456789abcdef01234 \ + --access-key NOPQRSTUVWXYZABCDEFG \ + --secret nopqrstuvwxyzabcdefghijklmnabcdefghijklm \ + --display-name john.doe \ + --email john.doe@example.com -c $conf_fn > /dev/null + $CEPH_BIN/radosgw-admin user create \ + --tenant testx \ + --uid 9876543210abcdef0123456789abcdef0123456789abcdef0123456789abcdef \ + --access-key HIJKLMNOPQRSTUVWXYZA \ + --secret opqrstuvwxyzabcdefghijklmnopqrstuvwxyzab \ + --display-name tenanteduser \ + --email tenanteduser@example.com -c $conf_fn > /dev/null + + # Create Swift user + debug echo "setting up user tester" + $CEPH_BIN/radosgw-admin user create -c $conf_fn --subuser=test:tester --display-name=Tester-Subuser --key-type=swift --secret=testing --access=full > /dev/null + + echo "" + echo "S3 User Info:" + echo " access key: $s3_akey" + echo " secret key: $s3_skey" + echo "" + echo "Swift User Info:" + echo " account : test" + echo " user : tester" + echo " password : testing" + echo "" +} + +do_rgw() +{ + if [ "$new" -eq 1 ]; then + do_rgw_create_users + if [ -n "$rgw_compression" ]; then + debug echo "setting compression type=$rgw_compression" + $CEPH_BIN/radosgw-admin zone placement modify -c $conf_fn --rgw-zone=default --placement-id=default-placement --compression=$rgw_compression > /dev/null + fi + fi + + if [ -n "$rgw_flight_frontend" ] ;then + debug echo "starting arrow_flight frontend on first rgw" + fi + + # Start server + if [ "$cephadm" -gt 0 ]; then + ceph_adm orch apply rgw rgwTest + return + fi + + RGWDEBUG="" + if [ "$debug" -ne 0 ]; then + RGWDEBUG="--debug-rgw=20 --debug-ms=1" + fi + + local CEPH_RGW_PORT_NUM="${CEPH_RGW_PORT}" + local CEPH_RGW_HTTPS="${CEPH_RGW_PORT: -1}" + if [[ "${CEPH_RGW_HTTPS}" = "s" ]]; then + CEPH_RGW_PORT_NUM="${CEPH_RGW_PORT::-1}" + else + CEPH_RGW_HTTPS="" + fi + RGWSUDO= + [ $CEPH_RGW_PORT_NUM -lt 1024 ] && RGWSUDO=sudo + + current_port=$CEPH_RGW_PORT + # allow only first rgw to start arrow_flight server/port + local flight_conf=$rgw_flight_frontend + for n in $(seq 1 $CEPH_NUM_RGW); do + rgw_name="client.rgw.${current_port}" + + ceph_adm auth get-or-create $rgw_name \ + mon 'allow rw' \ + osd 'allow rwx' \ + mgr 'allow rw' \ + >> "$keyring_fn" + + debug echo start rgw on 
http${CEPH_RGW_HTTPS}://localhost:${current_port} + run 'rgw' $current_port $RGWSUDO $CEPH_BIN/radosgw -c $conf_fn \ + --log-file=${CEPH_OUT_DIR}/radosgw.${current_port}.log \ + --admin-socket=${CEPH_OUT_DIR}/radosgw.${current_port}.asok \ + --pid-file=${CEPH_OUT_DIR}/radosgw.${current_port}.pid \ + --rgw_luarocks_location=${CEPH_OUT_DIR}/luarocks \ + --rgw_keystone_url=http://localhost:5000 \ + ${RGWDEBUG} \ + -n ${rgw_name} \ + "--rgw_frontends=${rgw_frontend} port=${current_port}${CEPH_RGW_HTTPS}${flight_conf:+,arrow_flight}" + + i=$(($i + 1)) + [ $i -eq $CEPH_NUM_RGW ] && break + + current_port=$((current_port+1)) + unset flight_conf + done +} +if [ "$CEPH_NUM_RGW" -gt 0 ]; then + do_rgw +fi + +# Ganesha Daemons +if [ $GANESHA_DAEMON_NUM -gt 0 ]; then + pseudo_path="/cephfs" + if [ "$cephadm" -gt 0 ]; then + cluster_id="vstart" + port="2049" + prun ceph_adm nfs cluster create $cluster_id + if [ $CEPH_NUM_MDS -gt 0 ]; then + prun ceph_adm nfs export create cephfs --fsname "a" --cluster-id $cluster_id --pseudo-path $pseudo_path + echo "Mount using: mount -t nfs -o port=$port $IP:$pseudo_path mountpoint" + fi + if [ "$CEPH_NUM_RGW" -gt 0 ]; then + pseudo_path="/rgw" + do_rgw_create_bucket + prun ceph_adm nfs export create rgw --cluster-id $cluster_id --pseudo-path $pseudo_path --bucket "nfs-bucket" + echo "Mount using: mount -t nfs -o port=$port $IP:$pseudo_path mountpoint" + fi + else + start_ganesha + echo "Mount using: mount -t nfs -o port= $IP:$pseudo_path mountpoint" + fi +fi + +docker_service(){ + local service='' + #prefer podman + if command -v podman > /dev/null; then + service="podman" + elif pgrep -f docker > /dev/null; then + service="docker" + fi + if [ -n "$service" ]; then + echo "using $service for deploying jaeger..." + #check for exited container, remove them and restart container + if [ "$($service ps -aq -f status=exited -f name=jaeger)" ]; then + $service rm jaeger + fi + if [ ! "$(podman ps -aq -f name=jaeger)" ]; then + $service "$@" + fi + else + echo "cannot find docker or podman, please restart service and rerun." + fi +} + +echo "" +if [ $with_jaeger -eq 1 ]; then + debug echo "Enabling jaegertracing..." + docker_service run -d --name jaeger \ + -p 5775:5775/udp \ + -p 6831:6831/udp \ + -p 6832:6832/udp \ + -p 5778:5778 \ + -p 16686:16686 \ + -p 14268:14268 \ + -p 14250:14250 \ + quay.io/jaegertracing/all-in-one +fi + +debug echo "vstart cluster complete. Use stop.sh to stop. See out/* (e.g. 'tail -f out/????') for debug output." + +echo "" +if [ "$new" -eq 1 ]; then + if $with_mgr_dashboard; then + cat < $CEPH_DIR/vstart_environment.sh +{ + echo "export PYTHONPATH=$PYBIND:$CYTHON_PYTHONPATH:$CEPH_PYTHON_COMMON\$PYTHONPATH" + echo "export LD_LIBRARY_PATH=$CEPH_LIB:\$LD_LIBRARY_PATH" + echo "export PATH=$CEPH_DIR/bin:\$PATH" + echo "export CEPH_CONF=$conf_fn" + # We cannot set CEPH_KEYRING if this is sourced by vstart_runner.py (API tests) + if [ "$CEPH_DIR" != "$PWD" ]; then + echo "export CEPH_KEYRING=$keyring_fn" + fi + + if [ -n "$CEPHFS_SHELL" ]; then + echo "alias cephfs-shell=$CEPHFS_SHELL" + fi +} | tee -a $CEPH_DIR/vstart_environment.sh + +echo "CEPH_DEV=1" + +# always keep this section at the very bottom of this file +STRAY_CONF_PATH="/etc/ceph/ceph.conf" +if [ -f "$STRAY_CONF_PATH" -a -n "$conf_fn" -a ! "$conf_fn" -ef "$STRAY_CONF_PATH" ]; then + echo "" + echo "" + echo "WARNING:" + echo " Please remove stray $STRAY_CONF_PATH if not needed." 
+ echo " Your conf files $conf_fn and $STRAY_CONF_PATH may not be in sync" + echo " and may lead to undesired results." + echo "" + echo "NOTE:" + echo " Remember to restart cluster after removing $STRAY_CONF_PATH" +fi + +init_logrotate diff --git a/src/test/test_perf_counters_cache.cc b/src/test/test_perf_counters_cache.cc index 16d92bd7d431f..8867896c02f29 100644 --- a/src/test/test_perf_counters_cache.cc +++ b/src/test/test_perf_counters_cache.cc @@ -960,13 +960,31 @@ TEST(PerfCountersCache, TestLabelStrings) { // test empty val in a label pair will get the label pair added into the perf counters cache but empty key will not std::string label2 = key_create("bad_ctrs1", {{"label3", "val4"}, {"label1", ""}}); - EXPECT_DEATH(pcc->set_counter(label2, TEST_PERFCOUNTERS_COUNTER, 2), ""); + //EXPECT_DEATH(pcc->set_counter(label2, TEST_PERFCOUNTERS_COUNTER, 2), ""); + pcc->set_counter(label2, TEST_PERFCOUNTERS_COUNTER, 2); std::string label3 = key_create("bad_ctrs2", {{"", "val4"}, {"label1", "val1"}}); EXPECT_DEATH(pcc->set_counter(label3, TEST_PERFCOUNTERS_COUNTER, 2), ""); ASSERT_EQ("", client.do_request(R"({ "prefix": "counter dump", "format": "raw" })", &message)); ASSERT_EQ(R"({ + "bad_ctrs1": [ + { + "labels": { + "label1": "", + "label3": "val4" + }, + "counters": { + "test_counter": 2, + "test_time": 0.000000000, + "test_time_avg": { + "avgcount": 0, + "sum": 0.000000000, + "avgtime": 0.000000000 + } + } + } + ], "good_ctrs": [ { "labels": { @@ -990,6 +1008,23 @@ TEST(PerfCountersCache, TestLabelStrings) { // test empty keys in each of the label pairs will not get the label added into the perf counters cache ASSERT_EQ("", client.do_request(R"({ "prefix": "counter dump", "format": "raw" })", &message)); ASSERT_EQ(R"({ + "bad_ctrs1": [ + { + "labels": { + "label1": "", + "label3": "val4" + }, + "counters": { + "test_counter": 2, + "test_time": 0.000000000, + "test_time_avg": { + "avgcount": 0, + "sum": 0.000000000, + "avgtime": 0.000000000 + } + } + } + ], "good_ctrs": [ { "labels": { @@ -1023,6 +1058,23 @@ TEST(PerfCountersCache, TestLabelStrings) { ASSERT_EQ("", client.do_request(R"({ "prefix": "counter dump", "format": "raw" })", &message)); ASSERT_EQ(R"({ + "bad_ctrs1": [ + { + "labels": { + "label1": "", + "label3": "val4" + }, + "counters": { + "test_counter": 2, + "test_time": 0.000000000, + "test_time_avg": { + "avgcount": 0, + "sum": 0.000000000, + "avgtime": 0.000000000 + } + } + } + ], "good_ctrs": [ { "labels": { -- 2.39.5