From 429967917b4f9b5f71214fa3c7718e780728249b Mon Sep 17 00:00:00 2001 From: kchheda3 Date: Wed, 6 Mar 2024 18:13:57 -0500 Subject: [PATCH] rgw/notification: add rgw notification specific debug log subsystem. decorate the events with event details while logging. Signed-off-by: kchheda3 --- doc/rados/troubleshooting/log-and-debug.rst | 2 + .../perf/settings/optimized.yaml | 1 + qa/suites/perf-basic/settings/optimized.yaml | 1 + qa/suites/rados/perf/settings/optimized.yaml | 1 + qa/suites/rgw/multisite/overrides.yaml | 1 + src/common/subsys.h | 1 + src/rgw/driver/rados/rgw_notify.cc | 83 +++++++++++-------- src/rgw/rgw_amqp.cc | 2 +- src/rgw/rgw_kafka.cc | 2 +- 9 files changed, 58 insertions(+), 36 deletions(-) diff --git a/doc/rados/troubleshooting/log-and-debug.rst b/doc/rados/troubleshooting/log-and-debug.rst index a553269cb8e3e..56bf448432fad 100644 --- a/doc/rados/troubleshooting/log-and-debug.rst +++ b/doc/rados/troubleshooting/log-and-debug.rst @@ -302,6 +302,8 @@ to their default level or to a level suitable for normal operations. +--------------------------+-----------+--------------+ | ``rgw lifecycle`` | 1 | 5 | +--------------------------+-----------+--------------+ +| ``rgw notification`` | 1 | 5 | ++--------------------------+-----------+--------------+ | ``javaclient`` | 1 | 5 | +--------------------------+-----------+--------------+ | ``asok`` | 1 | 5 | diff --git a/qa/suites/crimson-rados/perf/settings/optimized.yaml b/qa/suites/crimson-rados/perf/settings/optimized.yaml index deefc61d3d83b..65a8c7f56ea31 100644 --- a/qa/suites/crimson-rados/perf/settings/optimized.yaml +++ b/qa/suites/crimson-rados/perf/settings/optimized.yaml @@ -55,6 +55,7 @@ overrides: debug rgw: "0/0" debug rgw sync: "0/0" debug rgw lifecycle: "0/0" + debug rgw notification: "0/0" debug civetweb: "0/0" debug javaclient: "0/0" debug asok: "0/0" diff --git a/qa/suites/perf-basic/settings/optimized.yaml b/qa/suites/perf-basic/settings/optimized.yaml index 3983ed9d26e63..bba1ddcf7dd8a 100644 --- a/qa/suites/perf-basic/settings/optimized.yaml +++ b/qa/suites/perf-basic/settings/optimized.yaml @@ -59,6 +59,7 @@ overrides: debug rgw: "0/0" debug rgw sync: "0/0" debug rgw lifecycle: "0/0" + debug rgw notification: "0/0" debug civetweb: "0/0" debug javaclient: "0/0" debug asok: "0/0" diff --git a/qa/suites/rados/perf/settings/optimized.yaml b/qa/suites/rados/perf/settings/optimized.yaml index 8f05f59163ac5..46351c0d578aa 100644 --- a/qa/suites/rados/perf/settings/optimized.yaml +++ b/qa/suites/rados/perf/settings/optimized.yaml @@ -55,6 +55,7 @@ overrides: debug rgw: "0/0" debug rgw sync: "0/0" debug rgw lifecycle: "0/0" + debug rgw notification: "0/0" debug civetweb: "0/0" debug javaclient: "0/0" debug asok: "0/0" diff --git a/qa/suites/rgw/multisite/overrides.yaml b/qa/suites/rgw/multisite/overrides.yaml index 8f0d376f7d826..13b498d167f60 100644 --- a/qa/suites/rgw/multisite/overrides.yaml +++ b/qa/suites/rgw/multisite/overrides.yaml @@ -7,6 +7,7 @@ overrides: debug rgw: 20 debug rgw sync: 20 debug rgw lifecycle: 20 + debug rgw notification: 20 rgw crypt s3 kms backend: testing rgw crypt s3 kms encryption keys: testkey-1=YmluCmJvb3N0CmJvb3N0LWJ1aWxkCmNlcGguY29uZgo= rgw crypt require ssl: false diff --git a/src/common/subsys.h b/src/common/subsys.h index d52018c880dd3..6022c1fcaf1f4 100644 --- a/src/common/subsys.h +++ b/src/common/subsys.h @@ -65,6 +65,7 @@ SUBSYS(rgw_access, 1, 5) SUBSYS(rgw_dbstore, 1, 5) SUBSYS(rgw_flight, 1, 5) SUBSYS(rgw_lifecycle, 1, 5) +SUBSYS(rgw_notification, 1, 5) SUBSYS(javaclient, 1, 5) SUBSYS(asok, 1, 5) SUBSYS(throttle, 1, 1) diff --git a/src/rgw/driver/rados/rgw_notify.cc b/src/rgw/driver/rados/rgw_notify.cc index 9baed182ed946..4be0524587e07 100644 --- a/src/rgw/driver/rados/rgw_notify.cc +++ b/src/rgw/driver/rados/rgw_notify.cc @@ -18,7 +18,7 @@ #include "common/dout.h" #include -#define dout_subsys ceph_subsys_rgw +#define dout_subsys ceph_subsys_rgw_notification namespace rgw::notify { @@ -66,6 +66,16 @@ struct event_entry_t { }; WRITE_CLASS_ENCODER(event_entry_t) +static inline std::ostream& operator<<(std::ostream& out, + const event_entry_t& e) { + return out << "notification id: '" << e.event.configurationId + << "', topic: '" << e.arn_topic + << "', endpoint: '" << e.push_endpoint + << "', bucket_owner: '" << e.event.bucket_ownerIdentity + << "', bucket: '" << e.event.bucket_name + << "', object: '" << e.event.object_key + << "', event type: '" << e.event.eventName << "'"; +} struct persistency_tracker { ceph::coarse_real_time last_retry_time {ceph::coarse_real_clock::zero()}; @@ -244,15 +254,12 @@ private: if ( (topic_persistency_ttl != 0 && event_entry.creation_time != ceph::coarse_real_clock::zero() && time_now - event_entry.creation_time > std::chrono::seconds(topic_persistency_ttl)) || ( topic_persistency_max_retries != 0 && entry_persistency_tracker.retires_num > topic_persistency_max_retries) ) { - ldpp_dout(this, 1) << "Expiring entry for topic= " - << event_entry.arn_topic << " bucket_owner= " - << event_entry.event.bucket_ownerIdentity - << " bucket= " << event_entry.event.bucket_name - << " object_name= " << event_entry.event.object_key - << " entry retry_number=" + ldpp_dout(this, 1) << "WARNING: Expiring entry marker: " << entry.marker + << " for event with " << event_entry + << " entry retry_number: " << entry_persistency_tracker.retires_num - << " creation_time=" << event_entry.creation_time - << " time_now=" << time_now << dendl; + << " creation_time: " << event_entry.creation_time + << " time_now: " << time_now << dendl; return EntryProcessingResult::Expired; } if (time_now - entry_persistency_tracker.last_retry_time < std::chrono::seconds(topic_persistency_sleep_duration) ) { @@ -261,7 +268,10 @@ private: ++entry_persistency_tracker.retires_num; entry_persistency_tracker.last_retry_time = time_now; - ldpp_dout(this, 20) << "Processing entry retry_number=" << entry_persistency_tracker.retires_num << " time=" << dendl; + ldpp_dout(this, 20) << "Processing event entry with " << event_entry + << " retry_number: " + << entry_persistency_tracker.retires_num + << " current time: " << time_now << dendl; try { // TODO move endpoint creation to queue level const auto push_endpoint = RGWPubSubEndpoint::create(event_entry.push_endpoint, event_entry.arn_topic, @@ -271,12 +281,14 @@ private: " for entry: " << entry.marker << dendl; const auto ret = push_endpoint->send_to_completion_async(cct, event_entry.event, optional_yield(io_context, yield)); if (ret < 0) { - ldpp_dout(this, 5) << "WARNING: push entry: " << entry.marker << " to endpoint: " << event_entry.push_endpoint - << " failed. error: " << ret << " (will retry)" << dendl; + ldpp_dout(this, 5) << "WARNING: push entry marker: " << entry.marker + << " failed. error: " << ret + << " (will retry) for event with " << event_entry + << dendl; return EntryProcessingResult::Failure; } else { - ldpp_dout(this, 20) << "INFO: push entry: " << entry.marker << " to endpoint: " << event_entry.push_endpoint - << " ok" << dendl; + ldpp_dout(this, 5) << "INFO: push entry marker: " << entry.marker + << " ok for event with " << event_entry << dendl; if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_ok); return EntryProcessingResult::Successful; } @@ -304,12 +316,15 @@ private: auto ret = rgw_rados_operate(this, rados_store.getRados()->get_notif_pool_ctx(), queue_name, &op, optional_yield(io_context, yield)); if (ret == -ENOENT) { // queue was deleted - ldpp_dout(this, 5) << "INFO: queue: " - << queue_name << ". was removed. cleanup will stop" << dendl; + ldpp_dout(this, 10) << "INFO: queue: " << queue_name + << ". was removed. cleanup will stop" << dendl; return; } if (ret == -EBUSY) { - ldpp_dout(this, 5) << "WARNING: queue: " << queue_name << " ownership moved to another daemon. processing will stop" << dendl; + ldpp_dout(this, 10) + << "WARNING: queue: " << queue_name + << " ownership moved to another daemon. processing will stop" + << dendl; return; } if (ret < 0) { @@ -367,13 +382,16 @@ private: if (ret == -ENOENT) { // queue was deleted topics_persistency_tracker.erase(queue_name); - ldpp_dout(this, 5) << "INFO: queue: " - << queue_name << ". was removed. processing will stop" << dendl; + ldpp_dout(this, 10) << "INFO: queue: " << queue_name + << ". was removed. processing will stop" << dendl; return; } if (ret == -EBUSY) { topics_persistency_tracker.erase(queue_name); - ldpp_dout(this, 5) << "WARNING: queue: " << queue_name << " ownership moved to another daemon. processing will stop" << dendl; + ldpp_dout(this, 10) + << "WARNING: queue: " << queue_name + << " ownership moved to another daemon. processing will stop" + << dendl; return; } if (ret < 0) { @@ -468,11 +486,15 @@ private: auto ret = rgw_rados_operate(this, rados_ioctx, queue_name, &op, optional_yield(io_context, yield)); if (ret == -ENOENT) { // queue was deleted - ldpp_dout(this, 5) << "INFO: queue: " << queue_name << ". was removed. processing will stop" << dendl; + ldpp_dout(this, 10) << "INFO: queue: " << queue_name + << ". was removed. processing will stop" << dendl; return; } if (ret == -EBUSY) { - ldpp_dout(this, 5) << "WARNING: queue: " << queue_name << " ownership moved to another daemon. processing will stop" << dendl; + ldpp_dout(this, 10) + << "WARNING: queue: " << queue_name + << " ownership moved to another daemon. processing will stop" + << dendl; return; } if (ret < 0) { @@ -1133,23 +1155,16 @@ int publish_commit(rgw::sal::Object* obj, dpp->get_cct(), event_entry.event, res.yield); if (ret < 0) { ldpp_dout(dpp, 1) - << "ERROR: push to endpoint " << topic.cfg.dest.push_endpoint - << " bucket: " << event_entry.event.bucket_name - << " bucket_owner: " << event_entry.event.bucket_ownerIdentity - << " object_name: " << event_entry.event.object_key - << " failed. error: " << ret << dendl; + << "ERROR: failed to push sync notification event with error: " + << ret << " for event with " << event_entry << dendl; if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_failed); return ret; } if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_ok); } catch (const RGWPubSubEndpoint::configuration_error& e) { - ldpp_dout(dpp, 1) << "ERROR: failed to create push endpoint: " - << topic.cfg.dest.push_endpoint - << " bucket: " << event_entry.event.bucket_name - << " bucket_owner: " - << event_entry.event.bucket_ownerIdentity - << " object_name: " << event_entry.event.object_key - << ". error: " << e.what() << dendl; + ldpp_dout(dpp, 1) << "ERROR: failed to create push endpoint for sync " + "notification event with error: " + << e.what() << " event with " << event_entry << dendl; if (perfcounter) perfcounter->inc(l_rgw_pubsub_push_failed); return -EINVAL; } diff --git a/src/rgw/rgw_amqp.cc b/src/rgw/rgw_amqp.cc index 67cbbe5c67715..46ab9c575bd67 100644 --- a/src/rgw/rgw_amqp.cc +++ b/src/rgw/rgw_amqp.cc @@ -20,7 +20,7 @@ #include "common/dout.h" #include -#define dout_subsys ceph_subsys_rgw +#define dout_subsys ceph_subsys_rgw_notification // TODO investigation, not necessarily issues: // (1) in case of single threaded writer context use spsc_queue diff --git a/src/rgw/rgw_kafka.cc b/src/rgw/rgw_kafka.cc index 0d6e773750144..65a1aed0cf912 100644 --- a/src/rgw/rgw_kafka.cc +++ b/src/rgw/rgw_kafka.cc @@ -16,7 +16,7 @@ #include #include "common/dout.h" -#define dout_subsys ceph_subsys_rgw +#define dout_subsys ceph_subsys_rgw_notification // TODO investigation, not necessarily issues: // (1) in case of single threaded writer context use spsc_queue -- 2.39.5