Revert "msg,mon,common: log when DispatchQueue throttle limit is reached"

author Kefu Chai <tchaikov@gmail.com>
Sat, 6 Feb 2021 06:47:56 +0000 (14:47 +0800)
committer Kefu Chai <kchai@redhat.com>
Mon, 8 Feb 2021 10:03:14 +0000 (18:03 +0800)
diff --git a/qa/suites/fs/multiclient/tasks/cephfs_misc_tests.yaml b/qa/suites/fs/multiclient/tasks/cephfs_misc_tests.yaml

index d9d5d3ca39d5dc5a970a3bed785196a828c3727c..40d63ba792b11ea5110d73e8de0cff99488bc55d 100644 (file)
--- a/qa/suites/fs/multiclient/tasks/cephfs_misc_tests.yaml
+++ b/qa/suites/fs/multiclient/tasks/cephfs_misc_tests.yaml
@@ -11,4 +11,3 @@ overrides:
        - has not responded to cap revoke by MDS for over
        - MDS_CLIENT_LATE_RELEASE
        - responding to mclientcaps
-      - Throttler Limit has been hit. Some message processing may be significantly delayed.
diff --git a/qa/suites/fs/thrash/multifs/tasks/1-thrash/mds.yaml b/qa/suites/fs/thrash/multifs/tasks/1-thrash/mds.yaml

index a151d7a49cf0f68e164ecc7dfdadde3c77376838..33748cea5cdce40f1965a4b0f2429740dfaa756c 100644 (file)
--- a/qa/suites/fs/thrash/multifs/tasks/1-thrash/mds.yaml
+++ b/qa/suites/fs/thrash/multifs/tasks/1-thrash/mds.yaml
@@ -5,4 +5,3 @@ overrides:
    ceph:
      log-ignorelist:
        - Replacing daemon mds
-      - Throttler Limit has been hit. Some message processing may be significantly delayed.
diff --git a/qa/suites/fs/thrash/multifs/tasks/1-thrash/mon.yaml b/qa/suites/fs/thrash/multifs/tasks/1-thrash/mon.yaml

index 370fd66f79fb8c817f468e65fd66312c7f541b72..fbbe16151ce90eeb56464882caa160550f22b19b 100644 (file)
--- a/qa/suites/fs/thrash/multifs/tasks/1-thrash/mon.yaml
+++ b/qa/suites/fs/thrash/multifs/tasks/1-thrash/mon.yaml
@@ -3,7 +3,6 @@ overrides:
      log-ignorelist:
        - overall HEALTH_
        - \(MON_DOWN\)
-      - Throttler Limit has been hit. Some message processing may be significantly delayed.
  tasks:
  - mon_thrash:
      check_mds_failover: True
diff --git a/qa/suites/fs/thrash/workloads/tasks/1-thrash/mds.yaml b/qa/suites/fs/thrash/workloads/tasks/1-thrash/mds.yaml

index a151d7a49cf0f68e164ecc7dfdadde3c77376838..33748cea5cdce40f1965a4b0f2429740dfaa756c 100644 (file)
--- a/qa/suites/fs/thrash/workloads/tasks/1-thrash/mds.yaml
+++ b/qa/suites/fs/thrash/workloads/tasks/1-thrash/mds.yaml
@@ -5,4 +5,3 @@ overrides:
    ceph:
      log-ignorelist:
        - Replacing daemon mds
-      - Throttler Limit has been hit. Some message processing may be significantly delayed.
diff --git a/qa/suites/fs/thrash/workloads/tasks/1-thrash/mon.yaml b/qa/suites/fs/thrash/workloads/tasks/1-thrash/mon.yaml

index 370fd66f79fb8c817f468e65fd66312c7f541b72..fbbe16151ce90eeb56464882caa160550f22b19b 100644 (file)
--- a/qa/suites/fs/thrash/workloads/tasks/1-thrash/mon.yaml
+++ b/qa/suites/fs/thrash/workloads/tasks/1-thrash/mon.yaml
@@ -3,7 +3,6 @@ overrides:
      log-ignorelist:
        - overall HEALTH_
        - \(MON_DOWN\)
-      - Throttler Limit has been hit. Some message processing may be significantly delayed.
  tasks:
  - mon_thrash:
      check_mds_failover: True
diff --git a/qa/suites/fs/thrash/workloads/tasks/1-thrash/osd.yaml b/qa/suites/fs/thrash/workloads/tasks/1-thrash/osd.yaml

index 968ac782d5b4d205742487f7d199446edc31b4be..62e30ba230fa4c94fa3c882116c10356b053aea9 100644 (file)
--- a/qa/suites/fs/thrash/workloads/tasks/1-thrash/osd.yaml
+++ b/qa/suites/fs/thrash/workloads/tasks/1-thrash/osd.yaml
@@ -4,6 +4,5 @@ overrides:
        - but it is still running
        - objects unfound and apparently lost
        - MDS_SLOW_METADATA_IO
-      - Throttler Limit has been hit. Some message processing may be significantly delayed.
  tasks:
  - thrashosds:
diff --git a/qa/tasks/cephfs/test_misc.py b/qa/tasks/cephfs/test_misc.py

index 6aa222f9a617733afe996e7e8f1db1d8ccc81af8..6a295bbfdf1aca7dcf442385b994c1e3e4e69c6e 100644 (file)
--- a/qa/tasks/cephfs/test_misc.py
+++ b/qa/tasks/cephfs/test_misc.py
@@ -198,18 +198,6 @@ class TestMisc(CephFSTestCase):
          info = self.fs.mds_asok(['dump', 'inode', hex(ino)])
          assert info['path'] == "/foo"
  
-    def test_dispatch_queue_throttle_message(self):
-        """
-        That cluster log a warning when the Dispatch Queue Throttle Limit hits
-        """
-        self.config_set('mds', 'ms_dispatch_throttle_log_interval', 5)
-        self.config_set('mds', 'ms_dispatch_throttle_bytes', 10240)
-
-        # Create files & split across 10 directories, 1000 each.
-        with self.assert_cluster_log("Throttler Limit has been hit. Some message processing may be significantly delayed.",
-                                     invert_match=False, watch_channel="cluster"):
-            for i in range(0, 10):
-                self.mount_a.create_n_files("dir{0}/file".format(i), 1000, sync=False)
  
  class TestCacheDrop(CephFSTestCase):
      CLIENTS_REQUIRED = 1
diff --git a/src/common/options.cc b/src/common/options.cc

index 625dc9d4cf89f53099fc0829559fe06e30093796..3412f92d994c51149edb1a468a52fa3782bd05ad 100644 (file)
--- a/src/common/options.cc
+++ b/src/common/options.cc
@@ -1020,11 +1020,6 @@ std::vector<Option> get_global_options() {
      .set_default(100_M)
      .set_description("Limit messages that are read off the network but still being processed"),
  
-    Option("ms_dispatch_throttle_log_interval", Option::TYPE_SECS, Option::LEVEL_ADVANCED)
-    .set_default(30)
-    .set_min(0)
-    .set_description("Interval in seconds for high verbosity debug log message when the dispatch throttle limit are hit"),
-
      Option("ms_bind_ipv4", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
      .set_default(true)
      .set_description("Bind servers to IPv4 address(es)")
diff --git a/src/mon/MonClient.cc b/src/mon/MonClient.cc

index 1e9087aff82e6582f778b1dfcdd550a3af117a13..a1c67ad44990329f42addd021362584887efc06f 100644 (file)
--- a/src/mon/MonClient.cc
+++ b/src/mon/MonClient.cc
@@ -508,7 +508,6 @@ int MonClient::init()
  
    initialized = true;
  
-  cct->_conf.add_observer(this);
    messenger->set_auth_client(this);
    messenger->add_dispatcher_head(this);
  
@@ -547,7 +546,6 @@ void MonClient::shutdown()
    if (initialized) {
      initialized = false;
    }
-  cct->_conf.remove_observer(this);
    monc_lock.lock();
    timer.shutdown();
    stopping = false;
@@ -856,29 +854,6 @@ bool MonClient::ms_handle_reset(Connection *con)
    }
  }
  
-bool MonClient::ms_handle_throttle(ms_throttle_t ttype) {
-  switch (ttype) {
-  case ms_throttle_t::MESSAGE:
-    break; // TODO
-  case ms_throttle_t::BYTES:
-    break; // TODO
-  case ms_throttle_t::DISPATCH_QUEUE:
-    {
-      //cluster log a warning that Dispatch Queue Throttle Limit hit
-      if (!log_client) {
-        return false; //cannot handle if the daemon didn't setup a log_client for me
-      }
-      LogChannelRef clog = log_client->create_channel(CLOG_CHANNEL_CLUSTER);
-      clog->warn() << "Throttler Limit has been hit. "
-                   << "Some message processing may be significantly delayed.";
-    }
-    break;
-  default:
-    return false;
-  }
-  return true;
-}
-
  bool MonClient::_opened() const
  {
    ceph_assert(ceph_mutex_is_locked(monc_lock));
@@ -1640,24 +1615,6 @@ int MonClient::handle_auth_request(
    return -EACCES;
  }
  
-const char** MonClient::get_tracked_conf_keys() const {
-  static const char* KEYS[] = {
-    "ms_dispatch_throttle_bytes",
-    "ms_dispatch_throttle_log_interval",
-    NULL
-  };
-  return KEYS;
-}
-
-void MonClient::handle_conf_change(const ConfigProxy& conf, const std::set<std::string> &changed) {
-  if (changed.count("ms_dispatch_throttle_bytes") || changed.count("ms_dispatch_throttle_log_interval")) {
-    if (messenger) {
-      messenger->dispatch_throttle_bytes = cct->_conf.get_val<Option::size_t>("ms_dispatch_throttle_bytes");
-      messenger->dispatch_throttle_log_interval = cct->_conf.get_val<std::chrono::seconds>("ms_dispatch_throttle_log_interval");
-    }
-  }
-}
-
  AuthAuthorizer* MonClient::build_authorizer(int service_id) const {
    std::lock_guard l(monc_lock);
    if (auth) {
diff --git a/src/mon/MonClient.h b/src/mon/MonClient.h

index 7231d1a655495da45a330cc00b9a21d8f7602d00..cc7805d0675e9bb852979ed18ea26de7f5dc1265 100644 (file)
--- a/src/mon/MonClient.h
+++ b/src/mon/MonClient.h
@@ -268,9 +268,8 @@ inline boost::system::error_condition make_error_condition(monc_errc e) noexcept
  const boost::system::error_category& monc_category() noexcept;
  
  class MonClient : public Dispatcher,
-                  public AuthClient,
-                  public AuthServer, /* for mgr, osd, mds */
-                  public md_config_obs_t {
+                 public AuthClient,
+                 public AuthServer /* for mgr, osd, mds */ {
    static constexpr auto dout_subsys = ceph_subsys_monc;
  public:
    // Error, Newest, Oldest
@@ -310,7 +309,6 @@ private:
    bool ms_handle_reset(Connection *con) override;
    void ms_handle_remote_reset(Connection *con) override {}
    bool ms_handle_refused(Connection *con) override { return false; }
-  bool ms_handle_throttle(ms_throttle_t ttype) override;
  
    void handle_monmap(MMonMap *m);
    void handle_config(MConfig *m);
@@ -404,11 +402,6 @@ public:
      uint32_t auth_method,
      const ceph::buffer::list& bl,
      ceph::buffer::list *reply) override;
-  // md_config_obs_t (config observer)
-  const char** get_tracked_conf_keys() const override;
-  void handle_conf_change(
-    const ConfigProxy& conf,
-    const std::set<std::string> &changed) override;
  
    void set_entity_name(EntityName name) { entity_name = name; }
    void set_handle_authentication_dispatcher(Dispatcher *d) {
diff --git a/src/msg/DispatchQueue.h b/src/msg/DispatchQueue.h

index c951df5370dfc346f9ebadc1ba6f827eb6b65ab0..de0cb7d1a0833dde7260d0eedf89f5a339b5167c 100644 (file)
--- a/src/msg/DispatchQueue.h
+++ b/src/msg/DispatchQueue.h
@@ -212,11 +212,6 @@ class DispatchQueue {
    uint64_t get_id() {
      return next_id++;
    }
-
-  Messenger* get_messenger() const {
-    return msgr;
-  }
-
    void start();
    void entry();
    void wait();
diff --git a/src/msg/Dispatcher.h b/src/msg/Dispatcher.h

index 36141571202a27e421c76bb28266d633d9f0b0d7..5e025437b53570ad78844cb38e7e6c3c6bfeab74 100644 (file)
--- a/src/msg/Dispatcher.h
+++ b/src/msg/Dispatcher.h
@@ -214,16 +214,6 @@ public:
      return 0;
    }
  
-  /**
-   * handle throttle limit hit and cluster log it.
-   *
-   * return true if handled
-   * return false if not handled
-   */
-  virtual bool ms_handle_throttle(ms_throttle_t ttype) {
-    return false;
-  }
-
    /**
     * @} //Authentication
     */
diff --git a/src/msg/Messenger.cc b/src/msg/Messenger.cc

index b701d17f912427f0a01c54050c478db062b23dc3..8064a10a0d9b8d4b4fdbc217a8886c67f4616ea1 100644 (file)
--- a/src/msg/Messenger.cc
+++ b/src/msg/Messenger.cc
@@ -66,8 +66,6 @@ Messenger::Messenger(CephContext *cct_, entity_name_t w)
      auth_registry(cct)
  {
    auth_registry.refresh_config();
-  dispatch_throttle_bytes = cct->_conf.get_val<Option::size_t>("ms_dispatch_throttle_bytes");
-  dispatch_throttle_log_interval = cct->_conf.get_val<std::chrono::seconds>("ms_dispatch_throttle_log_interval");
  }
  
  void Messenger::set_endpoint_addr(const entity_addr_t& a,
diff --git a/src/msg/Messenger.h b/src/msg/Messenger.h

index 7dcaeb75ba482dfb281b5f21dc42c7c0766b414a..e87f3196b1c9b58e2ab8c2f3a1c93c9b12b23d94 100644 (file)
--- a/src/msg/Messenger.h
+++ b/src/msg/Messenger.h
@@ -112,8 +112,6 @@ protected:
  public:
    AuthClient *auth_client = 0;
    AuthServer *auth_server = 0;
-  uint64_t dispatch_throttle_bytes;
-  std::chrono::seconds dispatch_throttle_log_interval;
  
  #ifdef UNIT_TESTS_BUILT
    Interceptor *interceptor = nullptr;
@@ -815,18 +813,6 @@ public:
    void set_require_authorizer(bool b) {
      require_authorizer = b;
    }
-  /**
-   * Notify each Dispatcher that the Throttle Limit has been hit. Call
-   * this function whenever the connections are getting throttled.
-   *
-   * @param ttype Throttle type
-   */
-  void ms_deliver_throttle(ms_throttle_t ttype) {
-    for (const auto &dispatcher : dispatchers) {
-      if (dispatcher->ms_handle_throttle(ttype))
-        return;
-    }
-  }
  
    /**
     * @} // Dispatcher Interfacing
diff --git a/src/msg/async/Protocol.h b/src/msg/async/Protocol.h

index d9f3db7a778cd1a05f62084b18f66dcedaf1b291..10436307ebf8362c76932328c16ee55f61893652 100644 (file)
--- a/src/msg/async/Protocol.h
+++ b/src/msg/async/Protocol.h
@@ -106,7 +106,6 @@ protected:
    AsyncConnection *connection;
    AsyncMessenger *messenger;
    CephContext *cct;
-  ceph::mono_time throttle_prev = ceph::mono_clock::zero();
  public:
    std::shared_ptr<AuthConnectionMeta> auth_meta;
  
diff --git a/src/msg/async/ProtocolV1.cc b/src/msg/async/ProtocolV1.cc

index 3f2100968dd4b53e13edd726c2aacebc6cb880e2..43363371bc35d05eb342a4113881ee9936e8dc07 100644 (file)
--- a/src/msg/async/ProtocolV1.cc
+++ b/src/msg/async/ProtocolV1.cc
@@ -718,10 +718,6 @@ CtPtr ProtocolV1::throttle_dispatch_queue() {
    ldout(cct, 20) << __func__ << dendl;
  
    if (cur_msg_size) {
-    Messenger* msgr = connection->dispatch_queue->get_messenger();
-    //update max if it's changed in the conf. Expecting qa tests would change ms_dispatch_throttle_bytes.
-    connection->dispatch_queue->dispatch_throttler.reset_max(msgr->dispatch_throttle_bytes);
-
      if (!connection->dispatch_queue->dispatch_throttler.get_or_fail(
              cur_msg_size)) {
        ldout(cct, 10)
@@ -730,16 +726,6 @@ CtPtr ProtocolV1::throttle_dispatch_queue() {
            << connection->dispatch_queue->dispatch_throttler.get_current() << "/"
            << connection->dispatch_queue->dispatch_throttler.get_max()
            << " failed, just wait." << dendl;
-      ceph::mono_time throttle_now = ceph::mono_clock::now();
-      auto duration = std::chrono::duration_cast<std::chrono::seconds>(throttle_now - throttle_prev);
-      if (duration >= msgr->dispatch_throttle_log_interval) {
-        ldout(cct, 1) << __func__ << " Throttler Limit has been hit. "
-                      << "Some message processing may be significantly delayed." << dendl;
-        throttle_prev = throttle_now;
-
-        //Cluster logging that throttling is occurring.
-        msgr->ms_deliver_throttle(ms_throttle_t::DISPATCH_QUEUE);
-      }
        // following thread pool deal with th full message queue isn't a
        // short time, so we can wait a ms.
        if (connection->register_time_events.empty()) {
diff --git a/src/msg/async/ProtocolV2.cc b/src/msg/async/ProtocolV2.cc

index 2de83a042380c1caef2efdbe97e8eb74f652ee72..855006447f7964e7ea745b78e99b621774228f37 100644 (file)
--- a/src/msg/async/ProtocolV2.cc
+++ b/src/msg/async/ProtocolV2.cc
@@ -1572,10 +1572,6 @@ CtPtr ProtocolV2::throttle_dispatch_queue() {
  
    const size_t cur_msg_size = get_current_msg_size();
    if (cur_msg_size) {
-    Messenger* msgr = connection->dispatch_queue->get_messenger();
-    //update max if it's changed in the conf. Expecting qa tests would change ms_dispatch_throttle_bytes.
-    connection->dispatch_queue->dispatch_throttler.reset_max(msgr->dispatch_throttle_bytes);
-
      if (!connection->dispatch_queue->dispatch_throttler.get_or_fail(
              cur_msg_size)) {
        ldout(cct, 10)
@@ -1584,16 +1580,6 @@ CtPtr ProtocolV2::throttle_dispatch_queue() {
            << connection->dispatch_queue->dispatch_throttler.get_current() << "/"
            << connection->dispatch_queue->dispatch_throttler.get_max()
            << " failed, just wait." << dendl;
-      ceph::mono_time throttle_now = ceph::mono_clock::now();
-      auto duration = std::chrono::duration_cast<std::chrono::seconds>(throttle_now - throttle_prev);
-      if (duration >= msgr->dispatch_throttle_log_interval) {
-        ldout(cct, 1) << __func__ << " Throttler Limit has been hit. "
-                      << "Some message processing may be significantly delayed." << dendl;
-        throttle_prev = throttle_now;
-
-        //Cluster logging that throttling is occurring.
-        msgr->ms_deliver_throttle(ms_throttle_t::DISPATCH_QUEUE);
-      }
        // following thread pool deal with th full message queue isn't a
        // short time, so we can wait a ms.
        if (connection->register_time_events.empty()) {
diff --git a/src/msg/msg_types.h b/src/msg/msg_types.h

index acde32f6ecfe83ed9aaac69a67fb707a6721da76..76a1c1ac2a7665a465f41d2610c755a8b3993b14 100644 (file)
--- a/src/msg/msg_types.h
+++ b/src/msg/msg_types.h
@@ -811,10 +811,4 @@ inline std::ostream& operator<<(std::ostream& out, const ceph_entity_inst &i)
    return out << n;
  }
  
-enum class ms_throttle_t {
-    MESSAGE,
-    BYTES,
-    DISPATCH_QUEUE
-};
-
  #endif
author	Kefu Chai <tchaikov@gmail.com>
	Sat, 6 Feb 2021 06:47:56 +0000 (14:47 +0800)
committer	Kefu Chai <kchai@redhat.com>
	Mon, 8 Feb 2021 10:03:14 +0000 (18:03 +0800)
qa/suites/fs/multiclient/tasks/cephfs_misc_tests.yaml		patch | blob | history
qa/suites/fs/thrash/multifs/tasks/1-thrash/mds.yaml		patch | blob | history
qa/suites/fs/thrash/multifs/tasks/1-thrash/mon.yaml		patch | blob | history
qa/suites/fs/thrash/workloads/tasks/1-thrash/mds.yaml		patch | blob | history
qa/suites/fs/thrash/workloads/tasks/1-thrash/mon.yaml		patch | blob | history
qa/suites/fs/thrash/workloads/tasks/1-thrash/osd.yaml		patch | blob | history
qa/tasks/cephfs/test_misc.py		patch | blob | history
src/common/options.cc		patch | blob | history
src/mon/MonClient.cc		patch | blob | history
src/mon/MonClient.h		patch | blob | history
src/msg/DispatchQueue.h		patch | blob | history
src/msg/Dispatcher.h		patch | blob | history
src/msg/Messenger.cc		patch | blob | history
src/msg/Messenger.h		patch | blob | history
src/msg/async/Protocol.h		patch | blob | history
src/msg/async/ProtocolV1.cc		patch | blob | history
src/msg/async/ProtocolV2.cc		patch | blob | history
src/msg/msg_types.h		patch | blob | history