]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: log long aio operations in KernelDevice 26674/head
authorIgor Fedotov <ifedotov@suse.com>
Wed, 27 Feb 2019 14:58:45 +0000 (17:58 +0300)
committerIgor Fedotov <ifedotov@suse.com>
Wed, 27 Feb 2019 15:56:55 +0000 (18:56 +0300)
Signed-off-by: Igor Fedotov <ifedotov@suse.com>
src/common/legacy_config_opts.h
src/common/options.cc
src/os/bluestore/KernelDevice.cc

index 0d0f6fc65585ce76b1495a7ceefcd15b853a5fae..6ba9d011504056c15747c1b771d178f570cdb35a 100644 (file)
@@ -916,7 +916,7 @@ OPTION(bdev_aio_reap_max, OPT_INT)
 OPTION(bdev_block_size, OPT_INT)
 OPTION(bdev_debug_aio, OPT_BOOL)
 OPTION(bdev_debug_aio_suicide_timeout, OPT_FLOAT)
-OPTION(bdev_log_op_age, OPT_DOUBLE)
+OPTION(bdev_debug_aio_log_age, OPT_DOUBLE)
 
 // if yes, osd will unbind all NVMe devices from kernel driver and bind them
 // to the uio_pci_generic driver. The purpose is to prevent the case where
index 69cc13d58c456bc4137313aca17eedbbb5e3332a..015644d6c6126897e9a8a6b867b2d1f1a8df3961 100644 (file)
@@ -4096,6 +4096,10 @@ std::vector<Option> get_global_options() {
     .set_default(60.0)
     .set_description(""),
 
+    Option("bdev_debug_aio_log_age", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+    .set_default(5.0)
+    .set_description(""),
+
     Option("bdev_nvme_unbind_from_kernel", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
     .set_default(false)
     .set_description(""),
@@ -4112,11 +4116,6 @@ std::vector<Option> get_global_options() {
     .set_default(false)
     .set_description(""),
 
-    Option("bdev_log_op_age", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
-    .set_default(1)
-    .set_description("log operation details if it's slower than this age (seconds)"),
-
-
     Option("bluefs_alloc_size", Option::TYPE_SIZE, Option::LEVEL_ADVANCED)
     .set_default(1_M)
     .set_description(""),
index 56a5de838e577e7a6ead0f72d14f33f0fff7cf96..5812d84b1774121143adb22eb61a5cfb174f9a93 100644 (file)
@@ -546,15 +546,17 @@ void KernelDevice::_aio_thread()
       if (debug_oldest) {
        if (debug_stall_since == utime_t()) {
          debug_stall_since = now;
-       } else if (cct->_conf->bdev_debug_aio_suicide_timeout) {
-         utime_t cutoff = now;
-         cutoff -= cct->_conf->bdev_debug_aio_suicide_timeout;
-         if (debug_stall_since < cutoff) {
-           derr << __func__ << " stalled aio " << debug_oldest
-                << " since " << debug_stall_since << ", timeout is "
-                << cct->_conf->bdev_debug_aio_suicide_timeout
-                << "s, suicide" << dendl;
-           ceph_abort_msg("stalled aio... buggy kernel or bad device?");
+       } else {
+         if (cct->_conf->bdev_debug_aio_suicide_timeout) {
+            utime_t cutoff = now;
+           cutoff -= cct->_conf->bdev_debug_aio_suicide_timeout;
+           if (debug_stall_since < cutoff) {
+             derr << __func__ << " stalled aio " << debug_oldest
+                  << " since " << debug_stall_since << ", timeout is "
+                  << cct->_conf->bdev_debug_aio_suicide_timeout
+                  << "s, suicide" << dendl;
+             ceph_abort_msg("stalled aio... buggy kernel or bad device?");
+           }
          }
        }
       }
@@ -656,6 +658,18 @@ void KernelDevice::debug_aio_unlink(aio_t& aio)
   if (aio.queue_item.is_linked()) {
     debug_queue.erase(debug_queue.iterator_to(aio));
     if (debug_oldest == &aio) {
+      auto age = cct->_conf->bdev_debug_aio_log_age;
+      if (age && debug_stall_since != utime_t()) {
+        utime_t cutoff = ceph_clock_now();
+       cutoff -= age;
+       if (debug_stall_since < cutoff) {
+         derr << __func__ << " stalled aio " << debug_oldest
+               << " since " << debug_stall_since << ", timeout is "
+               << age
+               << "s" << dendl;
+       }
+      }
+
       if (debug_queue.empty()) {
        debug_oldest = nullptr;
       } else {
@@ -866,9 +880,21 @@ int KernelDevice::read(uint64_t off, uint64_t len, bufferlist *pbl,
 
   _aio_log_start(ioc, off, len);
 
+  auto start1 = mono_clock::now();
+
   auto p = buffer::ptr_node::create(buffer::create_small_page_aligned(len));
   int r = ::pread(buffered ? fd_buffereds[WRITE_LIFE_NOT_SET] : fd_directs[WRITE_LIFE_NOT_SET],
                  p->c_str(), len, off);
+  auto age = cct->_conf->bdev_debug_aio_log_age;
+  if (mono_clock::now() - start1 >= make_timespan(age)) {
+    derr << __func__ << " stalled read "
+         << " 0x" << std::hex << off << "~" << len << std::dec
+         << (buffered ? " (buffered)" : " (direct)")
+        << " since " << start1 << ", timeout is "
+        << age
+        << "s" << dendl;
+  }
+
   if (r < 0) {
     if (ioc->allow_eio && is_expected_ioerr(r)) {
       r = -EIO;
@@ -927,7 +953,17 @@ int KernelDevice::direct_read_unaligned(uint64_t off, uint64_t len, char *buf)
   bufferptr p = buffer::create_small_page_aligned(aligned_len);
   int r = 0;
 
+  auto start1 = mono_clock::now();
   r = ::pread(fd_directs[WRITE_LIFE_NOT_SET], p.c_str(), aligned_len, aligned_off);
+  auto age = cct->_conf->bdev_debug_aio_log_age;
+  if (mono_clock::now() - start1 >= make_timespan(age)) {
+    derr << __func__ << " stalled read "
+         << " 0x" << std::hex << off << "~" << len << std::dec
+        << " since " << start1 << ", timeout is "
+        << age
+        << "s" << dendl;
+  }
+
   if (r < 0) {
     r = -errno;
     derr << __func__ << " 0x" << std::hex << off << "~" << len << std::dec
@@ -956,6 +992,7 @@ int KernelDevice::read_random(uint64_t off, uint64_t len, char *buf,
   ceph_assert(off < size);
   ceph_assert(off + len <= size);
   int r = 0;
+  auto age = cct->_conf->bdev_debug_aio_log_age;
 
   //if it's direct io and unaligned, we have to use a internal buffer
   if (!buffered && ((off % block_size != 0)
@@ -963,8 +1000,10 @@ int KernelDevice::read_random(uint64_t off, uint64_t len, char *buf,
                     || (uintptr_t(buf) % CEPH_PAGE_SIZE != 0)))
     return direct_read_unaligned(off, len, buf);
 
+  auto start1 = mono_clock::now();
   if (buffered) {
     //buffered read
+    auto off0 = off;
     char *t = buf;
     uint64_t left = len;
     while (left > 0) {
@@ -979,9 +1018,23 @@ int KernelDevice::read_random(uint64_t off, uint64_t len, char *buf,
       t += r;
       left -= r;
     }
+    if (mono_clock::now() - start1 >= make_timespan(age)) {
+      derr << __func__ << " stalled read "
+          << " 0x" << std::hex << off0 << "~" << len << std::dec
+           << " (buffered) since " << start1 << ", timeout is "
+          << age
+          << "s" << dendl;
+    }
   } else {
     //direct and aligned read
     r = ::pread(fd_directs[WRITE_LIFE_NOT_SET], buf, len, off);
+    if (mono_clock::now() - start1 >= make_timespan(age)) {
+      derr << __func__ << " stalled read "
+          << " 0x" << std::hex << off << "~" << len << std::dec
+           << " (direct) since " << start1 << ", timeout is "
+          << age
+          << "s" << dendl;
+    }
     if (r < 0) {
       r = -errno;
       derr << __func__ << " direct_aligned_read" << " 0x" << std::hex