]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore: introduce ool related metrics with misc improvements 43327/head
author Yingxin Cheng <yingxin.cheng@intel.com>
Tue, 28 Sep 2021 07:38:52 +0000 (15:38 +0800)
committer Yingxin Cheng <yingxin.cheng@intel.com>
Tue, 28 Sep 2021 08:31:08 +0000 (16:31 +0800)
* The number of ool records written;
* Write overhead from journal/ool records;
* Wasted writes from invalidated ool records;
* Wasted writes from erased inline extents;
* Distinguish ool and inline extents from metrics;

Signed-off-by: Yingxin Cheng <yingxin.cheng@intel.com>
src/crimson/os/seastore/cache.cc
src/crimson/os/seastore/cache.h
src/crimson/os/seastore/extent_placement_manager.cc
src/crimson/os/seastore/extent_placement_manager.h
src/crimson/os/seastore/seastore_types.h
src/crimson/os/seastore/transaction.h

index 1c6783fd41be37734b2e3289de33a190cff1a78c..4f810003b90e51682d78ab8fd5433bb670a8db9e 100644 (file)
@@ -169,15 +169,16 @@ void Cache::register_metrics()
      * efforts discarded/committed
      */
     auto effort_label = sm::label("effort");
+
+    // invalidated efforts
     using namespace std::literals::string_view_literals;
-    const string_view effort_names[] = {
+    const string_view invalidated_effort_names[] = {
       "READ"sv,
       "MUTATE"sv,
       "RETIRE"sv,
       "FRESH"sv,
+      "FRESH_OOL_WRITTEN"sv,
     };
-
-    // invalidated efforts
     for (auto& [src, src_label] : labels_by_src) {
       auto& efforts = get_by_src(stats.invalidated_efforts_by_src, src);
       for (auto& [ext, ext_label] : labels_by_ext) {
@@ -219,7 +220,7 @@ void Cache::register_metrics()
       }
 
       // non READ invalidated efforts
-      for (auto& effort_name : effort_names) {
+      for (auto& effort_name : invalidated_effort_names) {
         auto& effort = [&effort_name, &efforts]() -> effort_t& {
           if (effort_name == "READ") {
             return efforts.read;
@@ -227,9 +228,11 @@ void Cache::register_metrics()
             return efforts.mutate;
           } else if (effort_name == "RETIRE") {
             return efforts.retire;
-          } else {
-            assert(effort_name == "FRESH");
+          } else if (effort_name == "FRESH") {
             return efforts.fresh;
+          } else {
+            assert(effort_name == "FRESH_OOL_WRITTEN");
+            return efforts.fresh_ool_written;
           }
         }();
         metrics.add_group(
@@ -260,11 +263,31 @@ void Cache::register_metrics()
             sm::description("delta bytes of invalidated transactions"),
             {src_label}
           ),
+          sm::make_counter(
+            "invalidated_ool_records",
+            efforts.num_ool_records,
+            sm::description("number of ool-records from invalidated transactions"),
+            {src_label}
+          ),
+          sm::make_counter(
+            "invalidated_ool_record_overhead_bytes",
+            efforts.ool_record_overhead_bytes,
+            sm::description("bytes of ool-record overhead from invalidated transactions"),
+            {src_label}
+          ),
         }
       );
     } // src
 
     // committed efforts
+    const string_view committed_effort_names[] = {
+      "READ"sv,
+      "MUTATE"sv,
+      "RETIRE"sv,
+      "FRESH_INVALID"sv,
+      "FRESH_INLINE"sv,
+      "FRESH_OOL"sv,
+    };
     for (auto& [src, src_label] : labels_by_src) {
       if (src == src_t::READ) {
         // READ transaction won't commit
@@ -280,9 +303,27 @@ void Cache::register_metrics()
             sm::description("total number of transaction committed"),
             {src_label}
           ),
+          sm::make_counter(
+            "committed_ool_records",
+            efforts.num_ool_records,
+            sm::description("number of ool-records from committed transactions"),
+            {src_label}
+          ),
+          sm::make_counter(
+            "committed_ool_record_overhead_bytes",
+            efforts.ool_record_overhead_bytes,
+            sm::description("bytes of ool-record overhead from committed transactions"),
+            {src_label}
+          ),
+          sm::make_counter(
+            "committed_inline_record_overhead_bytes",
+            efforts.inline_record_overhead_bytes,
+            sm::description("bytes of inline-record overhead from committed transactions"),
+            {src_label}
+          ),
         }
       );
-      for (auto& effort_name : effort_names) {
+      for (auto& effort_name : committed_effort_names) {
         auto& effort_by_ext = [&efforts, &effort_name]()
             -> counter_by_extent_t<effort_t>& {
           if (effort_name == "READ") {
@@ -291,9 +332,13 @@ void Cache::register_metrics()
             return efforts.mutate_by_ext;
           } else if (effort_name == "RETIRE") {
             return efforts.retire_by_ext;
+          } else if (effort_name == "FRESH_INVALID") {
+            return efforts.fresh_invalid_by_ext;
+          } else if (effort_name == "FRESH_INLINE") {
+            return efforts.fresh_inline_by_ext;
           } else {
-            assert(effort_name == "FRESH");
-            return efforts.fresh_by_ext;
+            assert(effort_name == "FRESH_OOL");
+            return efforts.fresh_ool_by_ext;
           }
         }();
         for (auto& [ext, ext_label] : labels_by_ext) {
@@ -627,6 +672,12 @@ void Cache::mark_transaction_conflicted(
       efforts.mutate_delta_bytes += i->get_delta().length();
     }
 
+    auto& ool_stats = t.get_ool_write_stats();
+    efforts.fresh_ool_written.extents += ool_stats.extents.num;
+    efforts.fresh_ool_written.bytes += ool_stats.extents.bytes;
+    efforts.num_ool_records += ool_stats.num_records;
+    efforts.ool_record_overhead_bytes += ool_stats.overhead_bytes;
+
     if (t.get_src() == Transaction::src_t::CLEANER) {
       // CLEANER transaction won't contain any onode tree operations
       assert(t.onode_tree_stats.is_clear());
@@ -642,6 +693,7 @@ void Cache::mark_transaction_conflicted(
     assert(t.retired_set.empty());
     assert(t.get_fresh_block_stats().num == 0);
     assert(t.mutated_block_list.empty());
+    assert(t.get_ool_write_stats().num_records == 0);
     assert(t.onode_tree_stats.is_clear());
     assert(t.lba_tree_stats.is_clear());
   }
@@ -845,8 +897,14 @@ record_t Cache::prepare_record(Transaction &t)
 
   record.extents.reserve(t.inline_block_list.size());
   for (auto &i: t.inline_block_list) {
-    DEBUGT("fresh block {}", t, *i);
-    get_by_ext(efforts.fresh_by_ext,
+    if (!i->is_valid()) {
+      DEBUGT("fresh inline block (invalid) {}", t, *i);
+      get_by_ext(efforts.fresh_invalid_by_ext,
+                 i->get_type()).increment(i->get_length());
+    } else {
+      DEBUGT("fresh inline block {}", t, *i);
+    }
+    get_by_ext(efforts.fresh_inline_by_ext,
                i->get_type()).increment(i->get_length());
     assert(i->is_inline());
 
@@ -867,11 +925,27 @@ record_t Cache::prepare_record(Transaction &t)
       });
   }
 
+  for (auto &i: t.ool_block_list) {
+    ceph_assert(i->is_valid());
+    DEBUGT("fresh ool block {}", t, *i);
+    get_by_ext(efforts.fresh_ool_by_ext,
+               i->get_type()).increment(i->get_length());
+  }
+
   ceph_assert(t.get_fresh_block_stats().num ==
               t.inline_block_list.size() +
               t.ool_block_list.size() +
               t.num_delayed_invalid_extents);
 
+  auto& ool_stats = t.get_ool_write_stats();
+  ceph_assert(ool_stats.extents.num == t.ool_block_list.size());
+  efforts.num_ool_records += ool_stats.num_records;
+  efforts.ool_record_overhead_bytes += ool_stats.overhead_bytes;
+  auto record_size = get_encoded_record_length(
+      record, segment_manager.get_block_size());
+  auto inline_overhead =
+      record_size.mdlength + record_size.dlength - record.get_raw_data_size();
+  efforts.inline_record_overhead_bytes += inline_overhead;
   return record;
 }
 
@@ -891,9 +965,7 @@ void Cache::complete_commit(
     i->last_committed_crc = i->get_crc32c();
     i->on_initial_write();
 
-    if (!i->is_valid()) {
-      DEBUGT("invalid {}", t, *i);
-    } else {
+    if (i->is_valid()) {
       i->state = CachedExtent::extent_state_t::CLEAN;
       DEBUGT("fresh {}", t, *i);
       add_extent(i);
index c8049ca29ca4be8b06cd494456ae4857969fe2ed..f1449adb22119682ff1494a79d460676d9e8e3fc 100644 (file)
@@ -659,7 +659,10 @@ private:
     uint64_t mutate_delta_bytes = 0;
     effort_t retire;
     effort_t fresh;
+    effort_t fresh_ool_written;
     counter_by_extent_t<uint64_t> num_trans_invalidated;
+    uint64_t num_ool_records = 0;
+    uint64_t ool_record_overhead_bytes = 0;
   };
 
   struct commit_trans_efforts_t {
@@ -667,8 +670,13 @@ private:
     counter_by_extent_t<effort_t> mutate_by_ext;
     counter_by_extent_t<uint64_t> delta_bytes_by_ext;
     counter_by_extent_t<effort_t> retire_by_ext;
-    counter_by_extent_t<effort_t> fresh_by_ext;
-    uint64_t num_trans = 0;
+    counter_by_extent_t<effort_t> fresh_invalid_by_ext;
+    counter_by_extent_t<effort_t> fresh_inline_by_ext;
+    counter_by_extent_t<effort_t> fresh_ool_by_ext;
+    uint64_t num_trans = 0; // the number of inline records
+    uint64_t num_ool_records = 0;
+    uint64_t ool_record_overhead_bytes = 0;
+    uint64_t inline_record_overhead_bytes = 0;
   };
 
   struct success_read_trans_efforts_t {
index 9b069138d73374a3619836d2a0ed9de6a76d96f4..43e6235ddabe974cbc22c09a98e664735bb0d141 100644 (file)
@@ -84,6 +84,15 @@ SegmentedAllocator::Writer::_write(
     current_segment->segment->get_segment_id(),
     record.get_base());
 
+  // account transactional ool writes before write()
+  auto& stats = t.get_ool_write_stats();
+  stats.extents.num += record.get_num_extents();
+  auto extent_bytes = record.get_raw_data_size();
+  stats.extents.bytes += extent_bytes;
+  assert(bl.length() > extent_bytes);
+  stats.overhead_bytes += (bl.length() - extent_bytes);
+  stats.num_records += 1;
+
   return trans_intr::make_interruptible(
     current_segment->segment->write(record.get_base(), bl).safe_then(
       [this, pr=std::move(pr), &t,
index 8a1fa5b2309b750c22f71b504a71178bf8b3831f..5c55b9e1f723757f57a68990c95dc0e4e4578608 100644 (file)
@@ -95,9 +95,13 @@ public:
     extent_buf_len = 0;
     base = MAX_SEG_OFF;
   }
-  uint64_t get_num_extents() {
+  uint64_t get_num_extents() const {
     return extents.size();
   }
+  uint64_t get_raw_data_size() const { // raw data size of the wrapped record, delegated to record_t::get_raw_data_size()
+    assert(extents.size() == record.extents.size()); // the ool extent list and the record's extent list must stay the same length
+    return record.get_raw_data_size();
+  }
 private:
   std::vector<OolExtent> extents;
   record_t record;
index f5e052975912759e2b857097c40fecbd759cc7e4..b4184e01c69fa6aff5f11fb7d5fdb5c93a4a65b5 100644 (file)
@@ -4,6 +4,7 @@
 #pragma once
 
 #include <limits>
+#include <numeric>
 #include <iostream>
 
 #include "include/byteorder.h"
@@ -434,6 +435,22 @@ std::ostream &operator<<(std::ostream &lhs, const delta_info_t &rhs);
 struct record_t {
   std::vector<extent_t> extents;
   std::vector<delta_info_t> deltas;
+
+  std::size_t get_raw_data_size() const {
+    auto extent_size = std::accumulate(
+        extents.begin(), extents.end(), 0,
+        [](uint64_t sum, auto& extent) {
+          return sum + extent.bl.length();
+        }
+    );
+    auto delta_size = std::accumulate(
+        deltas.begin(), deltas.end(), 0,
+        [](uint64_t sum, auto& delta) {
+          return sum + delta.bl.length();
+        }
+    );
+    return extent_size + delta_size;
+  }
 };
 
 class object_data_t {
index 91ac1cd286925ff82b865383d49918558b9c2084..af26ae63c5845384e68bccb64e16a9bb36cd6877 100644 (file)
@@ -264,6 +264,7 @@ public:
     retired_set.clear();
     onode_tree_stats = {};
     lba_tree_stats = {};
+    ool_write_stats = {};
     to_release = NULL_SEG_ID;
     conflicted = false;
     if (!has_reset) {
@@ -293,6 +294,15 @@ public:
     return lba_tree_stats;
   }
 
+  struct ool_write_stats_t { // accounting of ool record writes issued on behalf of this transaction
+    io_stat_t extents; // num / bytes of extents placed into ool records
+    uint64_t overhead_bytes = 0; // written bytes beyond the raw extent data
+    uint64_t num_records = 0; // number of ool records written
+  };
+  ool_write_stats_t& get_ool_write_stats() { // returns a mutable reference: the writer updates the counters, the cache reads them
+    return ool_write_stats;
+  }
+
   void increment_delayed_invalid_extents() {
     ++num_delayed_invalid_extents;
   }
@@ -356,6 +366,7 @@ private:
   /// stats to collect when commit or invalidate
   tree_stats_t onode_tree_stats;
   tree_stats_t lba_tree_stats;
+  ool_write_stats_t ool_write_stats;
 
   ///< if != NULL_SEG_ID, release this segment after completion
   segment_id_t to_release = NULL_SEG_ID;