git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore: improve and fix tree metrics
author Yingxin Cheng <yingxin.cheng@intel.com>
Mon, 16 May 2022 05:59:26 +0000 (13:59 +0800)
committer Yingxin Cheng <yingxin.cheng@intel.com>
Tue, 17 May 2022 08:37:45 +0000 (16:37 +0800)
Signed-off-by: Yingxin Cheng <yingxin.cheng@intel.com>
src/crimson/os/seastore/btree/fixed_kv_btree.h
src/crimson/os/seastore/cache.cc
src/crimson/os/seastore/cache.h
src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.cc
src/crimson/os/seastore/onode_manager/staged-fltree/node.cc
src/crimson/os/seastore/onode_manager/staged-fltree/node.h
src/crimson/os/seastore/transaction.h

index c04d91424094437866d838bb76190ca811bfed24..189b50a9d43d17a82f5eff331a46aaf3645a3e75 100644 (file)
@@ -318,7 +318,7 @@ public:
     fixed_kv_node_meta_t<node_key_t> meta{min_max_t<node_key_t>::min, min_max_t<node_key_t>::max, 1};
     root_leaf->set_meta(meta);
     root_leaf->pin.set_range(meta);
-    c.trans.get_lba_tree_stats().depth = 1u;
+    get_tree_stats<self_type>(c.trans).depth = 1u;
     return phy_tree_root_t{root_leaf->get_paddr(), 1u};
   }
 
@@ -518,7 +518,7 @@ public:
               interruptible::ready_future_marker{},
               std::make_pair(ret, false));
           } else {
-            ++(c.trans.get_lba_tree_stats().num_inserts);
+            ++(get_tree_stats<self_type>(c.trans).num_inserts);
             return handle_split(
               c, ret
             ).si_then([c, laddr, val, &ret] {
@@ -584,6 +584,7 @@ public:
       );
       iter.leaf.node = mut->cast<leaf_node_t>();
     }
+    ++(get_tree_stats<self_type>(c.trans).num_updates);
     iter.leaf.node->update(
       iter.leaf.node->iter_idx(iter.leaf.pos),
       val);
@@ -614,7 +615,7 @@ public:
       c.trans,
       iter.is_end() ? min_max_t<node_key_t>::max : iter.get_key());
     assert(!iter.is_end());
-    ++(c.trans.get_lba_tree_stats().num_erases);
+    ++(get_tree_stats<self_type>(c.trans).num_erases);
     return seastar::do_with(
       iter,
       [this, c](auto &ret) {
@@ -1400,7 +1401,7 @@ private:
 
       root.set_location(nroot->get_paddr());
       root.set_depth(iter.get_depth());
-      c.trans.get_lba_tree_stats().depth = iter.get_depth();
+      get_tree_stats<self_type>(c.trans).depth = iter.get_depth();
       root_dirty = true;
     }
 
index 35ac8b62b9971f1c940f1f87dac90be76f702d71..8634e923f822b88847d9f6ea91c12f50782dde46 100644 (file)
@@ -492,8 +492,10 @@ void Cache::register_metrics()
    */
   auto tree_label = sm::label("tree");
   auto onode_label = tree_label("ONODE");
+  auto omap_label = tree_label("OMAP");
   auto lba_label = tree_label("LBA");
-  auto register_tree_metrics = [&labels_by_src, &onode_label, this](
+  auto backref_label = tree_label("BACKREF");
+  auto register_tree_metrics = [&labels_by_src, &onode_label, &omap_label, this](
       const sm::label_instance& tree_label,
       uint64_t& tree_depth,
       counter_by_src_t<tree_efforts_t>& committed_tree_efforts,
@@ -515,8 +517,9 @@ void Cache::register_metrics()
         continue;
       }
       if (is_cleaner_transaction(src) &&
-          tree_label == onode_label) {
-        // CLEANER transaction won't contain any onode tree operations
+          (tree_label == onode_label ||
+           tree_label == omap_label)) {
+        // CLEANER transaction won't contain any onode/omap tree operations
         continue;
       }
       auto& committed_efforts = get_by_src(committed_tree_efforts, src);
@@ -536,6 +539,12 @@ void Cache::register_metrics()
             sm::description("total number of committed erase operations"),
             {tree_label, src_label}
           ),
+          sm::make_counter(
+            "tree_updates_committed",
+            committed_efforts.num_updates,
+            sm::description("total number of committed update operations"),
+            {tree_label, src_label}
+          ),
           sm::make_counter(
             "tree_inserts_invalidated",
             invalidated_efforts.num_inserts,
@@ -548,6 +557,12 @@ void Cache::register_metrics()
             sm::description("total number of invalidated erase operations"),
             {tree_label, src_label}
           ),
+          sm::make_counter(
+            "tree_updates_invalidated",
+            invalidated_efforts.num_updates,
+            sm::description("total number of invalidated update operations"),
+            {tree_label, src_label}
+          ),
         }
       );
     }
@@ -557,11 +572,21 @@ void Cache::register_metrics()
       stats.onode_tree_depth,
       stats.committed_onode_tree_efforts,
       stats.invalidated_onode_tree_efforts);
+  register_tree_metrics(
+      omap_label,
+      stats.omap_tree_depth,
+      stats.committed_omap_tree_efforts,
+      stats.invalidated_omap_tree_efforts);
   register_tree_metrics(
       lba_label,
       stats.lba_tree_depth,
       stats.committed_lba_tree_efforts,
       stats.invalidated_lba_tree_efforts);
+  register_tree_metrics(
+      backref_label,
+      stats.backref_tree_depth,
+      stats.committed_backref_tree_efforts,
+      stats.invalidated_backref_tree_efforts);
 
   /**
    * conflict combinations
@@ -794,15 +819,20 @@ void Cache::mark_transaction_conflicted(
     efforts.ool_record_bytes += ool_record_bytes;
 
     if (is_cleaner_transaction(t.get_src())) {
-      // CLEANER transaction won't contain any onode tree operations
+      // CLEANER transaction won't contain any onode/omap tree operations
       assert(t.onode_tree_stats.is_clear());
+      assert(t.omap_tree_stats.is_clear());
     } else {
       get_by_src(stats.invalidated_onode_tree_efforts, t.get_src()
           ).increment(t.onode_tree_stats);
+      get_by_src(stats.invalidated_omap_tree_efforts, t.get_src()
+          ).increment(t.omap_tree_stats);
     }
 
     get_by_src(stats.invalidated_lba_tree_efforts, t.get_src()
         ).increment(t.lba_tree_stats);
+    get_by_src(stats.invalidated_backref_tree_efforts, t.get_src()
+        ).increment(t.backref_tree_stats);
 
     SUBDEBUGT(seastore_t,
         "discard {} read, {} fresh, {} delta, {} retire, {}({}B) ool-records",
@@ -820,7 +850,9 @@ void Cache::mark_transaction_conflicted(
     assert(t.mutated_block_list.empty());
     assert(t.get_ool_write_stats().is_clear());
     assert(t.onode_tree_stats.is_clear());
+    assert(t.omap_tree_stats.is_clear());
     assert(t.lba_tree_stats.is_clear());
+    assert(t.backref_tree_stats.is_clear());
     SUBDEBUGT(seastore_t, "discard {} read", t, read_stat);
   }
 }
@@ -848,7 +880,9 @@ void Cache::on_transaction_destruct(Transaction& t)
     assert(t.get_fresh_block_stats().is_clear());
     assert(t.mutated_block_list.empty());
     assert(t.onode_tree_stats.is_clear());
+    assert(t.omap_tree_stats.is_clear());
     assert(t.lba_tree_stats.is_clear());
+    assert(t.backref_tree_stats.is_clear());
   }
 }
 
@@ -1159,7 +1193,9 @@ record_t Cache::prepare_record(
     SUBINFOT(seastore_t,
         "record to submit is empty, src={}", t, trans_src);
     assert(t.onode_tree_stats.is_clear());
+    assert(t.omap_tree_stats.is_clear());
     assert(t.lba_tree_stats.is_clear());
+    assert(t.backref_tree_stats.is_clear());
     assert(ool_stats.is_clear());
   }
 
@@ -1182,12 +1218,15 @@ record_t Cache::prepare_record(
   if (is_cleaner_transaction(trans_src)) {
-    // CLEANER transaction won't contain any onode tree operations
+    // CLEANER transaction won't contain any onode/omap tree operations
     assert(t.onode_tree_stats.is_clear());
+    assert(t.omap_tree_stats.is_clear());
   } else {
     if (t.onode_tree_stats.depth) {
       stats.onode_tree_depth = t.onode_tree_stats.depth;
     }
     get_by_src(stats.committed_onode_tree_efforts, trans_src
         ).increment(t.onode_tree_stats);
+    get_by_src(stats.committed_omap_tree_efforts, trans_src
+        ).increment(t.omap_tree_stats);
   }
 
   if (t.lba_tree_stats.depth) {
@@ -1195,6 +1234,11 @@ record_t Cache::prepare_record(
   }
   get_by_src(stats.committed_lba_tree_efforts, trans_src
       ).increment(t.lba_tree_stats);
+  if (t.backref_tree_stats.depth) {
+    stats.backref_tree_depth = t.backref_tree_stats.depth;
+  }
+  get_by_src(stats.committed_backref_tree_efforts, trans_src
+      ).increment(t.backref_tree_stats);
 
   ++(efforts.num_trans);
   efforts.num_ool_records += ool_stats.num_records;
index 751d0d04f46e200fe178f902f0d18b312aa24bd8..1f92ba25570c861d59675decde52a26e30735e36 100644 (file)
@@ -1124,10 +1124,12 @@ private:
   struct tree_efforts_t {
     uint64_t num_inserts = 0;
     uint64_t num_erases = 0;
+    uint64_t num_updates = 0;
 
     void increment(const Transaction::tree_stats_t& incremental) {
       num_inserts += incremental.num_inserts;
       num_erases += incremental.num_erases;
+      num_updates += incremental.num_updates;
     }
   };
 
@@ -1149,10 +1151,18 @@ private:
     counter_by_src_t<tree_efforts_t> committed_onode_tree_efforts;
     counter_by_src_t<tree_efforts_t> invalidated_onode_tree_efforts;
 
+    uint64_t omap_tree_depth = 0;
+    counter_by_src_t<tree_efforts_t> committed_omap_tree_efforts;
+    counter_by_src_t<tree_efforts_t> invalidated_omap_tree_efforts;
+
     uint64_t lba_tree_depth = 0;
     counter_by_src_t<tree_efforts_t> committed_lba_tree_efforts;
     counter_by_src_t<tree_efforts_t> invalidated_lba_tree_efforts;
 
+    uint64_t backref_tree_depth = 0;
+    counter_by_src_t<tree_efforts_t> committed_backref_tree_efforts;
+    counter_by_src_t<tree_efforts_t> invalidated_backref_tree_efforts;
+
     std::array<uint64_t, NUM_SRC_COMB> trans_conflicts_by_srcs;
     counter_by_src_t<uint64_t> trans_conflicts_by_unknown;
   } stats;
index dafe3222d1b2d96911a3a4e7c58aba94f3a71bf7..e94c8287aafa4e066b31d2130bad5b3c43af07b9 100644 (file)
@@ -445,8 +445,10 @@ OMapLeafNode::insert(
     }
     auto replace_pt = find_string_key(key);
     if (replace_pt != iter_end()) {
+      ++(oc.t.get_omap_tree_stats().num_updates);
       journal_leaf_update(replace_pt, key, value, maybe_get_delta_buffer());
     } else {
+      ++(oc.t.get_omap_tree_stats().num_inserts);
       auto insert_pt = string_lower_bound(key);
       journal_leaf_insert(insert_pt, key, value, maybe_get_delta_buffer());
 
@@ -462,6 +464,7 @@ OMapLeafNode::insert(
       auto [left, right, pivot] = tuple;
       auto replace_pt = find_string_key(key);
       if (replace_pt != iter_end()) {
+        ++(oc.t.get_omap_tree_stats().num_updates);
         if (key < pivot) {  //left
           auto mut_iter = left->iter_idx(replace_pt->get_index());
           left->journal_leaf_update(mut_iter, key, value, left->maybe_get_delta_buffer());
@@ -470,6 +473,7 @@ OMapLeafNode::insert(
           right->journal_leaf_update(mut_iter, key, value, right->maybe_get_delta_buffer());
         }
       } else {
+        ++(oc.t.get_omap_tree_stats().num_inserts);
         auto insert_pt = string_lower_bound(key);
         if (key < pivot) {  //left
           auto mut_iter = left->iter_idx(insert_pt->get_index());
@@ -500,6 +504,7 @@ OMapLeafNode::rm_key(omap_context_t oc, const std::string &key)
 
   auto rm_pt = find_string_key(key);
   if (rm_pt != iter_end()) {
+    ++(oc.t.get_omap_tree_stats().num_erases);
     journal_leaf_remove(rm_pt, maybe_get_delta_buffer());
     if (extent_is_below_min()) {
       return rm_key_ret(
index f068bf2461ec562f4ada6a44c20c33dfcebfc4d5..6ff6cad786a7f0b663ce95ff9bd1ea818a3e62de 100644 (file)
@@ -22,13 +22,17 @@ namespace crimson::os::seastore::onode {
  * tree_cursor_t
  */
 
+// create from insert
 tree_cursor_t::tree_cursor_t(Ref<LeafNode> node, const search_position_t& pos)
       : ref_leaf_node{node}, position{pos}, cache{ref_leaf_node}
 {
   assert(is_tracked());
   ref_leaf_node->do_track_cursor<true>(*this);
+  // do not account updates for the inserted values
+  is_mutated = true;
 }
 
+// create from lookup
 tree_cursor_t::tree_cursor_t(
     Ref<LeafNode> node, const search_position_t& pos,
     const key_view_t& key_view, const value_header_t* p_value_header)
@@ -39,6 +43,7 @@ tree_cursor_t::tree_cursor_t(
   ref_leaf_node->do_track_cursor<true>(*this);
 }
 
+// lookup reaches the end, contain leaf node for further insert
 tree_cursor_t::tree_cursor_t(Ref<LeafNode> node)
       : ref_leaf_node{node}, position{search_position_t::end()}, cache{ref_leaf_node}
 {
@@ -46,6 +51,7 @@ tree_cursor_t::tree_cursor_t(Ref<LeafNode> node)
   assert(ref_leaf_node->is_level_tail());
 }
 
+// destructor: handles the case where the cursor is still tracked
 tree_cursor_t::~tree_cursor_t()
 {
   if (is_tracked()) {
@@ -2149,7 +2155,8 @@ Ref<tree_cursor_t> LeafNode::get_or_track_cursor(
   Ref<tree_cursor_t> p_cursor;
   auto found = tracked_cursors.find(position);
   if (found == tracked_cursors.end()) {
-    p_cursor = tree_cursor_t::create(this, position, key, p_value_header);
+    p_cursor = tree_cursor_t::create_tracked(
+        this, position, key, p_value_header);
   } else {
     p_cursor = found->second;
     assert(p_cursor->get_leaf_node() == this);
@@ -2199,7 +2206,8 @@ Ref<tree_cursor_t> LeafNode::track_insert(
   // track insert
   // TODO: getting key_view_t from stage::proceed_insert() and
   // stage::append_insert() has not supported yet
-  return tree_cursor_t::create(this, insert_pos);
+  return tree_cursor_t::create_inserted(
+      this, insert_pos);
 }
 
 void LeafNode::track_split(
index 4abcf341ce811c9c6836508e1ea5ce4f67028299..51e8c54085f8b03650d7038fa8de10e102e8cd89 100644 (file)
@@ -137,6 +137,10 @@ class tree_cursor_t final
   std::pair<NodeExtentMutable&, ValueDeltaRecorder*>
   prepare_mutate_value_payload(context_t c) {
     assert(is_tracked());
+    if (!is_mutated) {
+      is_mutated = true;
+      ++(c.t.get_onode_tree_stats().num_updates);
+    }
     return cache.prepare_mutate_value_payload(c, position);
   }
 
@@ -152,7 +156,9 @@ class tree_cursor_t final
   }
 
  private:
+  // create from insert
   tree_cursor_t(Ref<LeafNode>, const search_position_t&);
+  // create from lookup
   tree_cursor_t(Ref<LeafNode>, const search_position_t&,
                 const key_view_t&, const value_header_t*);
   // lookup reaches the end, contain leaf node for further insert
@@ -168,12 +174,14 @@ class tree_cursor_t final
                               const value_header_t*) const;
   void invalidate();
 
-  static Ref<tree_cursor_t> create(Ref<LeafNode> node, const search_position_t& pos) {
+  static Ref<tree_cursor_t> create_inserted(
+      Ref<LeafNode> node, const search_position_t& pos) {
     return new tree_cursor_t(node, pos);
   }
 
-  static Ref<tree_cursor_t> create(Ref<LeafNode> node, const search_position_t& pos,
-                                   const key_view_t& key, const value_header_t* p_header) {
+  static Ref<tree_cursor_t> create_tracked(
+      Ref<LeafNode> node, const search_position_t& pos,
+      const key_view_t& key, const value_header_t* p_header) {
     return new tree_cursor_t(node, pos, key, p_header);
   }
 
@@ -190,6 +198,9 @@ class tree_cursor_t final
   Ref<LeafNode> ref_leaf_node;
   search_position_t position;
 
+  // account 1 update even if there are multiple updates to the same value
+  bool is_mutated = false;
+
   /** Cache
    *
    * Cached memory pointers or views which may be outdated due to
index c5b7f9e291dbf5a5bc299a188adf0fca0680655e..5c4e261e3969e6d7e2c7bae79def9c1d7a43276e 100644 (file)
@@ -319,6 +319,7 @@ public:
     retired_set.clear();
     no_release_delta_retired_set.clear();
     onode_tree_stats = {};
+    omap_tree_stats = {};
     lba_tree_stats = {};
     backref_tree_stats = {};
     ool_write_stats = {};
@@ -337,16 +338,21 @@ public:
     uint64_t depth = 0;
     uint64_t num_inserts = 0;
     uint64_t num_erases = 0;
+    uint64_t num_updates = 0;
 
     bool is_clear() const {
       return (depth == 0 &&
               num_inserts == 0 &&
-              num_erases == 0);
+              num_erases == 0 &&
+              num_updates == 0);
     }
   };
   tree_stats_t& get_onode_tree_stats() {
     return onode_tree_stats;
   }
+  tree_stats_t& get_omap_tree_stats() {
+    return omap_tree_stats;
+  }
   tree_stats_t& get_lba_tree_stats() {
     return lba_tree_stats;
   }
@@ -435,6 +441,7 @@ private:
 
   /// stats to collect when commit or invalidate
   tree_stats_t onode_tree_stats;
+  tree_stats_t omap_tree_stats; // exclude omap tree depth
   tree_stats_t lba_tree_stats;
   tree_stats_t backref_tree_stats;
   ool_write_stats_t ool_write_stats;