librbd/cache/pwl/ssd: don't count log entries
author     Ilya Dryomov <idryomov@gmail.com>
           Sat, 8 May 2021 08:24:37 +0000 (10:24 +0200)
committer  Deepika Upadhyay <dupadhya@redhat.com>
           Fri, 5 Nov 2021 09:22:01 +0000 (14:52 +0530)
In ssd mode log entries are variable-sized.  Attempting to count log
entries and impose watermarks on their number is bogus because the
total number of entries it would take to fill the cache to capacity
is also variable and can't be precisely estimated.

had conflicts, but no new changes
Fixes: https://tracker.ceph.com/issues/50669
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
(cherry picked from commit ea65553b4a9ee1349c6da8452d861afe579e99e9)
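
To see why an entry-count watermark can't work here, consider how many
variable-sized entries fit in a fixed-size pool.  A standalone sketch
(not Ceph code; the pool and entry sizes are made up for illustration):

    #include <cstdint>
    #include <iostream>

    int main() {
      const uint64_t pool_bytes  = 1ull << 30;   // 1 GiB cache pool
      const uint64_t small_entry = 4096;         // a minimal 4 KiB write
      const uint64_t large_entry = 256 * 1024;   // a single 256 KiB write
      // 262144 small entries fill the pool, but only 4096 large ones:
      std::cout << pool_bytes / small_entry << " vs "
                << pool_bytes / large_entry << "\n";
      // A high-water mark counted in entries has to assume one of these
      // extremes and is wrong for the other; a mark counted in bytes
      // (say 0.5 * pool_bytes) holds for any mix of sizes.
    }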

src/librbd/cache/pwl/ssd/Request.cc
src/librbd/cache/pwl/ssd/WriteLog.cc

diff --git a/src/librbd/cache/pwl/ssd/Request.cc b/src/librbd/cache/pwl/ssd/Request.cc
index 69951e7601fbd2b1941844040c6c86003b2c996f..994674ff43f902e5fa7b271401d73f3242400b3e 100644
--- a/src/librbd/cache/pwl/ssd/Request.cc
+++ b/src/librbd/cache/pwl/ssd/Request.cc
@@ -22,7 +22,6 @@ void C_WriteRequest<T>::setup_buffer_resources(
   auto image_extents_size = this->image_extents.size();
   *bytes_cached = 0;
   *bytes_allocated = 0;
-  *number_lanes = image_extents_size;
   *number_log_entries = image_extents_size;
 
   for (auto &extent : this->image_extents) {
diff --git a/src/librbd/cache/pwl/ssd/WriteLog.cc b/src/librbd/cache/pwl/ssd/WriteLog.cc
index 91cce80716da654b57f3db7a788f5169d764e95a..b1c2d534e1dce986a4c83cb71233e97a1479dcb3 100644
--- a/src/librbd/cache/pwl/ssd/WriteLog.cc
+++ b/src/librbd/cache/pwl/ssd/WriteLog.cc
@@ -132,13 +132,7 @@ void WriteLog<I>::initialize_pool(Context *on_finish,
     m_cache_state->clean = true;
     m_cache_state->empty = true;
     /* new pool, calculate and store metadata */
-    size_t small_write_size = MIN_WRITE_ALLOC_SSD_SIZE + sizeof(struct WriteLogCacheEntry);
 
-    uint64_t num_small_writes = (uint64_t)(this->m_log_pool_size / small_write_size);
-    if (num_small_writes > MAX_LOG_ENTRIES) {
-      num_small_writes = MAX_LOG_ENTRIES;
-    }
-    assert(num_small_writes > 2);
     /* Size of ring buffer */
     this->m_bytes_allocated_cap =
         this->m_log_pool_size - DATA_RING_BUFFER_OFFSET;
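The deleted sizing logic estimated pool capacity by assuming every write
is a minimal one.  A standalone sketch of that estimate, with the
allocation-unit and metadata-entry sizes assumed for illustration (the
real constants are MIN_WRITE_ALLOC_SSD_SIZE and
sizeof(struct WriteLogCacheEntry)):

    #include <cstdint>
    #include <iostream>

    int main() {
      const uint64_t pool_size = 1ull << 30;  // 1 GiB pool, for illustration
      const uint64_t min_alloc = 4096;        // assumed MIN_WRITE_ALLOC_SSD_SIZE
      const uint64_t entry_md  = 64;          // assumed entry metadata size
      uint64_t num_small_writes = pool_size / (min_alloc + entry_md);
      // ~258k entries if every write is minimal, but a single 256 KiB
      // write consumes 64+ allocation units, so the pool's real entry
      // capacity depends entirely on the workload's write sizes.
      std::cout << num_small_writes << "\n";
    }

Only the byte cap survives the change: everything past the metadata area
at the head of the pool is usable ring-buffer space.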
@@ -152,46 +146,29 @@ void WriteLog<I>::initialize_pool(Context *on_finish,
     new_root->block_size = MIN_WRITE_ALLOC_SSD_SIZE;
     new_root->first_free_entry = m_first_free_entry;
     new_root->first_valid_entry = m_first_valid_entry;
-    new_root->num_log_entries = num_small_writes;
+    new_root->num_log_entries = 0;
     pool_root = *new_root;
 
     r = update_pool_root_sync(new_root);
     if (r != 0) {
-      this->m_total_log_entries = 0;
-      this->m_free_log_entries = 0;
       lderr(m_image_ctx.cct) << "failed to initialize pool ("
                              << this->m_log_pool_name << ")" << dendl;
       on_finish->complete(r);
     }
-    this->m_total_log_entries = new_root->num_log_entries;
-    this->m_free_log_entries = new_root->num_log_entries - 1;
-   } else {
-     m_cache_state->present = true;
-     bdev = BlockDevice::create(
-         cct, this->m_log_pool_name, aio_cache_cb,
-         static_cast<void*>(this), nullptr, static_cast<void*>(this));
-     int r = bdev->open(this->m_log_pool_name);
-     if (r < 0) {
-       delete bdev;
-       on_finish->complete(r);
-       return;
-     }
-     load_existing_entries(later);
-     if (m_first_free_entry < m_first_valid_entry) {
-      /* Valid entries wrap around the end of the ring, so first_free is lower
-       * than first_valid.  If first_valid was == first_free+1, the entry at
-       * first_free would be empty. The last entry is never used, so in
-       * that case there would be zero free log entries. */
-       this->m_free_log_entries = this->m_total_log_entries -
-         (m_first_valid_entry - m_first_free_entry) - 1;
-     } else {
-      /* first_valid is <= first_free. If they are == we have zero valid log
-       * entries, and n-1 free log entries */
-       this->m_free_log_entries = this->m_total_log_entries -
-         (m_first_free_entry - m_first_valid_entry) - 1;
-     }
-     m_cache_state->clean = this->m_dirty_log_entries.empty();
-     m_cache_state->empty = m_log_entries.empty();
+  } else {
+    m_cache_state->present = true;
+    bdev = BlockDevice::create(
+        cct, this->m_log_pool_name, aio_cache_cb,
+        static_cast<void*>(this), nullptr, static_cast<void*>(this));
+    int r = bdev->open(this->m_log_pool_name);
+    if (r < 0) {
+      delete bdev;
+      on_finish->complete(r);
+      return false;
+    }
+    load_existing_entries(later);
+    m_cache_state->clean = this->m_dirty_log_entries.empty();
+    m_cache_state->empty = m_log_entries.empty();
   }
 }
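The deleted else-branch maintained a free-entry count for a fixed-slot
ring buffer, where one slot is always left unused so that "full" and
"empty" remain distinguishable.  A standalone rendering of that
arithmetic (names shortened; not the Ceph code itself):

    #include <cstdint>

    // Fixed-slot ring; indices are slot numbers and one slot is never
    // used, so equal indices always mean an empty ring.
    uint64_t free_slots(uint64_t total, uint64_t first_free,
                        uint64_t first_valid) {
      if (first_free < first_valid) {
        // valid entries wrap past the end of the ring
        return total - (first_valid - first_free) - 1;
      }
      // first_valid <= first_free
      return total - (first_free - first_valid) - 1;
    }

With variable-sized entries there are no fixed slots to count, so this
bookkeeping goes away along with m_free_log_entries.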
 
@@ -240,7 +217,6 @@ void WriteLog<I>::load_existing_entries(pwl::DeferredContexts &later) {
   pool_root = current_pool_root;
   m_first_free_entry = first_free_entry;
   m_first_valid_entry = next_log_pos;
-  this->m_total_log_entries = current_pool_root.num_log_entries;
   this->m_flushed_sync_gen = current_pool_root.flushed_sync_gen;
   this->m_log_pool_size = current_pool_root.pool_size;
 
@@ -298,7 +274,12 @@ bool WriteLog<I>::alloc_resources(C_BlockIORequestT *req) {
                               &num_lanes, &num_log_entries,
                               &num_unpublished_reserves);
 
-  bytes_allocated += num_log_entries * MIN_WRITE_ALLOC_SSD_SIZE;
+  ceph_assert(!num_lanes);
+  if (num_log_entries) {
+    bytes_allocated += num_log_entries * MIN_WRITE_ALLOC_SSD_SIZE;
+    num_log_entries = 0;
+  }
+  ceph_assert(!num_unpublished_reserves);
 
   alloc_succeeds = this->check_allocation(req, bytes_cached, bytes_dirtied,
                                           bytes_allocated, num_lanes,
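In ssd mode each log entry's metadata still occupies at least one
minimal allocation unit on disk, so the per-entry reservation folds into
the byte budget, and the lane/reserve counters must arrive as zero (the
Request.cc hunk above stops requesting lanes for writes).  A
hypothetical helper showing the accounting, assuming a 4 KiB
MIN_WRITE_ALLOC_SSD_SIZE:

    #include <cassert>
    #include <cstdint>

    constexpr uint64_t MIN_WRITE_ALLOC_SSD_SIZE = 4096;  // assumed value

    // Fold the per-entry reservation into the byte budget; afterwards
    // the allocator checks bytes only.
    void fold_entries_into_bytes(uint64_t &bytes_allocated,
                                 uint64_t &num_log_entries,
                                 uint64_t num_lanes,
                                 uint64_t num_unpublished_reserves) {
      assert(num_lanes == 0);           // ssd requests never reserve lanes
      bytes_allocated += num_log_entries * MIN_WRITE_ALLOC_SSD_SIZE;
      num_log_entries = 0;              // bytes now carry the reservation
      assert(num_unpublished_reserves == 0);
    }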
@@ -535,9 +516,7 @@ void WriteLog<I>::process_work() {
   bool wake_up_requested = false;
   uint64_t aggressive_high_water_bytes =
       this->m_bytes_allocated_cap * AGGRESSIVE_RETIRE_HIGH_WATER;
-  uint64_t aggressive_high_water_entries = this->m_total_log_entries * AGGRESSIVE_RETIRE_HIGH_WATER;
   uint64_t high_water_bytes = this->m_bytes_allocated_cap * RETIRE_HIGH_WATER;
-  uint64_t high_water_entries = this->m_total_log_entries * RETIRE_HIGH_WATER;
 
   ldout(cct, 20) << dendl;
 
@@ -547,17 +526,13 @@ void WriteLog<I>::process_work() {
       this->m_wake_up_requested = false;
     }
     if (this->m_alloc_failed_since_retire || (this->m_shutting_down) ||
-        this->m_invalidating || m_bytes_allocated > high_water_bytes ||
-        (m_log_entries.size() > high_water_entries)) {
+        this->m_invalidating || m_bytes_allocated > high_water_bytes) {
       ldout(m_image_ctx.cct, 10) << "alloc_fail=" << this->m_alloc_failed_since_retire
                                  << ", allocated > high_water="
                                  << (m_bytes_allocated > high_water_bytes)
-                                 << ", allocated_entries > high_water="
-                                 << (m_log_entries.size() > high_water_entries)
                                  << dendl;
       retire_entries((this->m_shutting_down || this->m_invalidating ||
-                    (m_bytes_allocated > aggressive_high_water_bytes) ||
-                    (m_log_entries.size() > aggressive_high_water_entries))
+                      m_bytes_allocated > aggressive_high_water_bytes)
                     ? MAX_ALLOC_PER_TRANSACTION : MAX_FREE_PER_TRANSACTION);
     }
     this->dispatch_deferred_writes();
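After this hunk the retire decision in process_work() is driven by bytes
alone.  Condensed into a hypothetical helper, with the water-mark
fractions assumed for illustration (the real constants live in the pwl
headers):

    #include <cstdint>

    constexpr double RETIRE_HIGH_WATER            = 0.50;  // assumed
    constexpr double AGGRESSIVE_RETIRE_HIGH_WATER = 0.75;  // assumed

    // Decide from bytes alone whether to retire now, and whether to use
    // the larger (aggressive) per-transaction batch size.
    bool should_retire(uint64_t bytes_allocated, uint64_t cap,
                       bool alloc_failed, bool shutting_down,
                       bool invalidating, bool *aggressive) {
      const uint64_t high = cap * RETIRE_HIGH_WATER;
      const uint64_t aggr = cap * AGGRESSIVE_RETIRE_HIGH_WATER;
      *aggressive = shutting_down || invalidating || bytes_allocated > aggr;
      return alloc_failed || shutting_down || invalidating ||
             bytes_allocated > high;
    }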
@@ -706,7 +681,6 @@ bool WriteLog<I>::retire_entries(const unsigned long int frees_per_tx) {
           std::lock_guard locker(m_lock);
           m_first_valid_entry = first_valid_entry;
           ceph_assert(m_first_valid_entry % MIN_WRITE_ALLOC_SSD_SIZE == 0);
-          this->m_free_log_entries += retiring_entries.size();
           ceph_assert(this->m_bytes_allocated >= allocated_bytes);
           this->m_bytes_allocated -= allocated_bytes;
           ceph_assert(this->m_bytes_cached >= cached_bytes);