]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
librbd/pwl: fix memory leaks in discard operations
authorKefu Chai <k.chai@proxmox.com>
Tue, 17 Feb 2026 11:41:32 +0000 (19:41 +0800)
committerKefu Chai <k.chai@proxmox.com>
Sat, 21 Feb 2026 07:23:59 +0000 (15:23 +0800)
Fix memory leak in librbd persistent write log (PWL) cache discard
operations by properly completing request objects.

ASan reported the following leaks in unittest_librbd:

  Direct leak of 240 byte(s) in 1 object(s) allocated from:
    #0 operator new(unsigned long)
    #1 librbd::cache::pwl::AbstractWriteLog<librbd::MockImageCtx>::discard(...)
       /ceph/src/librbd/cache/pwl/AbstractWriteLog.cc:935:5
    #2 TestMockCacheReplicatedWriteLog_discard_Test::TestBody()
       /ceph/src/test/librbd/cache/pwl/test_mock_ReplicatedWriteLog.cc:534:7

  Plus multiple indirect leaks totaling 2,076 bytes through the
  shared_ptr reference chain.

Root cause:

C_DiscardRequest objects were never deleted because their complete()
method was never called. The on_write_persist callback released the
BlockGuard cell but didn't call complete() to trigger self-deletion.

Write requests use WriteLogOperationSet which takes the request as
its on_finish callback, ensuring complete() is eventually called.
Discard requests don't use WriteLogOperationSet and must explicitly
call complete() in their on_write_persist callback.

Solution:

Call discard_req->complete(r) in the on_write_persist callback and
move cell release into finish_req() -- mirroring how C_WriteRequest
handles it. The complete() -> finish() -> finish_req() chain ensures
the cell is released after the user request is completed, preserving
the same ordering as write requests.

Test results:
- Before: 2,316 bytes leaked in 15 allocations
- After: 0 bytes leaked
- unittest_librbd discard tests pass successfully with ASan

Fixes: https://tracker.ceph.com/issues/74972
Signed-off-by: Kefu Chai <k.chai@proxmox.com>
src/librbd/cache/pwl/Request.cc
src/librbd/cache/pwl/Request.h

index e424c2285a1b143358bcaad2f729b0669b2b591c..11322568eb9dccb395c051d64cf24baffcdd520e 100644 (file)
@@ -408,6 +408,14 @@ C_DiscardRequest<T>::~C_DiscardRequest() {
   ldout(pwl.get_context(), 20) << this << dendl;
 }
 
+template <typename T>
+void C_DiscardRequest<T>::finish_req(int r) {
+  ldout(pwl.get_context(), 20) << "discard_req=" << this
+                               << " cell=" << this->get_cell() << dendl;
+  ceph_assert(this->get_cell());
+  this->release_cell();
+}
+
 template <typename T>
 bool C_DiscardRequest<T>::alloc_resources() {
   ldout(pwl.get_context(), 20) << "req type=" << get_name()
@@ -436,12 +444,8 @@ void C_DiscardRequest<T>::setup_log_operations() {
   Context *on_write_append = pwl.get_current_sync_point()->prior_persisted_gather_new_sub();
 
   Context *on_write_persist = new LambdaContext(
-    [this, discard_req](int r) {
-      ldout(pwl.get_context(), 20) << "discard_req=" << discard_req
-                                   << " cell=" << discard_req->get_cell() << dendl;
-      ceph_assert(discard_req->get_cell());
-      discard_req->complete_user_request(r);
-      discard_req->release_cell();
+    [discard_req](int r) {
+      discard_req->complete(r);
     });
   op->init_op(current_sync_gen, persist_on_flush, pwl.get_last_op_sequence_num(),
               on_write_persist, on_write_append);
index 5052abdccc01f979effdccefb9888e064c68675c..8fe5360c2307fa5429b5a8d5d1ae85aba8112bd7 100644 (file)
@@ -246,7 +246,7 @@ public:
                    PerfCounters *perfcounter, Context *user_req);
 
   ~C_DiscardRequest() override;
-  void finish_req(int r) override {}
+  void finish_req(int r) override;
 
   bool alloc_resources() override;