git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore/BlueFS: fix async compaction txn
author: Sage Weil <sage@redhat.com>
Fri, 2 Sep 2016 16:06:49 +0000 (12:06 -0400)
committer: Sage Weil <sage@redhat.com>
Fri, 2 Sep 2016 18:00:10 +0000 (14:00 -0400)
Our transaction writes are labeled with a seq and uuid
to avoid replaying over garbage.

Two bugs, one real, one potential.

1) The second async compaction transaction didn't have
its seq and uuid set, so replay always stopped.

2) We were writing two separate transactions, one with
all the new metadata, and the next one with a jump to
the new log offset.  If the first write completed but
it was torn and the second transaction didn't hit disk,
we might see an old transaction with seq == 2 and the
same uuid and replay that instead.

Fix both of these by making the async log txn one single
transaction that jumps directly to the new log offset.

Signed-off-by: Sage Weil <sage@redhat.com>
src/os/bluestore/BlueFS.cc

index 9c7e21eb72b4ec72f8127a29af4b5cb9493a74fd..202c836be0be5c736378611cdd962ce32e8bd9fc 100644 (file)
@@ -1141,16 +1141,15 @@ void BlueFS::_compact_log_async(std::unique_lock<std::mutex>& l)
   bluefs_transaction_t t;
   _compact_log_dump_metadata(&t);
 
+  // conservative estimate for final encoded size
+  new_log_jump_to = ROUND_UP_TO(t.op_bl.length() + super.block_size * 2,
+                                g_conf->bluefs_alloc_size);
+  t.op_jump(log_seq, new_log_jump_to);
+
   bufferlist bl;
   ::encode(t, bl);
   _pad_bl(bl);
 
-  new_log_jump_to = ROUND_UP_TO(bl.length() + super.block_size,
-                                g_conf->bluefs_alloc_size);
-  bluefs_transaction_t t2;
-  t2.op_jump(log_seq, new_log_jump_to);
-  ::encode(t2, bl);
-  _pad_bl(bl);
   dout(10) << __func__ << " new_log_jump_to 0x" << std::hex << new_log_jump_to
           << std::dec << dendl;