From d2175ee830414d541b4fdb0ed77fe6fb7f01ec22 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 8 Oct 2010 10:45:51 -0700 Subject: [PATCH] filestore: don't start commit if nothing new is _applied_ We were starting a commit if we had started a new op, but that left a window in which the op was still being journaled and nothing new had yet been applied to disk. With this fix we only commit if committing/committed will increase. Now the check matches the 'committing_seq = applied_seq;' assignment a few lines down, and all is well. The actual crash this fixes was: 2010-10-07 16:20:36.245301 7f07e66d3710 filestore(/mnt/osd3) taking snap 'snap_23230' 2010-10-07 16:20:36.245428 7f07e66d3710 filestore(/mnt/osd3) snap create 'snap_23230' got -1 File exists os/FileStore.cc: In function 'void FileStore::sync_entry()': os/FileStore.cc:1738: FAILED assert(r == 0) ceph version 0.22~rc (1d77c14bc310aed31d6cfeb2c87e87187d3527ea) 1: (FileStore::sync_entry()+0x6ee) [0x793148] 2: (FileStore::SyncThread::entry()+0x19) [0x761d43] 3: (Thread::_entry_func(void*)+0x20) [0x667822] 4: (()+0x68ba) [0x7f07eac248ba] 5: (clone()+0x6d) [0x7f07e9bd802d] Signed-off-by: Sage Weil --- src/os/JournalingObjectStore.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/os/JournalingObjectStore.cc b/src/os/JournalingObjectStore.cc index c82dc3f91c545..e152c08496984 100644 --- a/src/os/JournalingObjectStore.cc +++ b/src/os/JournalingObjectStore.cc @@ -148,14 +148,16 @@ bool JournalingObjectStore::commit_start() // suspend new ops... 
Mutex::Locker l(lock); - dout(10) << "commit_start" << dendl; + dout(10) << "commit_start op_seq " << op_seq + << ", applied_seq " << applied_seq + << ", committed_seq " << committed_seq << dendl; blocked = true; while (open_ops > 0) { dout(10) << "commit_start blocked, waiting for " << open_ops << " open ops" << dendl; cond.Wait(lock); } - if (op_seq == committed_seq) { + if (applied_seq == committed_seq) { dout(10) << "commit_start nothing to do" << dendl; blocked = false; cond.Signal(); -- 2.39.5