]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
cls/rgw: rgw_dir_suggest_changes detects race with completion
authorCasey Bodley <cbodley@redhat.com>
Thu, 10 Mar 2022 20:32:48 +0000 (15:32 -0500)
committerJ. Eric Ivancich <ivancich@redhat.com>
Wed, 13 Apr 2022 17:45:59 +0000 (13:45 -0400)
if bucket listing races with a pending index transaction, its suggested
removal may be mistakenly applied if that index transaction completes
before the osd receives this suggestion

in `rgw_dir_suggest_changes()`, the sole condition for applying a
suggested change is that the `cur_disk.pending_map` is empty. this is
true after rgw_bucket_complete_op()

on index completion, `rgw_bucket_dir_entry::index_ver` is updated to match
the new value of `rgw_bucket_dir_header::ver`. because most of `struct
rgw_bucket_dir_entry` makes the round trip through bucket listing ->
dir_suggest, we have access to the index_ver of the suggested entry. by
comparing this against the stored entry, we can ignore any suggestions
that were sent before the most recent completion

Fixes: https://tracker.ceph.com/issues/54528
Signed-off-by: Casey Bodley <cbodley@redhat.com>
(cherry picked from commit aa381b6765b0fb316976c4af7a45f32a157a4f75)

src/cls/rgw/cls_rgw.cc

index 432b3aef71a41ff889f33f2eb000a4903e42846f..d0046e87dc1f868618e1823afb69db07dd9a5d1f 100644 (file)
@@ -2205,6 +2205,8 @@ int rgw_dir_suggest_changes(cls_method_context_t hctx,
         return -EINVAL;
       }
 
+      // remove any pending entries whose tag timeout has expired. until expiry,
+      // these pending entries will prevent us from applying suggested changes
       real_time cur_time = real_clock::now();
       auto iter = cur_disk.pending_map.begin();
       while(iter != cur_disk.pending_map.end()) {
@@ -2215,9 +2217,18 @@ int rgw_dir_suggest_changes(cls_method_context_t hctx,
       }
     }
 
-    CLS_LOG(20, "cur_disk.pending_map.empty()=%d op=%d cur_disk.exists=%d cur_change.pending_map.size()=%d cur_change.exists=%d",
+    CLS_LOG(20, "cur_disk.pending_map.empty()=%d op=%d cur_disk.exists=%d "
+            "cur_disk.index_ver=%d cur_change.exists=%d cur_change.index_ver=%d",
            cur_disk.pending_map.empty(), (int)op, cur_disk.exists,
-           (int)cur_change.pending_map.size(), cur_change.exists);
+            (int)cur_disk.index_ver, cur_change.exists,
+            (int)cur_change.index_ver);
+
+    if (cur_change.index_ver < cur_disk.index_ver) {
+      // a pending on-disk entry was completed since this suggestion was made,
+      // don't apply it yet. if the index really is inconsistent, the next
+      // listing will get the latest version and resend the suggestion
+      continue;
+    }
 
     if (cur_disk.pending_map.empty()) {
       if (cur_disk.exists) {