git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: auto-clean reshard queue entries for non-existent buckets 32055/head
author: J. Eric Ivancich <ivancich@redhat.com>
Fri, 1 Nov 2019 18:17:53 +0000 (14:17 -0400)
committer: Nathan Cutler <ncutler@suse.com>
Fri, 6 Dec 2019 11:56:02 +0000 (12:56 +0100)
It is possible for a bucket to be added to the reshard queue and then
to be removed before its entry in the reshard queue is processed.
Currently, when this is encountered, processing of the reshard queue
errors out.

This fix recognizes when the reshard queue entry refers to a
non-existent bucket and removes the entry from the reshard queue,
allowing processing of the queue to continue.

Signed-off-by: J. Eric Ivancich <ivancich@redhat.com>
(cherry picked from commit 304ad8803292e92911e86b84c39a6c2ff0d0121b)

Conflicts:
src/rgw/rgw_reshard.cc
- omit white-space change in get_bucket_info method call

src/rgw/rgw_reshard.cc

index 12ba93bd9d86e285de07aed5fb6b91239e0bf2b6..356179462aad0105f939e784b6ca1d07c4251e04 100644 (file)
@@ -1011,9 +1011,31 @@ int RGWReshard::process_single_logshard(int logshard_num)
        ret = store->get_bucket_info(obj_ctx, entry.tenant, entry.bucket_name,
                                     bucket_info, nullptr, &attrs);
        if (ret < 0) {
-         ldout(cct, 0) <<  __func__ << ": Error in get_bucket_info: " <<
-           cpp_strerror(-ret) << dendl;
-         return -ret;
+         ldout(cct, 0) <<  __func__ <<
+           ": Error in get_bucket_info for bucket " << entry.bucket_name <<
+           ": " << cpp_strerror(-ret) << dendl;
+         if (ret != -ENOENT) {
+           // any error other than ENOENT will abort
+           return ret;
+         }
+
+         // we've encountered a reshard queue entry for an apparently
+         // non-existent bucket; let's try to recover by cleaning up
+         ldout(cct, 0) <<  __func__ <<
+           ": removing reshard queue entry for non-existent bucket " <<
+           entry.bucket_name << dendl;
+
+         ret = remove(entry);
+         if (ret < 0) {
+           ldout(cct, 0) << __func__ <<
+             ": Error removing non-existent bucket " <<
+             entry.bucket_name << " from resharding queue: " <<
+             cpp_strerror(-ret) << dendl;
+           return ret;
+         }
+
+         // we cleaned up, move on to the next entry
+         goto finished_entry;
        }
 
        RGWBucketReshard br(store, bucket_info, attrs, nullptr);
@@ -1023,23 +1045,26 @@ int RGWReshard::process_single_logshard(int logshard_num)
        ret = br.execute(entry.new_num_shards, max_entries, true, nullptr,
                         formatter, this);
        if (ret < 0) {
-         ldout (store->ctx(), 0) <<  __func__ <<
-           "ERROR in reshard_bucket " << entry.bucket_name << ":" <<
+         ldout(store->ctx(), 0) <<  __func__ <<
+           ": Error during resharding bucket " << entry.bucket_name << ":" <<
            cpp_strerror(-ret)<< dendl;
          return ret;
        }
 
-       ldout (store->ctx(), 20) <<  " removing entry" << entry.bucket_name <<
+       ldout(store->ctx(), 20) << __func__ <<
+         " removing reshard queue entry for bucket " << entry.bucket_name <<
          dendl;
 
        ret = remove(entry);
        if (ret < 0) {
-         ldout(cct, 0)<< __func__ << ":Error removing bucket " <<
-           entry.bucket_name << " for resharding queue: " <<
+         ldout(cct, 0) << __func__ << ": Error removing bucket " <<
+           entry.bucket_name << " from resharding queue: " <<
            cpp_strerror(-ret) << dendl;
          return ret;
        }
-      }
+      } // if new instance id is empty
+
+    finished_entry:
 
       Clock::time_point now = Clock::now();
       if (logshard_lock.should_renew(now)) {
@@ -1050,7 +1075,7 @@ int RGWReshard::process_single_logshard(int logshard_num)
       }
 
       entry.get_key(&marker);
-    }
+    } // entry for loop
   } while (truncated);
 
   logshard_lock.unlock();