From: Casey Bodley
Date: Sun, 10 Jun 2018 14:10:05 +0000 (-0400)
Subject: rgw: bucket sync only allows one olh op at a time
X-Git-Tag: v14.0.1~844^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=d120ca1734d247a1b8c78e1a8bfd7441f4fef8b4;p=ceph.git

rgw: bucket sync only allows one olh op at a time

if olh operations on different instances of an object race to sync,
their olh epochs could be applied in the wrong order and result in one
operation being rejected by the olh log.

serialize olh operations on the same object name to prevent this from
happening

Signed-off-by: Casey Bodley
---

diff --git a/src/rgw/rgw_data_sync.cc b/src/rgw/rgw_data_sync.cc
index 1a35051fc42d..c7f2a9ce0989 100644
--- a/src/rgw/rgw_data_sync.cc
+++ b/src/rgw/rgw_data_sync.cc
@@ -2454,18 +2454,28 @@ class RGWBucketIncSyncShardMarkerTrack : public RGWSyncShardMarkerTrack<string, rgw_obj_key>
   map<rgw_obj_key, string> key_to_marker;
-  map<string, rgw_obj_key> marker_to_key;
+
+  struct operation {
+    rgw_obj_key key;
+    bool is_olh;
+  };
+  map<string, operation> marker_to_op;
+  std::set<std::string> pending_olh; // object names with pending olh operations
 
   RGWSyncTraceNodeRef tn;
 
   void handle_finish(const string& marker) override {
-    map<string, rgw_obj_key>::iterator iter = marker_to_key.find(marker);
-    if (iter == marker_to_key.end()) {
+    auto iter = marker_to_op.find(marker);
+    if (iter == marker_to_op.end()) {
       return;
     }
-    key_to_marker.erase(iter->second);
-    reset_need_retry(iter->second);
-    marker_to_key.erase(iter);
+    auto& op = iter->second;
+    key_to_marker.erase(op.key);
+    reset_need_retry(op.key);
+    if (op.is_olh) {
+      pending_olh.erase(op.key.name);
+    }
+    marker_to_op.erase(iter);
   }
 
 public:
@@ -2504,17 +2514,26 @@ public:
    * Also, we should make sure that we don't run concurrent operations on the same key with
    * different ops.
    */
-  bool index_key_to_marker(const rgw_obj_key& key, const string& marker) {
-    if (key_to_marker.find(key) != key_to_marker.end()) {
+  bool index_key_to_marker(const rgw_obj_key& key, const string& marker, bool is_olh) {
+    auto result = key_to_marker.emplace(key, marker);
+    if (!result.second) { // exists
       set_need_retry(key);
       return false;
     }
-    key_to_marker[key] = marker;
-    marker_to_key[marker] = key;
+    marker_to_op[marker] = operation{key, is_olh};
+    if (is_olh) {
+      // prevent other olh ops from starting on this object name
+      pending_olh.insert(key.name);
+    }
     return true;
   }
 
-  bool can_do_op(const rgw_obj_key& key) {
+  bool can_do_op(const rgw_obj_key& key, bool is_olh) {
+    // serialize olh ops on the same object name
+    if (is_olh && pending_olh.count(key.name)) {
+      tn->log(20, SSTR("sync of " << key << " waiting for pending olh op"));
+      return false;
+    }
     return (key_to_marker.find(key) == key_to_marker.end());
   }
 
@@ -3016,7 +3035,7 @@ int RGWBucketShardIncrementalSyncCR::operate()
       tn->log(20, SSTR("syncing object: "
           << bucket_shard_str{bs} << "/" << key));
       updated_status = false;
-      while (!marker_tracker.can_do_op(key)) {
+      while (!marker_tracker.can_do_op(key, has_olh_epoch(entry->op))) {
        if (!updated_status) {
          set_status() << "can't do op, conflicting inflight operation";
          updated_status = true;
@@ -3033,7 +3052,7 @@ int RGWBucketShardIncrementalSyncCR::operate()
          }
        }
       }
-      if (!marker_tracker.index_key_to_marker(key, cur_id)) {
+      if (!marker_tracker.index_key_to_marker(key, cur_id, has_olh_epoch(entry->op))) {
       set_status() << "can't do op, sync already in progress for object";
       tn->log(20, SSTR("skipping sync of entry: " << cur_id << ":" << key << " sync already in progress for object"));
       marker_tracker.try_update_high_marker(cur_id, 0, entry->timestamp);
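
For readers outside the rgw codebase, the sketch below is a minimal, self-contained
illustration of the serialization pattern the patch introduces, not the rgw
implementation itself: an in-flight table keyed by marker plus a set of object names
with a pending olh op, so that at most one olh operation per object name can start at
a time and their olh epochs are applied in order. The names here (Op, PendingTracker,
can_start, start, finish) are hypothetical.

    #include <iostream>
    #include <map>
    #include <set>
    #include <string>

    // Hypothetical stand-in for an operation read from a bucket index log entry.
    struct Op {
      std::string key;   // full object key (name plus version instance)
      std::string name;  // object name shared by all versioned instances
      bool is_olh;       // operation that updates the object's olh (versioning) log
    };

    // Tracks in-flight operations by marker and serializes olh operations per
    // object name, mirroring the idea of marker_to_op + pending_olh in the patch.
    class PendingTracker {
      std::map<std::string, Op> in_flight;  // marker -> operation
      std::set<std::string> pending_olh;    // object names with an olh op in flight
     public:
      // An olh op must wait while another olh op on the same name is in flight.
      bool can_start(const Op& op) const {
        return !(op.is_olh && pending_olh.count(op.name));
      }

      void start(const std::string& marker, const Op& op) {
        in_flight[marker] = op;
        if (op.is_olh) {
          pending_olh.insert(op.name);  // block other olh ops on this name
        }
      }

      void finish(const std::string& marker) {
        auto it = in_flight.find(marker);
        if (it == in_flight.end()) {
          return;
        }
        if (it->second.is_olh) {
          pending_olh.erase(it->second.name);  // let the next olh op proceed
        }
        in_flight.erase(it);
      }
    };

    int main() {
      PendingTracker t;
      Op a{"obj#v1", "obj", true};
      Op b{"obj#v2", "obj", true};

      t.start("0001", a);
      std::cout << std::boolalpha << t.can_start(b) << '\n';  // false: a still in flight
      t.finish("0001");
      std::cout << t.can_start(b) << '\n';                    // true: a has finished
      return 0;
    }

Keying the pending set by object name rather than by full object key is what orders
olh epochs across different instances of the same object, which is the race the
commit message describes.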