]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw/multisite/datalog: add_entry() failures are fatal, actually
authorAdam Emerson <aemerson@redhat.com>
Sat, 30 Mar 2024 01:34:36 +0000 (21:34 -0400)
committerAdam C. Emerson <aemerson@redhat.com>
Tue, 1 Apr 2025 15:10:14 +0000 (11:10 -0400)
Except in one particular case.

This is necessary to close the data replication write hole, so that if
we fail to write to the semaphore set, the client will receive an
error.

Signed-off-by: Adam Emerson <aemerson@redhat.com>
src/rgw/driver/rados/rgw_rados.cc
src/rgw/services/svc_bi_rados.cc

index 76fec9236dcaa0ef25b18549f086b9080c36d5b0..1e695360336617095dfc18ed665d8e5a4a5fe146 100644 (file)
@@ -771,19 +771,22 @@ int RGWRados::get_max_chunk_size(const rgw_placement_rule& placement_rule, const
   return get_max_chunk_size(pool, max_chunk_size, dpp, palignment);
 }
 
-void add_datalog_entry(const DoutPrefixProvider* dpp,
-                       RGWDataChangesLog* datalog,
-                       const RGWBucketInfo& bucket_info,
-                       uint32_t shard_id, optional_yield y)
+[[nodiscard]] int add_datalog_entry(const DoutPrefixProvider* dpp,
+                                   RGWDataChangesLog* datalog,
+                                   const RGWBucketInfo& bucket_info,
+                                   uint32_t shard_id, optional_yield y)
 {
   const auto& logs = bucket_info.layout.logs;
   if (logs.empty()) {
-    return;
+    return 0;
   }
   int r = datalog->add_entry(dpp, bucket_info, logs.back(), shard_id, y);
   if (r < 0) {
     ldpp_dout(dpp, -1) << "ERROR: failed writing data log" << dendl;
-  } // datalog error is not fatal
+  }
+  // DataLog error *is* fatal so that if we fail to write to the
+  // semaphore set we return an error.
+  return r;
 }
 
 class RGWIndexCompletionManager;
@@ -962,8 +965,11 @@ void RGWIndexCompletionManager::process()
 
       if (c->log_op) {
         // This null_yield can stay, for now, since we're in our own thread
-        add_datalog_entry(&dpp, store->svc.datalog_rados, bucket_info,
-                          bs.shard_id, null_yield);
+        r = add_datalog_entry(&dpp, store->svc.datalog_rados, bucket_info,
+                             bs.shard_id, null_yield);
+       ldpp_dout(&dpp, 0) << "ERROR: " << __func__ << "(): write to datalog failed, obj=" << c->obj << " r=" << r << dendl;
+
+        /* ignoring error, can't do anything about it */
       }
     }
   }
@@ -6150,8 +6156,10 @@ int RGWRados::Object::Delete::delete_obj(optional_yield y, const DoutPrefixProvi
     }
 
     if (add_log) {
-      add_datalog_entry(dpp, store->svc.datalog_rados,
-                        target->get_bucket_info(), bs->shard_id, y);
+      r = add_datalog_entry(dpp, store->svc.datalog_rados,
+                           target->get_bucket_info(), bs->shard_id, y);
+      ldpp_dout(dpp, 0) << "failed to write datalog for object: r=" << r << dendl;
+      return r;
     }
 
     return 0;
@@ -7481,8 +7489,8 @@ int RGWRados::Bucket::UpdateIndex::complete(const DoutPrefixProvider *dpp, int64
 
   ret = store->cls_obj_complete_add(*bs, obj, optag, poolid, epoch, ent, category, remove_objs, bilog_flags, zones_trace, add_log);
   if (add_log) {
-    add_datalog_entry(dpp, store->svc.datalog_rados,
-                      target->bucket_info, bs->shard_id, y);
+    ret = add_datalog_entry(dpp, store->svc.datalog_rados,
+                           target->bucket_info, bs->shard_id, y);
   }
 
   return ret;
@@ -7512,8 +7520,8 @@ int RGWRados::Bucket::UpdateIndex::complete_del(const DoutPrefixProvider *dpp,
   ret = store->cls_obj_complete_del(*bs, optag, poolid, epoch, obj, removed_mtime, remove_objs, bilog_flags, zones_trace, add_log);
 
   if (add_log) {
-    add_datalog_entry(dpp, store->svc.datalog_rados,
-                      target->bucket_info, bs->shard_id, y);
+    ret = add_datalog_entry(dpp, store->svc.datalog_rados,
+                           target->bucket_info, bs->shard_id, y);
   }
 
   return ret;
@@ -7543,8 +7551,8 @@ int RGWRados::Bucket::UpdateIndex::cancel(const DoutPrefixProvider *dpp,
      * for following the specific bucket shard log. Otherwise they end up staying behind, and users
      * have no way to tell that they're all caught up
      */
-    add_datalog_entry(dpp, store->svc.datalog_rados,
-                      target->bucket_info, bs->shard_id, y);
+    ret = add_datalog_entry(dpp, store->svc.datalog_rados,
+                           target->bucket_info, bs->shard_id, y);
   }
 
   return ret;
@@ -8373,10 +8381,10 @@ int RGWRados::bucket_index_link_olh(const DoutPrefixProvider *dpp, RGWBucketInfo
   }
 
   if (log_data_change) {
-    add_datalog_entry(dpp, svc.datalog_rados, bucket_info, bs.shard_id, y);
+    r = add_datalog_entry(dpp, svc.datalog_rados, bucket_info, bs.shard_id, y);
   }
 
-  return 0;
+  return r;
 }
 
 void RGWRados::bucket_index_guard_olh_op(const DoutPrefixProvider *dpp, RGWObjState& olh_state, ObjectOperation& op)
index f63ee050680bd9aea4bfc82376a31a1f1ad389ad..ec0113c190093465343e15cedadd71349f1638b6 100644 (file)
@@ -514,8 +514,8 @@ int RGWSI_BucketIndex_RADOS::handle_overwrite(const DoutPrefixProvider *dpp,
     ret = svc.datalog_rados->add_entry(dpp, info, bilog, i, y);
     if (ret < 0) {
       ldpp_dout(dpp, -1) << "ERROR: failed writing data log (info.bucket=" << info.bucket << ", shard_id=" << i << ")" << dendl;
-    } // datalog error is not fatal
+    } // datalog error is now fatal, so we'll error on semaphore increment failure
   }
 
-  return 0;
+  return ret;
 }