From 6a58a7335972ea2f3500a97aaa69f1f250f39f40 Mon Sep 17 00:00:00 2001 From: Or Friedmann Date: Wed, 16 Dec 2020 18:09:24 +0200 Subject: [PATCH] rgw: distribute cache for exclusive put Distribute cache for exclusive put Fixes: https://tracker.ceph.com/issues/48632 Thanks to Casey Bodley for finding the solution Signed-off-by: Or Friedmann (cherry picked from commit 4fb51bc125b4ec23c2f47e7326d67f885f7d268c) Conflicts: src/rgw/services/svc_notify.cc - robust_notify() call has different argument list in nautilus src/rgw/services/svc_sys_obj_cache.cc - distribute_cache() call has different argument list in nautilus --- src/rgw/services/svc_notify.cc | 20 +++++++++++++++----- src/rgw/services/svc_sys_obj_cache.cc | 15 +++------------ 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/rgw/services/svc_notify.cc b/src/rgw/services/svc_notify.cc index e557fa0fb0b9..9ee7f2953028 100644 --- a/src/rgw/services/svc_notify.cc +++ b/src/rgw/services/svc_notify.cc @@ -147,6 +147,7 @@ string RGWSI_Notify::get_control_oid(int i) return string(buf); } +// do not call pick_control_obj before init_watch RGWSI_RADOS::Obj RGWSI_Notify::pick_control_obj(const string& key) { uint32_t r = ceph_str_hash_linux(key.c_str(), key.size()); @@ -355,11 +356,20 @@ void RGWSI_Notify::_set_enabled(bool status) int RGWSI_Notify::distribute(const string& key, bufferlist& bl) { - RGWSI_RADOS::Obj notify_obj = pick_control_obj(key); - - ldout(cct, 10) << "distributing notification oid=" << notify_obj.get_ref().obj - << " bl.length()=" << bl.length() << dendl; - return robust_notify(notify_obj, bl); + /* The RGW uses the control pool to store the watch notify objects. + The precedence in RGWSI_Notify::do_start is to call zone_svc->start and only later init_watch(). + The first time an RGW starts in the cluster, it will try to create the zone and zonegroup system objects. 
+ In that case RGW will try to distribute the cache before it has run init_watch, + which would lead to a division by 0 in pick_control_obj (num_watchers is 0). + */ + if (num_watchers > 0) { + RGWSI_RADOS::Obj notify_obj = pick_control_obj(key); + + ldout(cct, 10) << "distributing notification oid=" << notify_obj.get_ref().obj + << " bl.length()=" << bl.length() << dendl; + return robust_notify(notify_obj, bl); + } + return 0; } int RGWSI_Notify::robust_notify(RGWSI_RADOS::Obj& notify_obj, bufferlist& bl) diff --git a/src/rgw/services/svc_sys_obj_cache.cc b/src/rgw/services/svc_sys_obj_cache.cc index 8ffe5f8a7696..9130e0541547 100644 --- a/src/rgw/services/svc_sys_obj_cache.cc +++ b/src/rgw/services/svc_sys_obj_cache.cc @@ -282,18 +282,9 @@ int RGWSI_SysObj_Cache::write(const rgw_raw_obj& obj, string name = normal_name(pool, oid); if (ret >= 0) { cache.put(name, info, NULL); - // Only distribute the cache information if we did not just create - // the object with the exclusive flag. Note: PUT_OBJ_EXCL implies - // PUT_OBJ_CREATE. Generally speaking, when successfully creating - // a system object with the exclusive flag it is not necessary to - // call distribute_cache, as a) it's unclear whether other RGWs - // will need that system object in the near-term and b) it - // generates additional network traffic. - if (!exclusive) { - int r = distribute_cache(name, obj, info, UPDATE_OBJ); - if (r < 0) - ldout(cct, 0) << "ERROR: failed to distribute cache for " << obj << dendl; - } + int r = distribute_cache(name, obj, info, UPDATE_OBJ); + if (r < 0) + ldout(cct, 0) << "ERROR: failed to distribute cache for " << obj << dendl; } else { cache.remove(name); } -- 2.47.3