git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: svc_sysobj: split core out
author Yehuda Sadeh <yehuda@redhat.com>
Mon, 27 Aug 2018 23:02:03 +0000 (16:02 -0700)
committer Yehuda Sadeh <yehuda@redhat.com>
Thu, 8 Nov 2018 17:19:29 +0000 (09:19 -0800)
Split the core sysobj interface out of the sysobj service so that a cache can
be implemented.

Signed-off-by: Yehuda Sadeh <yehuda@redhat.com>
src/rgw/CMakeLists.txt
src/rgw/services/svc_sys_obj.cc
src/rgw/services/svc_sys_obj.h
src/rgw/services/svc_sys_obj_core.cc [new file with mode: 0644]
src/rgw/services/svc_sys_obj_core.h [new file with mode: 0644]

diff --git a/src/rgw/CMakeLists.txt b/src/rgw/CMakeLists.txt
index 2ae150227d23e77c6f28b3051f21194aec950c08..5ace7d82b7063eeba9173eaca1d978189c616d99 100644 (file)
@@ -42,6 +42,7 @@ set(librgw_common_srcs
   services/svc_quota.cc
   services/svc_rados.cc
   services/svc_sys_obj.cc
+  services/svc_sys_obj_core.cc
   services/svc_zone.cc
   services/svc_zone_utils.cc
   rgw_service.cc
diff --git a/src/rgw/services/svc_sys_obj.cc b/src/rgw/services/svc_sys_obj.cc
index 53e19dde39f748a2907fa71d74aea8de0908a47f..85da7a0e743d99fc2152d6eda892108e55d7b40a 100644 (file)
@@ -1,19 +1,18 @@
 #include "svc_sys_obj.h"
+#include "svc_sys_obj_core.h"
 #include "svc_rados.h"
 #include "svc_zone.h"
 
-#include "rgw/rgw_tools.h"
-
 #define dout_subsys ceph_subsys_rgw
 
-RGWSysObjectCtx&& RGWSI_SysObj::init_obj_ctx()
+RGWSysObjectCtx RGWSI_SysObj::init_obj_ctx()
 {
-  return std::move(RGWSysObjectCtx(this));
+  return RGWSysObjectCtx(this);
 }
 
-RGWSI_SysObj::Obj&& RGWSI_SysObj::get_obj(RGWSysObjectCtx& obj_ctx, const rgw_raw_obj& obj)
+RGWSI_SysObj::Obj RGWSI_SysObj::get_obj(RGWSysObjectCtx& obj_ctx, const rgw_raw_obj& obj)
 {
-  return std::move(Obj(this, obj_ctx, obj));
+  return Obj(core_svc.get(), obj_ctx, obj);
 }
 
 int RGWS_SysObj::create_instance(const string& conf, RGWServiceInstanceRef *instance)
@@ -26,11 +25,11 @@ std::map<string, RGWServiceInstance::dependency> RGWSI_SysObj::get_deps()
 {
   RGWServiceInstance::dependency dep1 = { .name = "rados",
                                           .conf = "{}" };
-  RGWServiceInstance::dependency dep2 = { .name = "zone",
+  RGWServiceInstance::dependency dep2 = { .name = "sysobj_core",
                                           .conf = "{}" };
   map<string, RGWServiceInstance::dependency> deps;
   deps["rados_dep"] = dep1;
-  deps["zone_dep"] = dep2;
+  deps["sysobj_core_dep"] = dep2;
   return deps;
 }
 
@@ -39,494 +38,20 @@ int RGWSI_SysObj::load(const string& conf, std::map<std::string, RGWServiceInsta
   rados_svc = static_pointer_cast<RGWSI_RADOS>(dep_refs["rados_dep"]);
   assert(rados_svc);
 
-  zone_svc = static_pointer_cast<RGWSI_Zone>(dep_refs["zone_dep"]);
-  assert(zone_svc);
-
-  return 0;
-}
-
-int RGWSI_SysObj::get_rados_obj(RGWSI_Zone *zone_svc,
-                                rgw_raw_obj& obj,
-                                RGWSI_RADOS::Obj *pobj)
-{
-  zone_svc->canonicalize_raw_obj(&obj);
-
-  *pobj = std::move(rados_svc->obj(obj));
-  int r = pobj->open();
-  if (r < 0) {
-    return r;
-  }
-
-  return 0;
-}
-
-int RGWSI_SysObj::get_system_obj_state_impl(RGWSysObjectCtx *rctx, rgw_raw_obj& obj, RGWSysObjState **state, RGWObjVersionTracker *objv_tracker)
-{
-  if (obj.empty()) {
-    return -EINVAL;
-  }
-
-  RGWSysObjState *s = rctx->get_state(obj);
-  ldout(cct, 20) << "get_system_obj_state: rctx=" << (void *)rctx << " obj=" << obj << " state=" << (void *)s << " s->prefetch_data=" << s->prefetch_data << dendl;
-  *state = s;
-  if (s->has_attrs) {
-    return 0;
-  }
-
-  s->obj = obj;
-
-  int r = raw_stat(obj, &s->size, &s->mtime, &s->epoch, &s->attrset, (s->prefetch_data ? &s->data : nullptr), objv_tracker);
-  if (r == -ENOENT) {
-    s->exists = false;
-    s->has_attrs = true;
-    s->mtime = real_time();
-    return 0;
-  }
-  if (r < 0)
-    return r;
-
-  s->exists = true;
-  s->has_attrs = true;
-  s->obj_tag = s->attrset[RGW_ATTR_ID_TAG];
-
-  if (s->obj_tag.length())
-    ldout(cct, 20) << "get_system_obj_state: setting s->obj_tag to "
-                   << s->obj_tag.c_str() << dendl;
-  else
-    ldout(cct, 20) << "get_system_obj_state: s->obj_tag was set empty" << dendl;
-
-  return 0;
-}
-
-int RGWSI_SysObj::get_system_obj_state(RGWSysObjectCtx *rctx, rgw_raw_obj& obj, RGWSysObjState **state, RGWObjVersionTracker *objv_tracker)
-{
-  int ret;
-
-  do {
-    ret = get_system_obj_state_impl(rctx, obj, state, objv_tracker);
-  } while (ret == -EAGAIN);
-
-  return ret;
-}
-
-int RGWSI_SysObj::raw_stat(rgw_raw_obj& obj, uint64_t *psize, real_time *pmtime, uint64_t *epoch,
-                           map<string, bufferlist> *attrs, bufferlist *first_chunk,
-                           RGWObjVersionTracker *objv_tracker)
-{
-  RGWSI_RADOS::Obj rados_obj;
-  int r = get_rados_obj(zone_svc.get(), obj, &rados_obj);
-  if (r < 0) {
-    return r;
-  }
-
-  map<string, bufferlist> unfiltered_attrset;
-  uint64_t size = 0;
-  struct timespec mtime_ts;
-
-  librados::ObjectReadOperation op;
-  if (objv_tracker) {
-    objv_tracker->prepare_op_for_read(&op);
-  }
-  if (attrs) {
-    op.getxattrs(&unfiltered_attrset, nullptr);
-  }
-  if (psize || pmtime) {
-    op.stat2(&size, &mtime_ts, nullptr);
-  }
-  if (first_chunk) {
-    op.read(0, cct->_conf->rgw_max_chunk_size, first_chunk, nullptr);
-  }
-  bufferlist outbl;
-  r = rados_obj.operate(&op, &outbl);
-
-  if (epoch) {
-    *epoch = rados_obj.get_last_version();
-  }
-
-  if (r < 0)
-    return r;
-
-  if (psize)
-    *psize = size;
-  if (pmtime)
-    *pmtime = ceph::real_clock::from_timespec(mtime_ts);
-  if (attrs) {
-    rgw_filter_attrset(unfiltered_attrset, RGW_ATTR_PREFIX, attrs);
-  }
+  core_svc = static_pointer_cast<RGWSI_SysObj_Core>(dep_refs["sysobj_core_dep"]);
+  assert(core_svc);
 
   return 0;
 }
 
-int RGWSI_SysObj::stat(RGWSysObjectCtx& obj_ctx,
-                       RGWSI_SysObj::Obj::ROp::GetObjState& state,
-                       rgw_raw_obj& obj,
-                       map<string, bufferlist> *attrs,
-                       real_time *lastmod,
-                       uint64_t *obj_size,
-                       RGWObjVersionTracker *objv_tracker)
-{
-  RGWSysObjState *astate = nullptr;
-
-  int r = get_system_obj_state(&obj_ctx, obj, &astate, objv_tracker);
-  if (r < 0)
-    return r;
-
-  if (!astate->exists) {
-    return -ENOENT;
-  }
-
-  if (attrs) {
-    *attrs = astate->attrset;
-    if (cct->_conf->subsys.should_gather<ceph_subsys_rgw, 20>()) {
-      map<string, bufferlist>::iterator iter;
-      for (iter = attrs->begin(); iter != attrs->end(); ++iter) {
-        ldout(cct, 20) << "Read xattr: " << iter->first << dendl;
-      }
-    }
-  }
-
-  if (obj_size)
-    *obj_size = astate->size;
-  if (lastmod)
-    *lastmod = astate->mtime;
-
-  return 0;
-}
-
-int RGWSI_SysObj::read(RGWSysObjectCtx& obj_ctx,
-                       Obj::ROp::GetObjState& read_state,
-                       RGWObjVersionTracker *objv_tracker,
-                       rgw_raw_obj& obj,
-                       bufferlist *bl, off_t ofs, off_t end,
-                       map<string, bufferlist> *attrs,
-                       boost::optional<obj_version>)
-{
-  uint64_t len;
-  librados::ObjectReadOperation op;
-
-  if (end < 0)
-    len = 0;
-  else
-    len = end - ofs + 1;
-
-  if (objv_tracker) {
-    objv_tracker->prepare_op_for_read(&op);
-  }
-
-  ldout(cct, 20) << "rados->read ofs=" << ofs << " len=" << len << dendl;
-  op.read(ofs, len, bl, nullptr);
-
-  if (attrs) {
-    op.getxattrs(attrs, nullptr);
-  }
-
-  RGWSI_RADOS::Obj rados_obj;
-  int r = get_rados_obj(zone_svc.get(), obj, &rados_obj);
-  if (r < 0) {
-    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
-    return r;
-  }
-  r = rados_obj.operate(&op, nullptr);
-  if (r < 0) {
-    ldout(cct, 20) << "rados_obj.operate() r=" << r << " bl.length=" << bl->length() << dendl;
-    return r;
-  }
-  ldout(cct, 20) << "rados_obj.operate() r=" << r << " bl.length=" << bl->length() << dendl;
-
-  uint64_t op_ver = rados_obj.get_last_version();
-
-  if (read_state.last_ver > 0 &&
-      read_state.last_ver != op_ver) {
-    ldout(cct, 5) << "raced with an object write, abort" << dendl;
-    return -ECANCELED;
-  }
-
-  read_state.last_ver = op_ver;
-
-  return bl->length();
-}
-
-/**
- * Get an attribute for a system object.
- * obj: the object to get attr
- * name: name of the attr to retrieve
- * dest: bufferlist to store the result in
- * Returns: 0 on success, -ERR# otherwise.
- */
-int RGWSI_SysObj::get_attr(rgw_raw_obj& obj,
-                           const char *name,
-                           bufferlist *dest)
-{
-  RGWSI_RADOS::Obj rados_obj;
-  int r = get_rados_obj(zone_svc.get(), obj, &rados_obj);
-  if (r < 0) {
-    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
-    return r;
-  }
-
-  librados::ObjectReadOperation op;
-
-  int rval;
-  op.getxattr(name, dest, &rval);
-  
-  r = rados_obj.operate(&op, nullptr);
-  if (r < 0)
-    return r;
-
-  return 0;
-}
-
-int RGWSI_SysObj::omap_get_vals(rgw_raw_obj& obj,
-                                const string& marker,
-                                uint64_t count,
-                                std::map<string, bufferlist> *m,
-                                bool *pmore)
-{
-  RGWSI_RADOS::Obj rados_obj;
-  int r = get_rados_obj(zone_svc.get(), obj, &rados_obj);
-  if (r < 0) {
-    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
-    return r;
-  }
-
-  string start_after = marker;
-  bool more;
-
-  do {
-    librados::ObjectReadOperation op;
-
-    std::map<string, bufferlist> t;
-    int rval;
-    op.omap_get_vals2(start_after, count, &t, &more, &rval);
-  
-    r = rados_obj.operate(&op, nullptr);
-    if (r < 0) {
-      return r;
-    }
-    if (t.empty()) {
-      break;
-    }
-    count -= t.size();
-    start_after = t.rbegin()->first;
-    m->insert(t.begin(), t.end());
-  } while (more && count > 0);
-
-  if (pmore) {
-    *pmore = more;
-  }
-  return 0;
-}
-
-int RGWSI_SysObj::omap_get_all(rgw_raw_obj& obj, std::map<string, bufferlist> *m)
-{
-  RGWSI_RADOS::Obj rados_obj;
-  int r = get_rados_obj(zone_svc.get(), obj, &rados_obj);
-  if (r < 0) {
-    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
-    return r;
-  }
-
-#define MAX_OMAP_GET_ENTRIES 1024
-  const int count = MAX_OMAP_GET_ENTRIES;
-  string start_after;
-  bool more;
-
-  do {
-    librados::ObjectReadOperation op;
-
-    std::map<string, bufferlist> t;
-    int rval;
-    op.omap_get_vals2(start_after, count, &t, &more, &rval);
-  
-    r = rados_obj.operate(&op, nullptr);
-    if (r < 0) {
-      return r;
-    }
-    if (t.empty()) {
-      break;
-    }
-    start_after = t.rbegin()->first;
-    m->insert(t.begin(), t.end());
-  } while (more);
-  return 0;
-}
-
-int RGWSI_SysObj::omap_set(rgw_raw_obj& obj, const std::string& key, bufferlist& bl, bool must_exist)
-{
-  RGWSI_RADOS::Obj rados_obj;
-  int r = get_rados_obj(zone_svc.get(), obj, &rados_obj);
-  if (r < 0) {
-    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
-    return r;
-  }
-
-  ldout(cct, 15) << "omap_set obj=" << obj << " key=" << key << dendl;
-
-  map<string, bufferlist> m;
-  m[key] = bl;
-  librados::ObjectWriteOperation op;
-  if (must_exist)
-    op.assert_exists();
-  op.omap_set(m);
-  r = rados_obj.operate(&op);
-  return r;
-}
-
-int RGWSI_SysObj::omap_set(rgw_raw_obj& obj, const std::map<std::string, bufferlist>& m, bool must_exist)
-{
-  RGWSI_RADOS::Obj rados_obj;
-  int r = get_rados_obj(zone_svc.get(), obj, &rados_obj);
-  if (r < 0) {
-    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
-    return r;
-  }
-
-  librados::ObjectWriteOperation op;
-  if (must_exist)
-    op.assert_exists();
-  op.omap_set(m);
-  r = rados_obj.operate(&op);
-  return r;
-}
-
-int RGWSI_SysObj::omap_del(rgw_raw_obj& obj, const std::string& key)
-{
-  RGWSI_RADOS::Obj rados_obj;
-  int r = get_rados_obj(zone_svc.get(), obj, &rados_obj);
-  if (r < 0) {
-    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
-    return r;
-  }
-
-  set<string> k;
-  k.insert(key);
-
-  librados::ObjectWriteOperation op;
-
-  op.omap_rm_keys(k);
-
-  r = rados_obj.operate(&op);
-  return r;
-}
-
-int RGWSI_SysObj::remove(RGWSysObjectCtx& obj_ctx,
-                         RGWObjVersionTracker *objv_tracker,
-                         rgw_raw_obj& obj)
-{
-  RGWSI_RADOS::Obj rados_obj;
-  int r = get_rados_obj(zone_svc.get(), obj, &rados_obj);
-  if (r < 0) {
-    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
-    return r;
-  }
-
-  librados::ObjectWriteOperation op;
-
-  if (objv_tracker) {
-    objv_tracker->prepare_op_for_write(&op);
-  }
-
-  op.remove();
-  r = rados_obj.operate(&op);
-  if (r < 0)
-    return r;
-
-  return 0;
-}
-
-int RGWSI_SysObj::write(rgw_raw_obj& obj,
-                        real_time *pmtime,
-                        map<std::string, bufferlist>& attrs,
-                        bool exclusive,
-                        const bufferlist& data,
-                        RGWObjVersionTracker *objv_tracker,
-                        real_time set_mtime)
-{
-  RGWSI_RADOS::Obj rados_obj;
-  int r = get_rados_obj(zone_svc.get(), obj, &rados_obj);
-  if (r < 0) {
-    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
-    return r;
-  }
-
-  librados::ObjectWriteOperation op;
-
-  if (exclusive) {
-    op.create(true); // exclusive create
-  } else {
-    op.remove();
-    op.set_op_flags2(LIBRADOS_OP_FLAG_FAILOK);
-    op.create(false);
-  }
-
-  if (objv_tracker) {
-    objv_tracker->prepare_op_for_write(&op);
-  }
-
-  if (real_clock::is_zero(set_mtime)) {
-    set_mtime = real_clock::now();
-  }
-
-  struct timespec mtime_ts = real_clock::to_timespec(set_mtime);
-  op.mtime2(&mtime_ts);
-  op.write_full(data);
-
-  bufferlist acl_bl;
-
-  for (map<string, bufferlist>::iterator iter = attrs.begin(); iter != attrs.end(); ++iter) {
-    const string& name = iter->first;
-    bufferlist& bl = iter->second;
-
-    if (!bl.length())
-      continue;
-
-    op.setxattr(name.c_str(), bl);
-  }
-
-  r = rados_obj.operate(&op);
-  if (r < 0) {
-    return r;
-  }
-
-  if (objv_tracker) {
-    objv_tracker->apply_write();
-  }
-
-  if (pmtime) {
-    *pmtime = set_mtime;
-  }
-
-  return 0;
-}
-
-
 void RGWSI_SysObj::Obj::invalidate_state()
 {
   ctx.invalidate(obj);
 }
 
-int RGWSI_SysObj::Obj::ROp::GetObjState::get_rados_obj(RGWSI_RADOS *rados_svc,
-                                                       RGWSI_Zone *zone_svc,
-                                                       rgw_raw_obj& obj,
-                                                       RGWSI_RADOS::Obj **pobj)
-{
-  if (!has_rados_obj) {
-    zone_svc->canonicalize_raw_obj(&obj);
-
-    rados_obj = rados_svc->obj(obj);
-    int r = rados_obj.open();
-    if (r < 0) {
-      return r;
-    }
-    has_rados_obj = true;
-  }
-  *pobj = &rados_obj;
-  return 0;
-}
-
 int RGWSI_SysObj::Obj::ROp::stat()
 {
-  RGWSI_SysObj *svc = source.sysobj_svc;
+  RGWSI_SysObj_Core *svc = source.core_svc;
   rgw_raw_obj& obj = source.obj;
 
   return svc->stat(source.get_ctx(), state, obj, attrs,
@@ -536,7 +61,7 @@ int RGWSI_SysObj::Obj::ROp::stat()
 
 int RGWSI_SysObj::Obj::ROp::read(int64_t ofs, int64_t end, bufferlist *bl)
 {
-  RGWSI_SysObj *svc = source.sysobj_svc;
+  RGWSI_SysObj_Core *svc = source.core_svc;
   rgw_raw_obj& obj = source.get_obj();
 
   return svc->read(source.get_ctx(), state,
@@ -548,7 +73,7 @@ int RGWSI_SysObj::Obj::ROp::read(int64_t ofs, int64_t end, bufferlist *bl)
 
 int RGWSI_SysObj::Obj::ROp::get_attr(const char *name, bufferlist *dest)
 {
-  RGWSI_SysObj *svc = source.sysobj_svc;
+  RGWSI_SysObj_Core *svc = source.core_svc;
   rgw_raw_obj& obj = source.get_obj();
 
   return svc->get_attr(obj, name, dest);
@@ -556,7 +81,7 @@ int RGWSI_SysObj::Obj::ROp::get_attr(const char *name, bufferlist *dest)
 
 int RGWSI_SysObj::Obj::WOp::remove()
 {
-  RGWSI_SysObj *svc = source.sysobj_svc;
+  RGWSI_SysObj_Core *svc = source.core_svc;
   rgw_raw_obj& obj = source.get_obj();
 
   return svc->remove(source.get_ctx(),
@@ -566,7 +91,7 @@ int RGWSI_SysObj::Obj::WOp::remove()
 
 int RGWSI_SysObj::Obj::WOp::write(bufferlist& bl)
 {
-  RGWSI_SysObj *svc = source.sysobj_svc;
+  RGWSI_SysObj_Core *svc = source.core_svc;
   rgw_raw_obj& obj = source.get_obj();
 
   return svc->write(obj, pmtime, attrs, exclusive,
@@ -608,7 +133,7 @@ int RGWSI_SysObj::Pool::Op::list_prefixed_objs(const string& prefix, list<string
 
 int RGWSI_SysObj::Obj::OmapOp::get_all(std::map<string, bufferlist> *m)
 {
-  RGWSI_SysObj *svc = source.sysobj_svc;
+  RGWSI_SysObj_Core *svc = source.core_svc;
   rgw_raw_obj& obj = source.obj;
 
   return svc->omap_get_all(obj, m);
@@ -619,7 +144,7 @@ int RGWSI_SysObj::Obj::OmapOp::get_vals(const string& marker,
                                         std::map<string, bufferlist> *m,
                                         bool *pmore)
 {
-  RGWSI_SysObj *svc = source.sysobj_svc;
+  RGWSI_SysObj_Core *svc = source.core_svc;
   rgw_raw_obj& obj = source.obj;
 
   return svc->omap_get_vals(obj, marker, count, m, pmore);
@@ -627,7 +152,7 @@ int RGWSI_SysObj::Obj::OmapOp::get_vals(const string& marker,
 
 int RGWSI_SysObj::Obj::OmapOp::set(const std::string& key, bufferlist& bl)
 {
-  RGWSI_SysObj *svc = source.sysobj_svc;
+  RGWSI_SysObj_Core *svc = source.core_svc;
   rgw_raw_obj& obj = source.obj;
 
   return svc->omap_set(obj, key, bl, must_exist);
@@ -635,7 +160,7 @@ int RGWSI_SysObj::Obj::OmapOp::set(const std::string& key, bufferlist& bl)
 
 int RGWSI_SysObj::Obj::OmapOp::set(const map<std::string, bufferlist>& m)
 {
-  RGWSI_SysObj *svc = source.sysobj_svc;
+  RGWSI_SysObj_Core *svc = source.core_svc;
   rgw_raw_obj& obj = source.obj;
 
   return svc->omap_set(obj, m, must_exist);
@@ -643,7 +168,7 @@ int RGWSI_SysObj::Obj::OmapOp::set(const map<std::string, bufferlist>& m)
 
 int RGWSI_SysObj::Obj::OmapOp::del(const std::string& key)
 {
-  RGWSI_SysObj *svc = source.sysobj_svc;
+  RGWSI_SysObj_Core *svc = source.core_svc;
   rgw_raw_obj& obj = source.obj;
 
   return svc->omap_del(obj, key);
diff --git a/src/rgw/services/svc_sys_obj.h b/src/rgw/services/svc_sys_obj.h
index 53d93cc4f90e878ab414b0b5fc1881b2b6916a87..2209727bdc66889e0f1c648591b016e8663aedfe 100644 (file)
@@ -5,53 +5,12 @@
 #include "rgw/rgw_service.h"
 
 #include "svc_rados.h"
+#include "svc_sys_obj_core.h"
 
 
 class RGWSI_Zone;
 class RGWSI_SysObj;
-
-struct RGWSysObjState {
-  rgw_raw_obj obj;
-  bool has_attrs{false};
-  bool exists{false};
-  uint64_t size{0};
-  ceph::real_time mtime;
-  uint64_t epoch{0};
-  bufferlist obj_tag;
-  bool has_data{false};
-  bufferlist data;
-  bool prefetch_data{false};
-  uint64_t pg_ver{0};
-
-  /* important! don't forget to update copy constructor */
-
-  RGWObjVersionTracker objv_tracker;
-
-  map<string, bufferlist> attrset;
-  RGWSysObjState() {}
-  RGWSysObjState(const RGWSysObjState& rhs) : obj (rhs.obj) {
-    has_attrs = rhs.has_attrs;
-    exists = rhs.exists;
-    size = rhs.size;
-    mtime = rhs.mtime;
-    epoch = rhs.epoch;
-    if (rhs.obj_tag.length()) {
-      obj_tag = rhs.obj_tag;
-    }
-    has_data = rhs.has_data;
-    if (rhs.data.length()) {
-      data = rhs.data;
-    }
-    prefetch_data = rhs.prefetch_data;
-    pg_ver = rhs.pg_ver;
-    objv_tracker = rhs.objv_tracker;
-  }
-};
-
-template <class T, class S>
-class RGWSysObjectCtxImpl;
-
-using RGWSysObjectCtx = RGWSysObjectCtxImpl<rgw_raw_obj, RGWSysObjState>;
+class RGWSysObjectCtx;
 
 class RGWS_SysObj : public RGWService
 {
@@ -67,16 +26,16 @@ public:
   class Obj {
     friend class ROp;
 
-    RGWSI_SysObj *sysobj_svc;
+    RGWSI_SysObj_Core *core_svc;
     RGWSysObjectCtx& ctx;
     rgw_raw_obj obj;
 
     RGWSI_RADOS *get_rados_svc();
 
   public:
-    Obj(RGWSI_SysObj *_sysobj_svc,
+    Obj(RGWSI_SysObj_Core *_core_svc,
         RGWSysObjectCtx& _ctx,
-        const rgw_raw_obj& _obj) : sysobj_svc(_sysobj_svc),
+        const rgw_raw_obj& _obj) : core_svc(_core_svc),
                                    ctx(_ctx),
                                    obj(_obj) {}
 
@@ -93,18 +52,7 @@ public:
     struct ROp {
       Obj& source;
 
-      struct GetObjState {
-        RGWSI_RADOS::Obj rados_obj;
-        bool has_rados_obj{false};
-        uint64_t last_ver{0};
-
-        GetObjState() {}
-
-        int get_rados_obj(RGWSI_RADOS *rados_svc,
-                          RGWSI_Zone *zone_svc,
-                          rgw_raw_obj& obj,
-                          RGWSI_RADOS::Obj **pobj);
-      } state;
+      RGWSI_SysObj_Core::GetObjState state;
       
       RGWObjVersionTracker *objv_tracker{nullptr};
       map<string, bufferlist> *attrs{nullptr};
@@ -214,14 +162,14 @@ public:
   class Pool {
     friend class Op;
 
-    RGWSI_SysObj *sysobj_svc;
+    RGWSI_SysObj_Core *core_svc;
     rgw_pool pool;
 
     RGWSI_RADOS *get_rados_svc();
 
   public:
-    Pool(RGWSI_SysObj *_sysobj_svc,
-         const rgw_pool& _pool) : sysobj_svc(_sysobj_svc),
+    Pool(RGWSI_SysObj_Core *_core_svc,
+         const rgw_pool& _pool) : core_svc(_core_svc),
                                   pool(_pool) {}
 
     rgw_pool& get_pool() {
@@ -247,128 +195,34 @@ public:
   friend class Pool;
   friend class Pool::Op;
 
-private:
+protected:
   std::shared_ptr<RGWSI_RADOS> rados_svc;
-  std::shared_ptr<RGWSI_Zone> zone_svc;
+  std::shared_ptr<RGWSI_SysObj_Core> core_svc;
 
   std::map<std::string, RGWServiceInstance::dependency> get_deps() override;
   int load(const std::string& conf, std::map<std::string, RGWServiceInstanceRef>& dep_refs) override;
 
-  int get_rados_obj(RGWSI_Zone *zone_svc, rgw_raw_obj& obj, RGWSI_RADOS::Obj *pobj);
-
-  int get_system_obj_state_impl(RGWSysObjectCtx *rctx, rgw_raw_obj& obj, RGWSysObjState **state, RGWObjVersionTracker *objv_tracker);
-  int get_system_obj_state(RGWSysObjectCtx *rctx, rgw_raw_obj& obj, RGWSysObjState **state, RGWObjVersionTracker *objv_tracker);
-
-  int raw_stat(rgw_raw_obj& obj, uint64_t *psize, real_time *pmtime, uint64_t *epoch,
-               map<string, bufferlist> *attrs, bufferlist *first_chunk,
-               RGWObjVersionTracker *objv_tracker);
-
-  int stat(RGWSysObjectCtx& obj_ctx,
-           RGWSI_SysObj::Obj::ROp::GetObjState& state,
-           rgw_raw_obj& obj,
-           map<string, bufferlist> *attrs,
-           real_time *lastmod,
-           uint64_t *obj_size,
-           RGWObjVersionTracker *objv_tracker);
-
-  int read(RGWSysObjectCtx& obj_ctx,
-           Obj::ROp::GetObjState& read_state,
-           RGWObjVersionTracker *objv_tracker,
-           rgw_raw_obj& obj,
-           bufferlist *bl, off_t ofs, off_t end,
-           map<string, bufferlist> *attrs,
-           boost::optional<obj_version>);
-
-  int get_attr(rgw_raw_obj& obj, const char *name, bufferlist *dest);
-
-  int omap_get_all(rgw_raw_obj& obj, std::map<string, bufferlist> *m);
-  int omap_get_vals(rgw_raw_obj& obj,
-                    const string& marker,
-                    uint64_t count,
-                    std::map<string, bufferlist> *m,
-                    bool *pmore);
-  int omap_set(rgw_raw_obj& obj, const std::string& key, bufferlist& bl, bool must_exist = false);
-  int omap_set(rgw_raw_obj& obj, const map<std::string, bufferlist>& m, bool must_exist = false);
-  int omap_del(rgw_raw_obj& obj, const std::string& key);
-
-  int remove(RGWSysObjectCtx& obj_ctx,
-             RGWObjVersionTracker *objv_tracker,
-             rgw_raw_obj& obj);
-
-  int write(rgw_raw_obj& obj,
-            real_time *pmtime,
-            map<std::string, bufferlist>& attrs,
-            bool exclusive,
-            const bufferlist& data,
-            RGWObjVersionTracker *objv_tracker,
-            real_time set_mtime);
 public:
   RGWSI_SysObj(RGWService *svc, CephContext *cct): RGWServiceInstance(svc, cct) {}
 
-  RGWSysObjectCtx&& init_obj_ctx();
-  Obj&& get_obj(RGWSysObjectCtx& obj_ctx, const rgw_raw_obj& obj);
+  RGWSysObjectCtx init_obj_ctx();
+  Obj get_obj(RGWSysObjectCtx& obj_ctx, const rgw_raw_obj& obj);
 
-  Pool&& get_pool(const rgw_pool& pool) {
-    return std::move(Pool(this, pool));
+  Pool get_pool(const rgw_pool& pool) {
+    return Pool(core_svc.get(), pool);
   }
 
 };
 
 using RGWSysObj = RGWSI_SysObj::Obj;
 
-template <class T, class S>
-class RGWSysObjectCtxImpl {
+class RGWSysObjectCtx : public RGWSysObjectCtxBase
+{
   RGWSI_SysObj *sysobj_svc;
-  std::map<T, S> objs_state;
-  RWLock lock;
-
 public:
-  explicit RGWSysObjectCtxImpl(RGWSI_SysObj *_sysobj_svc) : sysobj_svc(_sysobj_svc), lock("RGWSysObjectCtxImpl") {}
-
-  RGWSysObjectCtxImpl(const RGWSysObjectCtxImpl& rhs) : sysobj_svc(rhs.sysobj_svc),
-                                                  objs_state(rhs.objs_state),
-                                                  lock("RGWSysObjectCtxImpl") {}
-  RGWSysObjectCtxImpl(const RGWSysObjectCtxImpl&& rhs) : sysobj_svc(rhs.sysobj_svc),
-                                                   objs_state(std::move(rhs.objs_state)),
-                                                   lock("RGWSysObjectCtxImpl") {}
-
-  S *get_state(const T& obj) {
-    S *result;
-    typename std::map<T, S>::iterator iter;
-    lock.get_read();
-    assert (!obj.empty());
-    iter = objs_state.find(obj);
-    if (iter != objs_state.end()) {
-      result = &iter->second;
-      lock.unlock();
-    } else {
-      lock.unlock();
-      lock.get_write();
-      result = &objs_state[obj];
-      lock.unlock();
-    }
-    return result;
-  }
-
-  void set_atomic(T& obj) {
-    RWLock::WLocker wl(lock);
-    assert (!obj.empty());
-  }
-  void set_prefetch_data(T& obj) {
-    RWLock::WLocker wl(lock);
-    assert (!obj.empty());
-    objs_state[obj].prefetch_data = true;
-  }
-  void invalidate(T& obj) {
-    RWLock::WLocker wl(lock);
-    auto iter = objs_state.find(obj);
-    if (iter == objs_state.end()) {
-      return;
-    }
-    objs_state.erase(iter);
-  }
+  RGWSysObjectCtx(RGWSI_SysObj *_sysobj_svc) : sysobj_svc(_sysobj_svc) {}
 
-  RGWSI_SysObj::Obj&& get_obj(const rgw_raw_obj& obj) {
+  RGWSI_SysObj::Obj get_obj(const rgw_raw_obj& obj) {
     return sysobj_svc->get_obj(*this, obj);
   }
 };
diff --git a/src/rgw/services/svc_sys_obj_core.cc b/src/rgw/services/svc_sys_obj_core.cc
new file mode 100644 (file)
index 0000000..491828a
--- /dev/null
@@ -0,0 +1,505 @@
+#include "svc_sys_obj_core.h"
+#include "svc_rados.h"
+#include "svc_zone.h"
+
+#include "rgw/rgw_tools.h"
+
+#define dout_subsys ceph_subsys_rgw
+
+int RGWSI_SysObj_Core::GetObjState::get_rados_obj(RGWSI_RADOS *rados_svc,
+                                                  RGWSI_Zone *zone_svc,
+                                                  rgw_raw_obj& obj,
+                                                  RGWSI_RADOS::Obj **pobj)
+{
+  if (!has_rados_obj) {
+    zone_svc->canonicalize_raw_obj(&obj);
+
+    rados_obj = rados_svc->obj(obj);
+    int r = rados_obj.open();
+    if (r < 0) {
+      return r;
+    }
+    has_rados_obj = true;
+  }
+  *pobj = &rados_obj;
+  return 0;
+}
+
+std::map<string, RGWServiceInstance::dependency> RGWSI_SysObj_Core::get_deps()
+{
+  RGWServiceInstance::dependency dep1 = { .name = "rados",
+                                          .conf = "{}" };
+  RGWServiceInstance::dependency dep2 = { .name = "zone",
+                                          .conf = "{}" };
+  map<string, RGWServiceInstance::dependency> deps;
+  deps["rados_dep"] = dep1;
+  deps["zone_dep"] = dep2;
+  return deps;
+}
+
+/*
+ * Bind the dependencies announced by get_deps(): pick the "rados_dep" and
+ * "zone_dep" references out of dep_refs and keep them as typed service
+ * pointers. Both are asserted to be non-null. Always returns 0.
+ */
+int RGWSI_SysObj_Core::load(const string& conf, std::map<std::string, RGWServiceInstanceRef>& dep_refs)
+{
+  RGWServiceInstanceRef& rados_ref = dep_refs["rados_dep"];
+  rados_svc = static_pointer_cast<RGWSI_RADOS>(rados_ref);
+  assert(rados_svc);
+
+  RGWServiceInstanceRef& zone_ref = dep_refs["zone_dep"];
+  zone_svc = static_pointer_cast<RGWSI_Zone>(zone_ref);
+  assert(zone_svc);
+
+  return 0;
+}
+
+int RGWSI_SysObj_Core::get_rados_obj(RGWSI_Zone *zone_svc,
+                                     rgw_raw_obj& obj,
+                                     RGWSI_RADOS::Obj *pobj)
+{
+  zone_svc->canonicalize_raw_obj(&obj);
+
+  *pobj = std::move(rados_svc->obj(obj));
+  int r = pobj->open();
+  if (r < 0) {
+    return r;
+  }
+
+  return 0;
+}
+
+int RGWSI_SysObj_Core::get_system_obj_state_impl(RGWSysObjectCtxBase *rctx, rgw_raw_obj& obj, RGWSysObjState **state, RGWObjVersionTracker *objv_tracker)
+{
+  if (obj.empty()) {
+    return -EINVAL;
+  }
+
+  RGWSysObjState *s = rctx->get_state(obj);
+  ldout(cct, 20) << "get_system_obj_state: rctx=" << (void *)rctx << " obj=" << obj << " state=" << (void *)s << " s->prefetch_data=" << s->prefetch_data << dendl;
+  *state = s;
+  if (s->has_attrs) {
+    return 0;
+  }
+
+  s->obj = obj;
+
+  int r = raw_stat(obj, &s->size, &s->mtime, &s->epoch, &s->attrset, (s->prefetch_data ? &s->data : nullptr), objv_tracker);
+  if (r == -ENOENT) {
+    s->exists = false;
+    s->has_attrs = true;
+    s->mtime = real_time();
+    return 0;
+  }
+  if (r < 0)
+    return r;
+
+  s->exists = true;
+  s->has_attrs = true;
+  s->obj_tag = s->attrset[RGW_ATTR_ID_TAG];
+
+  if (s->obj_tag.length())
+    ldout(cct, 20) << "get_system_obj_state: setting s->obj_tag to "
+                   << s->obj_tag.c_str() << dendl;
+  else
+    ldout(cct, 20) << "get_system_obj_state: s->obj_tag was set empty" << dendl;
+
+  return 0;
+}
+
+/*
+ * Retry wrapper around get_system_obj_state_impl(): keeps calling it for as
+ * long as it reports -EAGAIN and returns the first other result.
+ */
+int RGWSI_SysObj_Core::get_system_obj_state(RGWSysObjectCtxBase *rctx, rgw_raw_obj& obj, RGWSysObjState **state, RGWObjVersionTracker *objv_tracker)
+{
+  for (;;) {
+    const int ret = get_system_obj_state_impl(rctx, obj, state, objv_tracker);
+    if (ret != -EAGAIN) {
+      return ret;
+    }
+  }
+}
+
+int RGWSI_SysObj_Core::raw_stat(rgw_raw_obj& obj, uint64_t *psize, real_time *pmtime, uint64_t *epoch,
+                                map<string, bufferlist> *attrs, bufferlist *first_chunk,
+                                RGWObjVersionTracker *objv_tracker)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  int r = get_rados_obj(zone_svc.get(), obj, &rados_obj);
+  if (r < 0) {
+    return r;
+  }
+
+  map<string, bufferlist> unfiltered_attrset;
+  uint64_t size = 0;
+  struct timespec mtime_ts;
+
+  librados::ObjectReadOperation op;
+  if (objv_tracker) {
+    objv_tracker->prepare_op_for_read(&op);
+  }
+  if (attrs) {
+    op.getxattrs(&unfiltered_attrset, nullptr);
+  }
+  if (psize || pmtime) {
+    op.stat2(&size, &mtime_ts, nullptr);
+  }
+  if (first_chunk) {
+    op.read(0, cct->_conf->rgw_max_chunk_size, first_chunk, nullptr);
+  }
+  bufferlist outbl;
+  r = rados_obj.operate(&op, &outbl);
+
+  if (epoch) {
+    *epoch = rados_obj.get_last_version();
+  }
+
+  if (r < 0)
+    return r;
+
+  if (psize)
+    *psize = size;
+  if (pmtime)
+    *pmtime = ceph::real_clock::from_timespec(mtime_ts);
+  if (attrs) {
+    rgw_filter_attrset(unfiltered_attrset, RGW_ATTR_PREFIX, attrs);
+  }
+
+  return 0;
+}
+
+int RGWSI_SysObj_Core::stat(RGWSysObjectCtxBase& obj_ctx,
+                            GetObjState& state,
+                            rgw_raw_obj& obj,
+                            map<string, bufferlist> *attrs,
+                            real_time *lastmod,
+                            uint64_t *obj_size,
+                            RGWObjVersionTracker *objv_tracker)
+{
+  RGWSysObjState *astate = nullptr;
+
+  int r = get_system_obj_state(&obj_ctx, obj, &astate, objv_tracker);
+  if (r < 0)
+    return r;
+
+  if (!astate->exists) {
+    return -ENOENT;
+  }
+
+  if (attrs) {
+    *attrs = astate->attrset;
+    if (cct->_conf->subsys.should_gather<ceph_subsys_rgw, 20>()) {
+      map<string, bufferlist>::iterator iter;
+      for (iter = attrs->begin(); iter != attrs->end(); ++iter) {
+        ldout(cct, 20) << "Read xattr: " << iter->first << dendl;
+      }
+    }
+  }
+
+  if (obj_size)
+    *obj_size = astate->size;
+  if (lastmod)
+    *lastmod = astate->mtime;
+
+  return 0;
+}
+
+int RGWSI_SysObj_Core::read(RGWSysObjectCtxBase& obj_ctx,
+                            GetObjState& read_state,
+                            RGWObjVersionTracker *objv_tracker,
+                            rgw_raw_obj& obj,
+                            bufferlist *bl, off_t ofs, off_t end,
+                            map<string, bufferlist> *attrs,
+                            boost::optional<obj_version>)
+{
+  uint64_t len;
+  librados::ObjectReadOperation op;
+
+  if (end < 0)
+    len = 0;
+  else
+    len = end - ofs + 1;
+
+  if (objv_tracker) {
+    objv_tracker->prepare_op_for_read(&op);
+  }
+
+  ldout(cct, 20) << "rados->read ofs=" << ofs << " len=" << len << dendl;
+  op.read(ofs, len, bl, nullptr);
+
+  if (attrs) {
+    op.getxattrs(attrs, nullptr);
+  }
+
+  RGWSI_RADOS::Obj rados_obj;
+  int r = get_rados_obj(zone_svc.get(), obj, &rados_obj);
+  if (r < 0) {
+    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+    return r;
+  }
+  r = rados_obj.operate(&op, nullptr);
+  if (r < 0) {
+    ldout(cct, 20) << "rados_obj.operate() r=" << r << " bl.length=" << bl->length() << dendl;
+    return r;
+  }
+  ldout(cct, 20) << "rados_obj.operate() r=" << r << " bl.length=" << bl->length() << dendl;
+
+  uint64_t op_ver = rados_obj.get_last_version();
+
+  if (read_state.last_ver > 0 &&
+      read_state.last_ver != op_ver) {
+    ldout(cct, 5) << "raced with an object write, abort" << dendl;
+    return -ECANCELED;
+  }
+
+  read_state.last_ver = op_ver;
+
+  return bl->length();
+}
+
+/**
+ * Get an attribute for a system object.
+ * obj: the object to get attr
+ * name: name of the attr to retrieve
+ * dest: bufferlist to store the result in
+ * Returns: 0 on success, -ERR# otherwise.
+ */
+int RGWSI_SysObj_Core::get_attr(rgw_raw_obj& obj,
+                                const char *name,
+                                bufferlist *dest)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  int r = get_rados_obj(zone_svc.get(), obj, &rados_obj);
+  if (r < 0) {
+    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+    return r;
+  }
+
+  librados::ObjectReadOperation op;
+
+  int rval;
+  op.getxattr(name, dest, &rval);
+  
+  r = rados_obj.operate(&op, nullptr);
+  if (r < 0)
+    return r;
+
+  return 0;
+}
+
+int RGWSI_SysObj_Core::omap_get_vals(rgw_raw_obj& obj,
+                                     const string& marker,
+                                     uint64_t count,
+                                     std::map<string, bufferlist> *m,
+                                     bool *pmore)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  int r = get_rados_obj(zone_svc.get(), obj, &rados_obj);
+  if (r < 0) {
+    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+    return r;
+  }
+
+  string start_after = marker;
+  bool more;
+
+  do {
+    librados::ObjectReadOperation op;
+
+    std::map<string, bufferlist> t;
+    int rval;
+    op.omap_get_vals2(start_after, count, &t, &more, &rval);
+  
+    r = rados_obj.operate(&op, nullptr);
+    if (r < 0) {
+      return r;
+    }
+    if (t.empty()) {
+      break;
+    }
+    count -= t.size();
+    start_after = t.rbegin()->first;
+    m->insert(t.begin(), t.end());
+  } while (more && count > 0);
+
+  if (pmore) {
+    *pmore = more;
+  }
+  return 0;
+}
+
+int RGWSI_SysObj_Core::omap_get_all(rgw_raw_obj& obj, std::map<string, bufferlist> *m)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  int r = get_rados_obj(zone_svc.get(), obj, &rados_obj);
+  if (r < 0) {
+    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+    return r;
+  }
+
+#define MAX_OMAP_GET_ENTRIES 1024
+  const int count = MAX_OMAP_GET_ENTRIES;
+  string start_after;
+  bool more;
+
+  do {
+    librados::ObjectReadOperation op;
+
+    std::map<string, bufferlist> t;
+    int rval;
+    op.omap_get_vals2(start_after, count, &t, &more, &rval);
+  
+    r = rados_obj.operate(&op, nullptr);
+    if (r < 0) {
+      return r;
+    }
+    if (t.empty()) {
+      break;
+    }
+    start_after = t.rbegin()->first;
+    m->insert(t.begin(), t.end());
+  } while (more);
+  return 0;
+}
+
+int RGWSI_SysObj_Core::omap_set(rgw_raw_obj& obj, const std::string& key, bufferlist& bl, bool must_exist)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  int r = get_rados_obj(zone_svc.get(), obj, &rados_obj);
+  if (r < 0) {
+    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+    return r;
+  }
+
+  ldout(cct, 15) << "omap_set obj=" << obj << " key=" << key << dendl;
+
+  map<string, bufferlist> m;
+  m[key] = bl;
+  librados::ObjectWriteOperation op;
+  if (must_exist)
+    op.assert_exists();
+  op.omap_set(m);
+  r = rados_obj.operate(&op);
+  return r;
+}
+
+int RGWSI_SysObj_Core::omap_set(rgw_raw_obj& obj, const std::map<std::string, bufferlist>& m, bool must_exist)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  int r = get_rados_obj(zone_svc.get(), obj, &rados_obj);
+  if (r < 0) {
+    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+    return r;
+  }
+
+  librados::ObjectWriteOperation op;
+  if (must_exist)
+    op.assert_exists();
+  op.omap_set(m);
+  r = rados_obj.operate(&op);
+  return r;
+}
+
+/*
+ * Remove a single omap key from obj.
+ * Returns the result of the write operation, or the error from opening the
+ * object.
+ */
+int RGWSI_SysObj_Core::omap_del(rgw_raw_obj& obj, const std::string& key)
+{
+  RGWSI_RADOS::Obj target;
+  const int open_ret = get_rados_obj(zone_svc.get(), obj, &target);
+  if (open_ret < 0) {
+    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << open_ret << dendl;
+    return open_ret;
+  }
+
+  /* omap_rm_keys() works on a key set, so wrap the one key */
+  const set<string> doomed{key};
+
+  librados::ObjectWriteOperation wop;
+  wop.omap_rm_keys(doomed);
+
+  return target.operate(&wop);
+}
+
+int RGWSI_SysObj_Core::remove(RGWSysObjectCtxBase& obj_ctx,
+                         RGWObjVersionTracker *objv_tracker,
+                         rgw_raw_obj& obj)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  int r = get_rados_obj(zone_svc.get(), obj, &rados_obj);
+  if (r < 0) {
+    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+    return r;
+  }
+
+  librados::ObjectWriteOperation op;
+
+  if (objv_tracker) {
+    objv_tracker->prepare_op_for_write(&op);
+  }
+
+  op.remove();
+  r = rados_obj.operate(&op);
+  if (r < 0)
+    return r;
+
+  return 0;
+}
+
+int RGWSI_SysObj_Core::write(rgw_raw_obj& obj,
+                             real_time *pmtime,
+                             map<std::string, bufferlist>& attrs,
+                             bool exclusive,
+                             const bufferlist& data,
+                             RGWObjVersionTracker *objv_tracker,
+                             real_time set_mtime)
+{
+  RGWSI_RADOS::Obj rados_obj;
+  int r = get_rados_obj(zone_svc.get(), obj, &rados_obj);
+  if (r < 0) {
+    ldout(cct, 20) << "get_rados_obj() on obj=" << obj << " returned " << r << dendl;
+    return r;
+  }
+
+  librados::ObjectWriteOperation op;
+
+  if (exclusive) {
+    op.create(true); // exclusive create
+  } else {
+    op.remove();
+    op.set_op_flags2(LIBRADOS_OP_FLAG_FAILOK);
+    op.create(false);
+  }
+
+  if (objv_tracker) {
+    objv_tracker->prepare_op_for_write(&op);
+  }
+
+  if (real_clock::is_zero(set_mtime)) {
+    set_mtime = real_clock::now();
+  }
+
+  struct timespec mtime_ts = real_clock::to_timespec(set_mtime);
+  op.mtime2(&mtime_ts);
+  op.write_full(data);
+
+  bufferlist acl_bl;
+
+  for (map<string, bufferlist>::iterator iter = attrs.begin(); iter != attrs.end(); ++iter) {
+    const string& name = iter->first;
+    bufferlist& bl = iter->second;
+
+    if (!bl.length())
+      continue;
+
+    op.setxattr(name.c_str(), bl);
+  }
+
+  r = rados_obj.operate(&op);
+  if (r < 0) {
+    return r;
+  }
+
+  if (objv_tracker) {
+    objv_tracker->apply_write();
+  }
+
+  if (pmtime) {
+    *pmtime = set_mtime;
+  }
+
+  return 0;
+}
+
+
diff --git a/src/rgw/services/svc_sys_obj_core.h b/src/rgw/services/svc_sys_obj_core.h
new file mode 100644 (file)
index 0000000..b79c32b
--- /dev/null
@@ -0,0 +1,178 @@
+#ifndef CEPH_RGW_SERVICES_SYS_OBJ_CORE_H
+#define CEPH_RGW_SERVICES_SYS_OBJ_CORE_H
+
+
+#include "rgw/rgw_service.h"
+
+#include "svc_rados.h"
+
+
+class RGWSI_Zone;
+
+struct RGWSysObjState {
+  rgw_raw_obj obj;
+  bool has_attrs{false};
+  bool exists{false};
+  uint64_t size{0};
+  ceph::real_time mtime;
+  uint64_t epoch{0};
+  bufferlist obj_tag;
+  bool has_data{false};
+  bufferlist data;
+  bool prefetch_data{false};
+  uint64_t pg_ver{0};
+
+  /* important! don't forget to update copy constructor */
+
+  RGWObjVersionTracker objv_tracker;
+
+  map<string, bufferlist> attrset;
+  RGWSysObjState() {}
+  RGWSysObjState(const RGWSysObjState& rhs) : obj (rhs.obj) {
+    has_attrs = rhs.has_attrs;
+    exists = rhs.exists;
+    size = rhs.size;
+    mtime = rhs.mtime;
+    epoch = rhs.epoch;
+    if (rhs.obj_tag.length()) {
+      obj_tag = rhs.obj_tag;
+    }
+    has_data = rhs.has_data;
+    if (rhs.data.length()) {
+      data = rhs.data;
+    }
+    prefetch_data = rhs.prefetch_data;
+    pg_ver = rhs.pg_ver;
+    objv_tracker = rhs.objv_tracker;
+  }
+};
+
+class RGWSysObjectCtxBase {
+  std::map<rgw_raw_obj, RGWSysObjState> objs_state;
+  RWLock lock;
+
+public:
+  explicit RGWSysObjectCtxBase() : lock("RGWSysObjectCtxBase") {}
+
+  RGWSysObjectCtxBase(const RGWSysObjectCtxBase& rhs) : objs_state(rhs.objs_state),
+                                                  lock("RGWSysObjectCtxBase") {}
+  RGWSysObjectCtxBase(const RGWSysObjectCtxBase&& rhs) : objs_state(std::move(rhs.objs_state)),
+                                                   lock("RGWSysObjectCtxBase") {}
+
+  RGWSysObjState *get_state(const rgw_raw_obj& obj) {
+    RGWSysObjState *result;
+    std::map<rgw_raw_obj, RGWSysObjState>::iterator iter;
+    lock.get_read();
+    assert (!obj.empty());
+    iter = objs_state.find(obj);
+    if (iter != objs_state.end()) {
+      result = &iter->second;
+      lock.unlock();
+    } else {
+      lock.unlock();
+      lock.get_write();
+      result = &objs_state[obj];
+      lock.unlock();
+    }
+    return result;
+  }
+
+  void set_atomic(rgw_raw_obj& obj) {
+    RWLock::WLocker wl(lock);
+    assert (!obj.empty());
+  }
+  void set_prefetch_data(rgw_raw_obj& obj) {
+    RWLock::WLocker wl(lock);
+    assert (!obj.empty());
+    objs_state[obj].prefetch_data = true;
+  }
+  void invalidate(rgw_raw_obj& obj) {
+    RWLock::WLocker wl(lock);
+    auto iter = objs_state.find(obj);
+    if (iter == objs_state.end()) {
+      return;
+    }
+    objs_state.erase(iter);
+  }
+};
+
+/*
+ * Core system-object service: the operations on raw system objects
+ * (stat/read/write/remove, xattr and omap access), implemented on top of the
+ * rados and zone services. All operations are protected and virtual;
+ * RGWSI_SysObj is a friend and can therefore call them.
+ */
+class RGWSI_SysObj_Core : public RGWServiceInstance
+{
+  friend class RGWSI_SysObj;
+
+protected:
+  /* dependencies, bound in load() */
+  std::shared_ptr<RGWSI_RADOS> rados_svc;
+  std::shared_ptr<RGWSI_Zone> zone_svc;
+
+  /* state carried across calls on one object: a lazily opened RADOS handle
+   * and the object version seen by the last read */
+  struct GetObjState {
+    RGWSI_RADOS::Obj rados_obj;
+    bool has_rados_obj{false};
+    uint64_t last_ver{0};
+
+    GetObjState() {}
+
+    /* open rados_obj on first use and return a pointer to it via pobj */
+    int get_rados_obj(RGWSI_RADOS *rados_svc,
+                      RGWSI_Zone *zone_svc,
+                      rgw_raw_obj& obj,
+                      RGWSI_RADOS::Obj **pobj);
+  };
+
+
+  std::map<std::string, RGWServiceInstance::dependency> get_deps() override;
+  int load(const std::string& conf, std::map<std::string, RGWServiceInstanceRef>& dep_refs) override;
+
+  /* canonicalize obj and open a RADOS handle for it into *pobj */
+  virtual int get_rados_obj(RGWSI_Zone *zone_svc, rgw_raw_obj& obj, RGWSI_RADOS::Obj *pobj);
+
+  /* load the context's state entry for obj; the non-impl variant retries
+   * while the impl reports -EAGAIN */
+  virtual int get_system_obj_state_impl(RGWSysObjectCtxBase *rctx, rgw_raw_obj& obj, RGWSysObjState **state, RGWObjVersionTracker *objv_tracker);
+  virtual int get_system_obj_state(RGWSysObjectCtxBase *rctx, rgw_raw_obj& obj, RGWSysObjState **state, RGWObjVersionTracker *objv_tracker);
+
+  /* stat the object directly; every output pointer is optional */
+  virtual int raw_stat(rgw_raw_obj& obj, uint64_t *psize, real_time *pmtime, uint64_t *epoch,
+                       map<string, bufferlist> *attrs, bufferlist *first_chunk,
+                       RGWObjVersionTracker *objv_tracker);
+
+  /* stat through the context's state entry; -ENOENT if the object is absent */
+  virtual int stat(RGWSysObjectCtxBase& obj_ctx,
+                   GetObjState& state,
+                   rgw_raw_obj& obj,
+                   map<string, bufferlist> *attrs,
+                   real_time *lastmod,
+                   uint64_t *obj_size,
+                   RGWObjVersionTracker *objv_tracker);
+
+  /* read [ofs, end] into bl; returns the number of bytes read or an error */
+  virtual int read(RGWSysObjectCtxBase& obj_ctx,
+                   GetObjState& read_state,
+                   RGWObjVersionTracker *objv_tracker,
+                   rgw_raw_obj& obj,
+                   bufferlist *bl, off_t ofs, off_t end,
+                   map<string, bufferlist> *attrs,
+                   boost::optional<obj_version>);
+
+  /* fetch one xattr into dest */
+  virtual int get_attr(rgw_raw_obj& obj, const char *name, bufferlist *dest);
+
+  /* omap access: read all / read a bounded range / set / delete */
+  virtual int omap_get_all(rgw_raw_obj& obj, std::map<string, bufferlist> *m);
+  virtual int omap_get_vals(rgw_raw_obj& obj,
+                            const string& marker,
+                            uint64_t count,
+                            std::map<string, bufferlist> *m,
+                            bool *pmore);
+  virtual int omap_set(rgw_raw_obj& obj, const std::string& key, bufferlist& bl, bool must_exist = false);
+  virtual int omap_set(rgw_raw_obj& obj, const map<std::string, bufferlist>& m, bool must_exist = false);
+  virtual int omap_del(rgw_raw_obj& obj, const std::string& key);
+
+  /* remove the object */
+  virtual int remove(RGWSysObjectCtxBase& obj_ctx,
+                     RGWObjVersionTracker *objv_tracker,
+                     rgw_raw_obj& obj);
+
+  /* write full object content plus xattrs; exclusive selects create-only */
+  virtual int write(rgw_raw_obj& obj,
+                    real_time *pmtime,
+                    map<std::string, bufferlist>& attrs,
+                    bool exclusive,
+                    const bufferlist& data,
+                    RGWObjVersionTracker *objv_tracker,
+                    real_time set_mtime);
+
+public:
+  RGWSI_SysObj_Core(RGWService *svc, CephContext *cct): RGWServiceInstance(svc, cct) {}
+
+};
+
+#endif