]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
rgw: don't update bucket index multiple times in overwrite 14515/head
authorYehuda Sadeh <yehuda@redhat.com>
Sat, 21 Jan 2017 01:05:24 +0000 (17:05 -0800)
committerNathan Cutler <ncutler@suse.com>
Mon, 19 Jun 2017 17:17:16 +0000 (19:17 +0200)
Instead of this for overwrites:
 prepare (index),
   write (head) [-EEXIST]
 cancel (index)
 read (head)
 prepare (index)
   write (head)
 complete (index)

We now do:
 prepare (index),
   write (head) [-EEXIST]
   read (head)
   write (head)
 complete (index)

Signed-off-by: Yehuda Sadeh <yehuda@redhat.com>
(cherry picked from commit 7f4818f9611ea8f7b3fac7df8c5c314964f657a2)

src/rgw/rgw_rados.cc
src/rgw/rgw_rados.h

index 223e96a5a06881a58dbadc692466c7a3f6d2053e..7facdcd3bee711d3095155b59019ed770e6c7537 100644 (file)
@@ -2735,7 +2735,18 @@ RGWObjState *RGWObjectCtx::get_state(rgw_obj& obj) {
 void RGWObjectCtx::invalidate(rgw_obj& obj)
 {
   RWLock::WLocker wl(lock);
-  objs_state.erase(obj);
+  auto iter = objs_state.find(obj);
+  if (iter == objs_state.end()) {
+    return;
+  }
+  bool is_atomic = iter->second.is_atomic;
+  bool prefetch_data = iter->second.prefetch_data;
+  
+  objs_state.erase(iter);
+
+  auto& s = objs_state[obj];
+  s.is_atomic = is_atomic;
+  s.prefetch_data = prefetch_data;
 }
 
 void RGWObjectCtx::set_atomic(rgw_obj& obj) {
@@ -6295,8 +6306,10 @@ int RGWRados::swift_versioning_restore(RGWObjectCtx& obj_ctx,
  * Returns: 0 on success, -ERR# otherwise.
  */
 int RGWRados::Object::Write::_do_write_meta(uint64_t size, uint64_t accounted_size,
-                                           map<string, bufferlist>& attrs, bool assume_noent)
+                                           map<string, bufferlist>& attrs, bool assume_noent,
+                                           void *_index_op)
 {
+  RGWRados::Bucket::UpdateIndex *index_op = static_cast<RGWRados::Bucket::UpdateIndex *>(_index_op);
   rgw_bucket bucket;
   rgw_rados_ref ref;
   RGWRados *store = target->get_store();
@@ -6322,7 +6335,12 @@ int RGWRados::Object::Write::_do_write_meta(uint64_t size, uint64_t accounted_si
   bool is_olh = state->is_olh;
 
   bool reset_obj = (meta.flags & PUT_OBJ_CREATE) != 0;
-  r = target->prepare_atomic_modification(op, reset_obj, meta.ptag, meta.if_match, meta.if_nomatch, false);
+
+  const string *ptag = meta.ptag;
+  if (!ptag && !index_op->get_optag()->empty()) {
+    ptag = index_op->get_optag();
+  }
+  r = target->prepare_atomic_modification(op, reset_obj, ptag, meta.if_match, meta.if_nomatch, false);
   if (r < 0)
     return r;
 
@@ -6396,7 +6414,6 @@ int RGWRados::Object::Write::_do_write_meta(uint64_t size, uint64_t accounted_si
   if (!op.size())
     return 0;
 
-  string index_tag;
   uint64_t epoch;
   int64_t poolid;
 
@@ -6405,23 +6422,17 @@ int RGWRados::Object::Write::_do_write_meta(uint64_t size, uint64_t accounted_si
 
   bool versioned_target = (meta.olh_epoch > 0 || !obj.get_instance().empty());
 
-  index_tag = state->write_tag;
-
   bool versioned_op = (target->versioning_enabled() || is_olh || versioned_target);
 
-  RGWBucketInfo& bucket_info = target->get_bucket_info();
-
-  RGWRados::Bucket bop(store, bucket_info);
-  RGWRados::Bucket::UpdateIndex index_op(&bop, obj, state);
-
   if (versioned_op) {
-    index_op.set_bilog_flags(RGW_BILOG_FLAG_VERSIONED_OP);
+    index_op->set_bilog_flags(RGW_BILOG_FLAG_VERSIONED_OP);
   }
 
-
-  r = index_op.prepare(CLS_RGW_OP_ADD);
-  if (r < 0)
-    return r;
+  if (!index_op->is_prepared()) {
+    r = index_op->prepare(CLS_RGW_OP_ADD, &state->write_tag);
+    if (r < 0)
+      return r;
+  }
 
   r = ref.ioctx.operate(ref.oid, &op);
   if (r < 0) { /* we can expect to get -ECANCELED if object was replaced under,
@@ -6442,7 +6453,7 @@ int RGWRados::Object::Write::_do_write_meta(uint64_t size, uint64_t accounted_si
     ldout(store->ctx(), 0) << "ERROR: complete_atomic_modification returned r=" << r << dendl;
   }
 
-  r = index_op.complete(poolid, epoch, size, accounted_size,
+  r = index_op->complete(poolid, epoch, size, accounted_size,
                         meta.set_mtime, etag, content_type, &acl_bl,
                         meta.category, meta.remove_objs);
   if (r < 0)
@@ -6482,7 +6493,7 @@ int RGWRados::Object::Write::_do_write_meta(uint64_t size, uint64_t accounted_si
   return 0;
 
 done_cancel:
-  int ret = index_op.cancel();
+  int ret = index_op->cancel();
   if (ret < 0) {
     ldout(store->ctx(), 0) << "ERROR: index_op.cancel()() returned ret=" << ret << dendl;
   }
@@ -6496,10 +6507,7 @@ done_cancel:
    * should treat it as a success
    */
   if (meta.if_match == NULL && meta.if_nomatch == NULL) {
-    if (r == -ECANCELED || r == -ENOENT ||
-        (r == -EEXIST && !assume_noent)) /* if assume_noent, we want to send back error so that
-                                          * we'd be called again with assume_noent == false
-                                          */ {
+    if (r == -ECANCELED || r == -ENOENT || r == -EEXIST) {
       r = 0;
     }
   } else {
@@ -6532,16 +6540,21 @@ done_cancel:
 int RGWRados::Object::Write::write_meta(uint64_t size, uint64_t accounted_size,
                                            map<string, bufferlist>& attrs)
 {
+  RGWBucketInfo& bucket_info = target->get_bucket_info();
+
+  RGWRados::Bucket bop(target->get_store(), bucket_info);
+  RGWRados::Bucket::UpdateIndex index_op(&bop, target->get_obj());
+
   bool assume_noent = (meta.if_match == NULL && meta.if_nomatch == NULL);
   int r;
   if (assume_noent) {
-    r = _do_write_meta(size, accounted_size, attrs, assume_noent);
+    r = _do_write_meta(size, accounted_size, attrs, assume_noent, (void *)&index_op);
     if (r == -EEXIST) {
       assume_noent = false;
     }
   }
   if (!assume_noent) {
-    r = _do_write_meta(size, accounted_size, attrs, assume_noent);
+    r = _do_write_meta(size, accounted_size, attrs, assume_noent, (void *)&index_op);
   }
   return r;
 }
@@ -8307,12 +8320,12 @@ int RGWRados::Object::Delete::delete_obj()
   RGWBucketInfo& bucket_info = target->get_bucket_info();
 
   RGWRados::Bucket bop(store, bucket_info);
-  RGWRados::Bucket::UpdateIndex index_op(&bop, obj, state);
+  RGWRados::Bucket::UpdateIndex index_op(&bop, obj);
 
   index_op.set_bilog_flags(params.bilog_flags);
 
 
-  r = index_op.prepare(CLS_RGW_OP_DEL);
+  r = index_op.prepare(CLS_RGW_OP_DEL, &state->write_tag);
   if (r < 0)
     return r;
 
@@ -8418,7 +8431,7 @@ int RGWRados::delete_obj_index(rgw_obj& obj)
   }
 
   RGWRados::Bucket bop(this, bucket_info);
-  RGWRados::Bucket::UpdateIndex index_op(&bop, obj, NULL);
+  RGWRados::Bucket::UpdateIndex index_op(&bop, obj);
 
   real_time removed_mtime;
   int r = index_op.complete_del(-1 /* pool */, 0, removed_mtime, NULL);
@@ -9074,13 +9087,13 @@ int RGWRados::set_attrs(void *ctx, rgw_obj& obj,
 
   bufferlist bl;
   RGWRados::Bucket bop(this, bucket_info);
-  RGWRados::Bucket::UpdateIndex index_op(&bop, obj, state);
+  RGWRados::Bucket::UpdateIndex index_op(&bop, obj);
 
   if (state) {
     string tag;
     append_rand_alpha(cct, tag, tag, 32);
     state->write_tag = tag;
-    r = index_op.prepare(CLS_RGW_OP_ADD);
+    r = index_op.prepare(CLS_RGW_OP_ADD, &state->write_tag);
 
     if (r < 0)
       return r;
@@ -9312,7 +9325,7 @@ int RGWRados::SystemObject::Read::stat(RGWObjVersionTracker *objv_tracker)
                                 stat_params.lastmod, stat_params.obj_size, objv_tracker);
 }
 
-int RGWRados::Bucket::UpdateIndex::prepare(RGWModifyOp op)
+int RGWRados::Bucket::UpdateIndex::prepare(RGWModifyOp op, const string *write_tag)
 {
   if (blind) {
     return 0;
@@ -9325,15 +9338,20 @@ int RGWRados::Bucket::UpdateIndex::prepare(RGWModifyOp op)
     return ret;
   }
 
-  if (obj_state && obj_state->write_tag.length()) {
-    optag = string(obj_state->write_tag.c_str(), obj_state->write_tag.length());
+  if (write_tag && write_tag->length()) {
+    optag = string(write_tag->c_str(), write_tag->length());
   } else {
     if (optag.empty()) {
       append_rand_alpha(store->ctx(), optag, optag, 32);
     }
   }
 
-  return store->cls_obj_prepare_op(*bs, op, optag, obj, bilog_flags);
+  int r = store->cls_obj_prepare_op(*bs, op, optag, obj, bilog_flags);
+  if (r < 0) {
+    return r;
+  }
+  prepared = true;
+  return 0;
 }
 
 int RGWRados::Bucket::UpdateIndex::complete(int64_t poolid, uint64_t epoch,
@@ -13005,9 +13023,9 @@ int RGWRados::delete_obj_aio(rgw_obj& obj, rgw_bucket& bucket,
 
   if (keep_index_consistent) {
     RGWRados::Bucket bop(this, bucket_info);
-    RGWRados::Bucket::UpdateIndex index_op(&bop, obj, astate);
+    RGWRados::Bucket::UpdateIndex index_op(&bop, obj);
 
-    ret = index_op.prepare(CLS_RGW_OP_DEL);
+    ret = index_op.prepare(CLS_RGW_OP_DEL, &astate->write_tag);
     if (ret < 0) {
       lderr(cct) << "ERROR: failed to prepare index op with ret=" << ret << dendl;
       return ret;
index 29c6db01e7896403f8008d7619692d5acbc287bf..962c78c74589d91d1da1dd17cfb8c70ed4ddaa85 100644 (file)
@@ -2458,7 +2458,8 @@ public:
 
       int _do_write_meta(uint64_t size, uint64_t accounted_size,
                      map<std::string, bufferlist>& attrs,
-                     bool assume_noent);
+                     bool assume_noent,
+                     void *index_op);
       int write_meta(uint64_t size, uint64_t accounted_size,
                      map<std::string, bufferlist>& attrs);
       int write_data(const char *data, uint64_t ofs, uint64_t len, bool exclusive);
@@ -2550,17 +2551,17 @@ public:
       RGWRados::Bucket *target;
       string optag;
       rgw_obj obj;
-      RGWObjState *obj_state;
-      uint16_t bilog_flags;
+      uint16_t bilog_flags{0};
       BucketShard bs;
-      bool bs_initialized;
+      bool bs_initialized{false};
       bool blind;
+      bool prepared{false};
     public:
 
-      UpdateIndex(RGWRados::Bucket *_target, rgw_obj& _obj, RGWObjState *_state) : target(_target), obj(_obj), obj_state(_state), bilog_flags(0),
-                                                                                   bs(target->get_store()), bs_initialized(false) {
-                                                                                     blind = (target->get_bucket_info().index_type == RGWBIType_Indexless);
-                                                                                   }
+      UpdateIndex(RGWRados::Bucket *_target, rgw_obj& _obj) : target(_target), obj(_obj),
+                                                              bs(target->get_store()) {
+                                                                blind = (target->get_bucket_info().index_type == RGWBIType_Indexless);
+                                                              }
 
       int get_bucket_shard(BucketShard **pbs) {
         if (!bs_initialized) {
@@ -2578,7 +2579,7 @@ public:
         bilog_flags = flags;
       }
 
-      int prepare(RGWModifyOp);
+      int prepare(RGWModifyOp, const string *write_tag);
       int complete(int64_t poolid, uint64_t epoch, uint64_t size,
                    uint64_t accounted_size, ceph::real_time& ut,
                    const string& etag, const string& content_type,
@@ -2588,6 +2589,10 @@ public:
                        ceph::real_time& removed_mtime, /* mtime of removed object */
                        list<rgw_obj_key> *remove_objs);
       int cancel();
+
+      const string *get_optag() { return &optag; }
+
+      bool is_prepared() { return prepared; }
     };
 
     struct List {