From b664903f930b4adeecd9fbd89926519648837f3a Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Fri, 19 Dec 2014 15:29:27 -0800 Subject: [PATCH] rgw, cls_rgw: clean up olh artifacts when dropping last version This is still missing one piece; however, it works like this: 1. bucket index log reflects last version was removed (already did that) 2. olh entry is marked as 'pending removal' - any new versions coming in now to the olh entry will clear pending state 3. rgw conditionally removes olh object (still missing condition check) - condition checks should verify olh tag & version, and no pending modifications 4. if (3) is successful, rgw sends 'clear olh' request to bucket index 5. bucket index, if still 'pending removal', clears olh and plain entry Signed-off-by: Yehuda Sadeh --- src/cls/rgw/cls_rgw.cc | 83 ++++++++++++++++++++++++++++++++++- src/cls/rgw/cls_rgw_client.cc | 17 +++++++ src/cls/rgw/cls_rgw_client.h | 1 + src/cls/rgw/cls_rgw_ops.cc | 17 +++++++ src/cls/rgw/cls_rgw_ops.h | 25 +++++++++++ src/cls/rgw/cls_rgw_types.cc | 1 + src/cls/rgw/cls_rgw_types.h | 5 ++- src/rgw/rgw_rados.cc | 57 +++++++++++++++++++++--- src/rgw/rgw_rados.h | 1 + 9 files changed, 198 insertions(+), 9 deletions(-) diff --git a/src/cls/rgw/cls_rgw.cc b/src/cls/rgw/cls_rgw.cc index 9b05da1f67a7b..7b613b964489a 100644 --- a/src/cls/rgw/cls_rgw.cc +++ b/src/cls/rgw/cls_rgw.cc @@ -33,6 +33,7 @@ cls_method_handle_t h_rgw_bucket_link_olh; cls_method_handle_t h_rgw_bucket_unlink_instance_op; cls_method_handle_t h_rgw_bucket_read_olh_log; cls_method_handle_t h_rgw_bucket_trim_olh_log; +cls_method_handle_t h_rgw_bucket_clear_olh; cls_method_handle_t h_rgw_obj_remove; cls_method_handle_t h_rgw_bi_get_op; cls_method_handle_t h_rgw_bi_put_op; @@ -1257,6 +1258,12 @@ public: olh_data_entry.exists = exists; } + bool pending_removal() { return olh_data_entry.pending_removal; } + + void set_pending_removal(bool pending_removal) { + olh_data_entry.pending_removal = pending_removal; + } + const string& 
get_tag() { return olh_data_entry.tag; } void set_tag(const string& tag) { olh_data_entry.tag = tag; @@ -1420,8 +1427,12 @@ static int rgw_bucket_link_olh(cls_method_context_t hctx, bufferlist *in, buffer if (olh_found) { const string& olh_tag = olh.get_tag(); if (op.olh_tag != olh_tag) { - CLS_LOG(5, "NOTICE: op.olh_tag (%s) != olh.tag (%s)", op.olh_tag.c_str(), olh_tag.c_str()); - return -ECANCELED; + if (!olh.pending_removal()) { + CLS_LOG(5, "NOTICE: op.olh_tag (%s) != olh.tag (%s)", op.olh_tag.c_str(), olh_tag.c_str()); + return -ECANCELED; + } + /* if pending removal, this is a new olh instance */ + olh.set_tag(op.olh_tag); } if (olh.exists()) { rgw_bucket_olh_entry& olh_entry = olh.get_entry(); @@ -1436,6 +1447,7 @@ static int rgw_bucket_link_olh(cls_method_context_t hctx, bufferlist *in, buffer } } } + olh.set_pending_removal(false); } else { bool instance_only = (op.key.instance.empty() && op.delete_marker); cls_rgw_obj_key key(op.key.name); @@ -1556,6 +1568,7 @@ static int rgw_bucket_unlink_instance(cls_method_context_t hctx, bufferlist *in, olh.update(next_key, false); olh.update_log(CLS_RGW_OLH_OP_UNLINK_OLH, op.op_tag, next_key, false); olh.set_exists(false); + olh.set_pending_removal(true); } } @@ -1680,6 +1693,71 @@ static int rgw_bucket_trim_olh_log(cls_method_context_t hctx, bufferlist *in, bu return 0; } +static int rgw_bucket_clear_olh(cls_method_context_t hctx, bufferlist *in, bufferlist *out) +{ + // decode request + rgw_cls_bucket_clear_olh_op op; + bufferlist::iterator iter = in->begin(); + try { + ::decode(op, iter); + } catch (buffer::error& err) { + CLS_LOG(0, "ERROR: rgw_bucket_clear_olh(): failed to decode request\n"); + return -EINVAL; + } + + if (!op.key.instance.empty()) { + CLS_LOG(1, "bad key passed in (non empty instance)"); + return -EINVAL; + } + + /* read olh entry */ + struct rgw_bucket_olh_entry olh_data_entry; + string olh_data_key; + encode_olh_data_key(op.key, &olh_data_key); + int ret = read_index_entry(hctx, olh_data_key, 
&olh_data_entry); + if (ret < 0 && ret != -ENOENT) { + CLS_LOG(0, "ERROR: read_index_entry() olh_key=%s ret=%d", olh_data_key.c_str(), ret); + return ret; + } + + if (olh_data_entry.tag != op.olh_tag) { + CLS_LOG(1, "NOTICE: %s(): olh_tag_mismatch olh_data_entry.tag=%s op.olh_tag=%s", __func__, olh_data_entry.tag.c_str(), op.olh_tag.c_str()); + return -ECANCELED; + } + + ret = cls_cxx_map_remove_key(hctx, olh_data_key); + if (ret < 0) { + CLS_LOG(1, "NOTICE: %s(): can't remove key %s ret=%d", __func__, olh_data_key.c_str(), ret); + return ret; + } + + rgw_bucket_dir_entry plain_entry; + + /* read plain entry, make sure it's a versioned place holder */ + ret = read_index_entry(hctx, op.key.name, &plain_entry); + if (ret == -ENOENT) { + /* we're done, no entry existing */ + return 0; + } + if (ret < 0) { + CLS_LOG(0, "ERROR: read_index_entry key=%s ret=%d", op.key.name.c_str(), ret); + return ret; + } + + if ((plain_entry.flags & RGW_BUCKET_DIRENT_FLAG_VER_MARKER) == 0) { + /* it's not a version marker, don't remove it */ + return 0; + } + + ret = cls_cxx_map_remove_key(hctx, op.key.name); + if (ret < 0) { + CLS_LOG(1, "NOTICE: %s(): can't remove key %s ret=%d", __func__, op.key.name.c_str(), ret); + return ret; + } + + return 0; +} + int rgw_dir_suggest_changes(cls_method_context_t hctx, bufferlist *in, bufferlist *out) { CLS_LOG(1, "rgw_dir_suggest_changes()"); @@ -2916,6 +2994,7 @@ void __cls_init() cls_register_cxx_method(h_class, "bucket_unlink_instance", CLS_METHOD_RD | CLS_METHOD_WR, rgw_bucket_unlink_instance, &h_rgw_bucket_unlink_instance_op); cls_register_cxx_method(h_class, "bucket_read_olh_log", CLS_METHOD_RD, rgw_bucket_read_olh_log, &h_rgw_bucket_read_olh_log); cls_register_cxx_method(h_class, "bucket_trim_olh_log", CLS_METHOD_RD | CLS_METHOD_WR, rgw_bucket_trim_olh_log, &h_rgw_bucket_trim_olh_log); + cls_register_cxx_method(h_class, "bucket_clear_olh", CLS_METHOD_RD | CLS_METHOD_WR, rgw_bucket_clear_olh, &h_rgw_bucket_clear_olh); 
cls_register_cxx_method(h_class, "obj_remove", CLS_METHOD_RD | CLS_METHOD_WR, rgw_obj_remove, &h_rgw_obj_remove); diff --git a/src/cls/rgw/cls_rgw_client.cc b/src/cls/rgw/cls_rgw_client.cc index e78d77a192b5e..ab8b9a2981e6e 100644 --- a/src/cls/rgw/cls_rgw_client.cc +++ b/src/cls/rgw/cls_rgw_client.cc @@ -258,6 +258,23 @@ void cls_rgw_trim_olh_log(librados::ObjectWriteOperation& op, string& oid, const op.exec("rgw", "bucket_trim_olh_log", in); } +int cls_rgw_clear_olh(IoCtx& io_ctx, string& oid, const cls_rgw_obj_key& olh, const string& olh_tag) +{ + bufferlist in, out; + struct rgw_cls_bucket_clear_olh_op call; + call.key = olh; + call.olh_tag = olh_tag; + ::encode(call, in); + librados::ObjectWriteOperation op; + int op_ret; + op.exec("rgw", "bucket_clear_olh", in, &out, &op_ret); + int r = io_ctx.operate(oid, &op); + if (r < 0) { + return r; + } + return op_ret; +} + int cls_rgw_bucket_check_index_op(IoCtx& io_ctx, string& oid, rgw_bucket_dir_header *existing_header, rgw_bucket_dir_header *calculated_header) diff --git a/src/cls/rgw/cls_rgw_client.h b/src/cls/rgw/cls_rgw_client.h index c9537dab52e88..4a44561574825 100644 --- a/src/cls/rgw/cls_rgw_client.h +++ b/src/cls/rgw/cls_rgw_client.h @@ -51,6 +51,7 @@ int cls_rgw_get_olh_log(librados::IoCtx& io_ctx, string& oid, librados::ObjectRe const string& olh_tag, map > *log, bool *is_truncated); void cls_rgw_trim_olh_log(librados::ObjectWriteOperation& op, string& oid, const cls_rgw_obj_key& olh, uint64_t ver, const string& olh_tag); +int cls_rgw_clear_olh(librados::IoCtx& io_ctx, string& oid, const cls_rgw_obj_key& olh, const string& olh_tag); int cls_rgw_bucket_check_index_op(librados::IoCtx& io_ctx, string& oid, rgw_bucket_dir_header *existing_header, diff --git a/src/cls/rgw/cls_rgw_ops.cc b/src/cls/rgw/cls_rgw_ops.cc index 00aca4a9aa836..69ffef68ff6a4 100644 --- a/src/cls/rgw/cls_rgw_ops.cc +++ b/src/cls/rgw/cls_rgw_ops.cc @@ -247,6 +247,23 @@ void rgw_cls_trim_olh_log_op::dump(Formatter *f) const 
::encode_json("olh_tag", olh_tag, f); } +void rgw_cls_bucket_clear_olh_op::generate_test_instances(list& o) +{ + + rgw_cls_bucket_clear_olh_op *op = new rgw_cls_bucket_clear_olh_op; + op->key.name = "key.name"; + op->olh_tag = "olh_tag"; + + o.push_back(op); + o.push_back(new rgw_cls_bucket_clear_olh_op); +} + +void rgw_cls_bucket_clear_olh_op::dump(Formatter *f) const +{ + ::encode_json("key", key, f); + ::encode_json("olh_tag", olh_tag, f); +} + void rgw_cls_list_op::generate_test_instances(list& o) { rgw_cls_list_op *op = new rgw_cls_list_op; diff --git a/src/cls/rgw/cls_rgw_ops.h b/src/cls/rgw/cls_rgw_ops.h index c63633f3c4e35..7d50d42b1f2a7 100644 --- a/src/cls/rgw/cls_rgw_ops.h +++ b/src/cls/rgw/cls_rgw_ops.h @@ -289,6 +289,31 @@ struct rgw_cls_trim_olh_log_op }; WRITE_CLASS_ENCODER(rgw_cls_trim_olh_log_op) +struct rgw_cls_bucket_clear_olh_op { + cls_rgw_obj_key key; + string olh_tag; + + rgw_cls_bucket_clear_olh_op() {} + + void encode(bufferlist& bl) const { + ENCODE_START(1, 1, bl); + ::encode(key, bl); + ::encode(olh_tag, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::iterator& bl) { + DECODE_START(1, bl); + ::decode(key, bl); + ::decode(olh_tag, bl); + DECODE_FINISH(bl); + } + + static void generate_test_instances(list& o); + void dump(Formatter *f) const; +}; +WRITE_CLASS_ENCODER(rgw_cls_bucket_clear_olh_op) + struct rgw_cls_list_op { cls_rgw_obj_key start_obj; diff --git a/src/cls/rgw/cls_rgw_types.cc b/src/cls/rgw/cls_rgw_types.cc index 69732945b5476..cf9df7db0bddc 100644 --- a/src/cls/rgw/cls_rgw_types.cc +++ b/src/cls/rgw/cls_rgw_types.cc @@ -232,6 +232,7 @@ void rgw_bucket_olh_entry::dump(Formatter *f) const encode_json("pending_log", pending_log, f); encode_json("tag", tag, f); encode_json("exists", exists, f); + encode_json("pending_removal", pending_removal, f); } void rgw_bucket_olh_log_entry::generate_test_instances(list& o) diff --git a/src/cls/rgw/cls_rgw_types.h b/src/cls/rgw/cls_rgw_types.h index 7562c02d646cd..a36298cc1cf8f 
100644 --- a/src/cls/rgw/cls_rgw_types.h +++ b/src/cls/rgw/cls_rgw_types.h @@ -419,8 +419,9 @@ struct rgw_bucket_olh_entry { map > pending_log; string tag; bool exists; + bool pending_removal; - rgw_bucket_olh_entry() : delete_marker(false), epoch(0), exists(false) {} + rgw_bucket_olh_entry() : delete_marker(false), epoch(0), exists(false), pending_removal(false) {} void encode(bufferlist &bl) const { ENCODE_START(1, 1, bl); @@ -430,6 +431,7 @@ struct rgw_bucket_olh_entry { ::encode(pending_log, bl); ::encode(tag, bl); ::encode(exists, bl); + ::encode(pending_removal, bl); ENCODE_FINISH(bl); } void decode(bufferlist::iterator &bl) { @@ -440,6 +442,7 @@ struct rgw_bucket_olh_entry { ::decode(pending_log, bl); ::decode(tag, bl); ::decode(exists, bl); + ::decode(pending_removal, bl); DECODE_FINISH(bl); } void dump(Formatter *f) const; diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index 56931e27ac66e..d2be902cbea1b 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -5747,6 +5747,36 @@ int RGWRados::bucket_index_trim_olh_log(RGWObjState& state, rgw_obj& obj_instanc return 0; } +int RGWRados::bucket_index_clear_olh(RGWObjState& state, rgw_obj& obj_instance) +{ + rgw_rados_ref ref; + rgw_bucket bucket; + int r = get_obj_ref(obj_instance, &ref, &bucket); + if (r < 0) { + return r; + } + + librados::IoCtx index_ctx; + string oid; + + int ret = open_bucket_index(bucket, index_ctx, oid); + if (ret < 0) { + return ret; + } + + string olh_tag(state.olh_tag.c_str(), state.olh_tag.length()); + + cls_rgw_obj_key key(obj_instance.get_index_key_name(), string()); + + ret = cls_rgw_clear_olh(index_ctx, oid, key, olh_tag); + if (ret < 0) { + ldout(cct, 5) << "cls_rgw_clear_olh() returned ret=" << ret << dendl; + return ret; + } + + return 0; +} + int RGWRados::apply_olh_log(RGWObjectCtx& obj_ctx, RGWObjState& state, RGWBucketInfo& bucket_info, rgw_obj& obj, bufferlist& olh_tag, map >& log, uint64_t *plast_ver) @@ -5769,6 +5799,7 @@ int 
RGWRados::apply_olh_log(RGWObjectCtx& obj_ctx, RGWObjState& state, RGWBucket cls_rgw_obj_key key; bool delete_marker = false; list remove_instances; + bool need_to_remove = false; for (iter = log.begin(); iter != log.end(); ++iter) { vector::iterator viter = iter->second.begin(); @@ -5783,14 +5814,13 @@ int RGWRados::apply_olh_log(RGWObjectCtx& obj_ctx, RGWObjState& state, RGWBucket break; case CLS_RGW_OLH_OP_LINK_OLH: need_to_link = true; + need_to_remove = false; key = entry.key; delete_marker = entry.delete_marker; break; case CLS_RGW_OLH_OP_UNLINK_OLH: - /* treat this as linking into a delete marker */ - need_to_link = true; - key = entry.key; - delete_marker = true; + need_to_remove = true; + need_to_link = false; break; default: ldout(cct, 0) << "ERROR: apply_olh_log: invalid op: " << (int)entry.op << dendl; @@ -5809,7 +5839,9 @@ int RGWRados::apply_olh_log(RGWObjectCtx& obj_ctx, RGWObjState& state, RGWBucket return r; } - if (need_to_link) { + if (need_to_remove) { + op.remove(); + } else if (need_to_link) { rgw_obj target(bucket, key.name); target.set_instance(key.instance); RGWOLHInfo info; @@ -5835,6 +5867,9 @@ int RGWRados::apply_olh_log(RGWObjectCtx& obj_ctx, RGWObjState& state, RGWBucket /* update olh object */ r = ref.ioctx.operate(ref.oid, &op); + if (need_to_remove && (r == -ENOENT || r == -ECANCELED)) { + r = 0; + } if (r < 0) { ldout(cct, 0) << "ERROR: could not apply olh update, r=" << r << dendl; return r; @@ -5843,8 +5878,18 @@ int RGWRados::apply_olh_log(RGWObjectCtx& obj_ctx, RGWObjState& state, RGWBucket r = bucket_index_trim_olh_log(state, obj, last_ver); if (r < 0) { ldout(cct, 0) << "ERROR: could not trim olh log, r=" << r << dendl; + return r; } - return r; + + if (need_to_remove) { + r = bucket_index_clear_olh(state, obj); + if (r < 0) { + ldout(cct, 0) << "ERROR: could not clear bucket index olh entries r=" << r << dendl; + return r; + } + } + + return 0; } /* diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h index 
3d61b0e43d56e..f0b9436520a8f 100644 --- a/src/rgw/rgw_rados.h +++ b/src/rgw/rgw_rados.h @@ -1775,6 +1775,7 @@ public: int bucket_index_read_olh_log(RGWObjState& state, rgw_obj& obj_instance, uint64_t ver_marker, map > *log, bool *is_truncated); int bucket_index_trim_olh_log(RGWObjState& obj_state, rgw_obj& obj_instance, uint64_t ver); + int bucket_index_clear_olh(RGWObjState& state, rgw_obj& obj_instance); int apply_olh_log(RGWObjectCtx& ctx, RGWObjState& obj_state, RGWBucketInfo& bucket_info, rgw_obj& obj, bufferlist& obj_tag, map >& log, uint64_t *plast_ver); -- 2.39.5