From 6bc1ef0aa20768e8ed3cec2b06d1abc1459b275e Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Thu, 13 Sep 2012 16:20:15 -0700 Subject: [PATCH] rgw, cls_rgw: a tool to rebuild bucket stats radosgw-admin bucket check [--fix] --bucket= The command will dump the existing bucket header stats, and the calculated bucket header stats. If --fix is provided the bucket stats will be overwritten by the recalculated stats. Signed-off-by: Yehuda Sadeh --- src/cls/rgw/cls_rgw.cc | 82 ++++++++++++++++++++ src/cls/rgw/cls_rgw_client.cc | 35 +++++++++ src/cls/rgw/cls_rgw_client.h | 5 ++ src/cls/rgw/cls_rgw_ops.h | 24 ++++++ src/rgw/rgw_admin.cc | 70 +++++++++++++---- src/rgw/rgw_rados.cc | 137 +++++++++++++++++++++------------- src/rgw/rgw_rados.h | 6 ++ 7 files changed, 293 insertions(+), 66 deletions(-) diff --git a/src/cls/rgw/cls_rgw.cc b/src/cls/rgw/cls_rgw.cc index f9b7c93054d09..1a9c40076e5be 100644 --- a/src/cls/rgw/cls_rgw.cc +++ b/src/cls/rgw/cls_rgw.cc @@ -22,6 +22,8 @@ cls_handle_t h_class; cls_method_handle_t h_rgw_bucket_init_index; cls_method_handle_t h_rgw_bucket_set_tag_timeout; cls_method_handle_t h_rgw_bucket_list; +cls_method_handle_t h_rgw_bucket_check_index; +cls_method_handle_t h_rgw_bucket_rebuild_index; cls_method_handle_t h_rgw_bucket_prepare_op; cls_method_handle_t h_rgw_bucket_complete_op; cls_method_handle_t h_rgw_dir_suggest_changes; @@ -97,6 +99,84 @@ int rgw_bucket_list(cls_method_context_t hctx, bufferlist *in, bufferlist *out) return 0; } +static int check_index(cls_method_context_t hctx, struct rgw_bucket_dir_header *existing_header, struct rgw_bucket_dir_header *calc_header) +{ + bufferlist header_bl; + int rc = cls_cxx_map_read_header(hctx, &header_bl); + if (rc < 0) + return rc; + bufferlist::iterator header_iter = header_bl.begin(); + try { + ::decode(*existing_header, header_iter); + } catch (buffer::error& err) { + CLS_LOG(1, "ERROR: rgw_bucket_list(): failed to decode header\n"); + return -EINVAL; + } + + calc_header->tag_timeout = 
existing_header->tag_timeout; + + bufferlist bl; + + map keys; + string start_obj; + string filter_prefix; + +#define CHECK_CHUNK_SIZE 1000 + do { + rc = cls_cxx_map_get_vals(hctx, start_obj, filter_prefix, CHECK_CHUNK_SIZE, &keys); + if (rc < 0) + return rc; + + std::map::iterator kiter = keys.begin(); + for (; kiter != keys.end(); ++kiter) { + struct rgw_bucket_dir_entry entry; + bufferlist::iterator eiter = kiter->second.begin(); + try { + ::decode(entry, eiter); + } catch (buffer::error& err) { + CLS_LOG(1, "ERROR: rgw_bucket_list(): failed to decode entry, key=%s\n", kiter->first.c_str()); + return -EIO; + } + struct rgw_bucket_category_stats& stats = calc_header->stats[entry.meta.category]; + stats.num_entries++; + stats.total_size += entry.meta.size; + stats.total_size_rounded += get_rounded_size(entry.meta.size); + + start_obj = kiter->first; + } + } while (keys.size() == CHECK_CHUNK_SIZE); + + return 0; +} + +int rgw_bucket_check_index(cls_method_context_t hctx, bufferlist *in, bufferlist *out) +{ + struct rgw_cls_check_index_ret ret; + + int rc = check_index(hctx, &ret.existing_header, &ret.calculated_header); + if (rc < 0) + return rc; + + ::encode(ret, *out); + + return 0; +} + +int rgw_bucket_rebuild_index(cls_method_context_t hctx, bufferlist *in, bufferlist *out) +{ + struct rgw_bucket_dir_header existing_header; + struct rgw_bucket_dir_header calc_header; + int rc = check_index(hctx, &existing_header, &calc_header); + if (rc < 0) + return rc; + + bufferlist header_bl; + ::encode(calc_header, header_bl); + rc = cls_cxx_map_write_header(hctx, &header_bl); + return rc; +} + + int rgw_bucket_init_index(cls_method_context_t hctx, bufferlist *in, bufferlist *out) { bufferlist bl; @@ -1057,6 +1137,8 @@ void __cls_init() cls_register_cxx_method(h_class, "bucket_init_index", CLS_METHOD_RD | CLS_METHOD_WR | CLS_METHOD_PUBLIC, rgw_bucket_init_index, &h_rgw_bucket_init_index); cls_register_cxx_method(h_class, "bucket_set_tag_timeout", CLS_METHOD_RD | 
CLS_METHOD_WR | CLS_METHOD_PUBLIC, rgw_bucket_set_tag_timeout, &h_rgw_bucket_set_tag_timeout); cls_register_cxx_method(h_class, "bucket_list", CLS_METHOD_RD | CLS_METHOD_PUBLIC, rgw_bucket_list, &h_rgw_bucket_list); + cls_register_cxx_method(h_class, "bucket_check_index", CLS_METHOD_RD | CLS_METHOD_PUBLIC, rgw_bucket_check_index, &h_rgw_bucket_check_index); /* RD only: the handler never writes the index */ + cls_register_cxx_method(h_class, "bucket_rebuild_index", CLS_METHOD_RD | CLS_METHOD_WR | CLS_METHOD_PUBLIC, rgw_bucket_rebuild_index, &h_rgw_bucket_rebuild_index); /* RD and WR: the handler rewrites the stored header */ cls_register_cxx_method(h_class, "bucket_prepare_op", CLS_METHOD_RD | CLS_METHOD_WR | CLS_METHOD_PUBLIC, rgw_bucket_prepare_op, &h_rgw_bucket_prepare_op); cls_register_cxx_method(h_class, "bucket_complete_op", CLS_METHOD_RD | CLS_METHOD_WR | CLS_METHOD_PUBLIC, rgw_bucket_complete_op, &h_rgw_bucket_complete_op); cls_register_cxx_method(h_class, "dir_suggest_changes", CLS_METHOD_RD | CLS_METHOD_WR | CLS_METHOD_PUBLIC, rgw_dir_suggest_changes, &h_rgw_dir_suggest_changes); diff --git a/src/cls/rgw/cls_rgw_client.cc b/src/cls/rgw/cls_rgw_client.cc index 8e430ae13fe54..b873a86690384 100644 --- a/src/cls/rgw/cls_rgw_client.cc +++ b/src/cls/rgw/cls_rgw_client.cc @@ -80,6 +80,41 @@ int cls_rgw_list_op(IoCtx& io_ctx, string& oid, string& start_obj, return r; } +/* Executes the rgw class method bucket_check_index on oid and decodes the reply. Either output pointer may be NULL to skip that header. Returns the negative exec result, -EIO if the reply fails to decode, else 0. */ int cls_rgw_bucket_check_index_op(IoCtx& io_ctx, string& oid, + rgw_bucket_dir_header *existing_header, + rgw_bucket_dir_header *calculated_header) +{ + bufferlist in, out; + int r = io_ctx.exec(oid, "rgw", "bucket_check_index", in, out); + if (r < 0) + return r; + + struct rgw_cls_check_index_ret ret; + try { + bufferlist::iterator iter = out.begin(); + ::decode(ret, iter); + } catch (buffer::error& err) { + return -EIO; + } + + if (existing_header) + *existing_header = ret.existing_header; + if (calculated_header) + *calculated_header = ret.calculated_header; + + return 0; +} + +/* Executes the rgw class method bucket_rebuild_index on oid with an empty input; returns the negative exec result or 0. */ int cls_rgw_bucket_rebuild_index_op(IoCtx& io_ctx, string& oid) +{ + bufferlist in, out; + int r = io_ctx.exec(oid, "rgw", 
"bucket_rebuild_index", in, out); + if (r < 0) + return r; + + return 0; +} + void cls_rgw_encode_suggestion(char op, rgw_bucket_dir_entry& dirent, bufferlist& updates) { updates.append(op); diff --git a/src/cls/rgw/cls_rgw_client.h b/src/cls/rgw/cls_rgw_client.h index 1e0b2a7b5ca3e..d2218b9de7b87 100644 --- a/src/cls/rgw/cls_rgw_client.h +++ b/src/cls/rgw/cls_rgw_client.h @@ -20,6 +20,11 @@ int cls_rgw_list_op(librados::IoCtx& io_ctx, string& oid, string& start_obj, string& filter_prefix, uint32_t num_entries, rgw_bucket_dir *dir, bool *is_truncated); +int cls_rgw_bucket_check_index_op(librados::IoCtx& io_ctx, string& oid, + rgw_bucket_dir_header *existing_header, + rgw_bucket_dir_header *calculated_header); +int cls_rgw_bucket_rebuild_index_op(librados::IoCtx& io_ctx, string& oid); + int cls_rgw_get_dir_header(librados::IoCtx& io_ctx, string& oid, rgw_bucket_dir_header *header); void cls_rgw_encode_suggestion(char op, rgw_bucket_dir_entry& dirent, bufferlist& updates); diff --git a/src/cls/rgw/cls_rgw_ops.h b/src/cls/rgw/cls_rgw_ops.h index a995aae217336..35c96c0a6da9c 100644 --- a/src/cls/rgw/cls_rgw_ops.h +++ b/src/cls/rgw/cls_rgw_ops.h @@ -145,6 +145,30 @@ struct rgw_cls_list_ret }; WRITE_CLASS_ENCODER(rgw_cls_list_ret) +/* Pair of bucket dir headers, the existing one and the calculated one, encoded and decoded in that order. */ struct rgw_cls_check_index_ret +{ + rgw_bucket_dir_header existing_header; + rgw_bucket_dir_header calculated_header; + + rgw_cls_check_index_ret() {} + + void encode(bufferlist &bl) const { + ENCODE_START(1, 1, bl); + ::encode(existing_header, bl); + ::encode(calculated_header, bl); + ENCODE_FINISH(bl); + } + void decode(bufferlist::iterator &bl) { + DECODE_START(1, bl); + ::decode(existing_header, bl); + ::decode(calculated_header, bl); + DECODE_FINISH(bl); + } + void dump(Formatter *f) const; /* NOTE(review): dump and generate_test_instances are only declared in this patch - confirm definitions exist elsewhere */ + static void generate_test_instances(list& o); +}; +WRITE_CLASS_ENCODER(rgw_cls_check_index_ret) + struct rgw_cls_usage_log_add_op { rgw_usage_log_info info; diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc index fcbeed78f4683..45756e02fb50b 
100644 --- a/src/rgw/rgw_admin.cc +++ b/src/rgw/rgw_admin.cc @@ -48,6 +48,7 @@ void _usage() cerr << " bucket unlink unlink bucket from specified user\n"; cerr << " bucket stats returns bucket statistics\n"; cerr << " bucket rm remove bucket\n"; + cerr << " bucket check check bucket index\n"; cerr << " object rm remove object\n"; cerr << " pool add add an existing pool for data placement\n"; cerr << " pool rm remove an existing pool from data placement set\n"; @@ -83,6 +84,7 @@ void _usage() cerr << " --start-date=\n"; cerr << " --end-date=\n"; cerr << " --bucket-id=\n"; + cerr << " --fix besides checking bucket index, will also fix it\n"; cerr << " --format= specify output format for certain operations: xml,\n"; cerr << " json\n"; cerr << " --purge-data when specified, user removal will also purge all the\n"; @@ -133,6 +135,7 @@ enum { OPT_BUCKET_UNLINK, OPT_BUCKET_STATS, OPT_BUCKET_RM, + OPT_BUCKET_CHECK, OPT_POLICY, OPT_POOL_ADD, OPT_POOL_RM, @@ -269,6 +272,8 @@ static int get_cmd(const char *cmd, const char *prev_cmd, bool *need_more) return OPT_BUCKET_STATS; if (strcmp(cmd, "rm") == 0) return OPT_BUCKET_RM; + if (strcmp(cmd, "check") == 0) + return OPT_BUCKET_CHECK; } else if (strcmp(prev_cmd, "log") == 0) { if (strcmp(cmd, "list") == 0) return OPT_LOG_LIST; @@ -498,6 +503,24 @@ static bool validate_access_key(string& key) return true; } +/* Emits a usage section holding one sub-section per category found in stats, each with size_kb, size_kb_actual and num_objects; the formatter is flushed to cout after every category. */ static void dump_bucket_usage(map& stats, Formatter *formatter) +{ + map::iterator iter; + + formatter->open_object_section("usage"); + for (iter = stats.begin(); iter != stats.end(); ++iter) { + RGWBucketStats& s = iter->second; + const char *cat_name = rgw_obj_category_name(iter->first); + formatter->open_object_section(cat_name); + formatter->dump_int("size_kb", s.num_kb); + formatter->dump_int("size_kb_actual", s.num_kb_rounded); + formatter->dump_int("num_objects", s.num_objects); + formatter->close_section(); + formatter->flush(cout); + } + formatter->close_section(); +} + int bucket_stats(rgw_bucket& bucket, Formatter 
*formatter) { RGWBucketInfo bucket_info; @@ -511,7 +534,6 @@ int bucket_stats(rgw_bucket& bucket, Formatter *formatter) cerr << "error getting bucket stats ret=" << ret << std::endl; return ret; } - map::iterator iter; formatter->open_object_section("stats"); formatter->dump_string("bucket", bucket.name); formatter->dump_string("pool", bucket.pool); @@ -519,19 +541,9 @@ int bucket_stats(rgw_bucket& bucket, Formatter *formatter) formatter->dump_string("id", bucket.bucket_id); formatter->dump_string("marker", bucket.marker); formatter->dump_string("owner", bucket_info.owner); - formatter->open_object_section("usage"); - for (iter = stats.begin(); iter != stats.end(); ++iter) { - RGWBucketStats& s = iter->second; - const char *cat_name = rgw_obj_category_name(iter->first); - formatter->open_object_section(cat_name); - formatter->dump_int("size_kb", s.num_kb); - formatter->dump_int("size_kb_actual", s.num_kb_rounded); - formatter->dump_int("num_objects", s.num_objects); - formatter->close_section(); - formatter->flush(cout); - } - formatter->close_section(); + dump_bucket_usage(stats, formatter); /* same usage output as the removed inline loop, now shared with bucket check */ formatter->close_section(); + return 0; } @@ -708,6 +720,7 @@ int main(int argc, char **argv) int purge_keys = false; int yes_i_really_mean_it = false; int delete_child_objects = false; + int fix = false; /* set by the --fix flag parsed below */ int max_buckets = -1; map categories; @@ -804,6 +817,8 @@ int main(int argc, char **argv) // do nothing } else if (ceph_argparse_binary_flag(args, i, &yes_i_really_mean_it, NULL, "--yes-i-really-mean-it", (char*)NULL)) { // do nothing + } else if (ceph_argparse_binary_flag(args, i, &fix, NULL, "--fix", (char*)NULL)) { + // do nothing } else { ++i; } @@ -1677,6 +1692,35 @@ next: } } + if (opt_cmd == OPT_BUCKET_CHECK) { /* bucket check: dump the stored and the recalculated stats, then rebuild the header when fix is set */ + map existing_stats; + map calculated_stats; + + int r = store->bucket_check_index(bucket, &existing_stats, &calculated_stats); + if (r < 0) { + cerr << "failed to check index err=" << cpp_strerror(-r) << std::endl; + return r; + } + + 
formatter->open_object_section("check_result"); + formatter->open_object_section("existing_header"); + dump_bucket_usage(existing_stats, formatter); + formatter->close_section(); + formatter->open_object_section("calculated_header"); + dump_bucket_usage(calculated_stats, formatter); + formatter->close_section(); + formatter->close_section(); + formatter->flush(cout); + + if (fix) { /* bucket_rebuild_index takes only the bucket, so the stats dumped above are not passed to it */ + r = store->bucket_rebuild_index(bucket); + if (r < 0) { + cerr << "failed to rebuild index err=" << cpp_strerror(-r) << std::endl; + return r; + } + } + } + if (opt_cmd == OPT_BUCKET_RM) { int ret = remove_bucket(bucket, delete_child_objects); diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index 0a3ceacb129cb..2ed3562f5f365 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -704,7 +704,7 @@ int RGWRados::create_bucket(string& owner, rgw_bucket& bucket, bucket.marker = buf; bucket.bucket_id = bucket.marker; - string dir_oid = dir_oid_prefix; + string dir_oid = dir_oid_prefix; dir_oid.append(bucket.marker); librados::ObjectWriteOperation op; @@ -1350,6 +1350,77 @@ int RGWRados::complete_atomic_overwrite(RGWRadosCtx *rctx, RGWObjState *state, r return ret; } +/* Opens io_ctx for bucket via open_bucket_ctx and sets bucket_oid to dir_oid_prefix followed by bucket.marker. Returns -EINVAL for system buckets, the negative open_bucket_ctx result, -EIO when the marker is empty, else 0. */ int RGWRados::open_bucket(rgw_bucket& bucket, librados::IoCtx& io_ctx, string& bucket_oid) +{ + if (bucket_is_system(bucket)) + return -EINVAL; + + int r = open_bucket_ctx(bucket, io_ctx); + if (r < 0) + return r; + + if (bucket.marker.empty()) { + ldout(cct, 0) << "ERROR: empty marker for bucket operation" << dendl; + return -EIO; + } + + bucket_oid = dir_oid_prefix; + bucket_oid.append(bucket.marker); + + return 0; +} + +/* Copies every per-category entry of header.stats into stats[category]; byte totals are converted to KB, rounding up. */ static void translate_raw_stats(rgw_bucket_dir_header& header, map& stats) +{ + map::iterator iter = header.stats.begin(); + for (; iter != header.stats.end(); ++iter) { + RGWObjCategory category = (RGWObjCategory)iter->first; + RGWBucketStats& s = stats[category]; + struct rgw_bucket_category_stats& header_stats = iter->second; + s.category = (RGWObjCategory)iter->first; + s.num_kb = 
((header_stats.total_size + 1023) / 1024); + s.num_kb_rounded = ((header_stats.total_size_rounded + 1023) / 1024); + s.num_objects = header_stats.num_entries; + } +} + +/* Fetches the stored and the recalculated index header for bucket and translates both into per-category stats. Both output maps are dereferenced unconditionally, so neither pointer may be NULL. Returns the negative result of open_bucket or of the check op, else 0. */ int RGWRados::bucket_check_index(rgw_bucket& bucket, + map *existing_stats, + map *calculated_stats) +{ + librados::IoCtx io_ctx; + string oid; + + int ret = open_bucket(bucket, io_ctx, oid); + if (ret < 0) + return ret; + + rgw_bucket_dir_header existing_header; + rgw_bucket_dir_header calculated_header; + + ret = cls_rgw_bucket_check_index_op(io_ctx, oid, &existing_header, &calculated_header); + if (ret < 0) + return ret; + + translate_raw_stats(existing_header, *existing_stats); + translate_raw_stats(calculated_header, *calculated_stats); + + return 0; +} + +/* Opens the bucket and forwards to cls_rgw_bucket_rebuild_index_op on its index object; returns that result or the open_bucket error. */ int RGWRados::bucket_rebuild_index(rgw_bucket& bucket) +{ + librados::IoCtx io_ctx; + string oid; + + int ret = open_bucket(bucket, io_ctx, oid); + if (ret < 0) + return ret; + + return cls_rgw_bucket_rebuild_index_op(io_ctx, oid); +} + + int RGWRados::defer_gc(void *ctx, rgw_obj& obj) { RGWRadosCtx *rctx = (RGWRadosCtx *)ctx; @@ -2450,16 +2521,8 @@ int RGWRados::get_bucket_stats(rgw_bucket& bucket, map::iterator iter = header.stats.begin(); - for (; iter != header.stats.end(); ++iter) { - RGWObjCategory category = (RGWObjCategory)iter->first; - RGWBucketStats& s = stats[category]; - struct rgw_bucket_category_stats& stats = iter->second; - s.category = (RGWObjCategory)iter->first; - s.num_kb = ((stats.total_size + 1023) / 1024); - s.num_kb_rounded = ((stats.total_size_rounded + 1023) / 1024); - s.num_objects = stats.num_entries; - } + + translate_raw_stats(header, stats); /* replaces the removed inline loop with the shared helper */ return 0; } @@ -2727,22 +2790,13 @@ int RGWRados::cls_rgw_init_index(librados::IoCtx& io_ctx, librados::ObjectWriteO int RGWRados::cls_obj_prepare_op(rgw_bucket& bucket, uint8_t op, string& tag, string& name, string& locator) { - if (bucket_is_system(bucket)) - return 0; - - if (bucket.marker.empty()) { - ldout(cct, 0) << "ERROR: empty marker for cls_rgw bucket operation" 
<< dendl; - return -EIO; - } - librados::IoCtx io_ctx; - int r = open_bucket_ctx(bucket, io_ctx); + string oid; + + int r = open_bucket(bucket, io_ctx, oid); /* NOTE(review): open_bucket returns -EINVAL for system buckets, whereas the removed check returned 0 here - confirm no caller relies on the old no-op */ if (r < 0) return r; - string oid = dir_oid_prefix; - oid.append(bucket.marker); - ObjectWriteOperation o; cls_rgw_bucket_prepare_op(o, op, tag, name, locator); r = io_ctx.operate(oid, &o); @@ -2751,22 +2805,13 @@ int RGWRados::cls_obj_prepare_op(rgw_bucket& bucket, uint8_t op, string& tag, int RGWRados::cls_obj_complete_op(rgw_bucket& bucket, uint8_t op, string& tag, uint64_t epoch, RGWObjEnt& ent, RGWObjCategory category) { - if (bucket_is_system(bucket)) - return 0; - - if (bucket.marker.empty()) { - ldout(cct, 0) << "ERROR: empty marker for cls_rgw bucket operation" << dendl; - return -EIO; - } - librados::IoCtx io_ctx; - int r = open_bucket_ctx(bucket, io_ctx); + string oid; + + int r = open_bucket(bucket, io_ctx, oid); /* NOTE(review): same change as in cls_obj_prepare_op - system buckets now get -EINVAL instead of 0 */ if (r < 0) return r; - string oid = dir_oid_prefix; - oid.append(bucket.marker); - ObjectWriteOperation o; rgw_bucket_dir_entry_meta dir_meta; dir_meta.size = ent.size; @@ -2810,18 +2855,11 @@ int RGWRados::cls_bucket_list(rgw_bucket& bucket, string start, string prefix, ldout(cct, 10) << "cls_bucket_list " << bucket << " start " << start << " num " << num << dendl; librados::IoCtx io_ctx; - int r = open_bucket_ctx(bucket, io_ctx); + string oid; + int r = open_bucket(bucket, io_ctx, oid); /* NOTE(review): the removed code had no system-bucket check here; open_bucket adds one that returns -EINVAL */ if (r < 0) return r; - if (bucket.marker.empty()) { - ldout(cct, 0) << "ERROR: empty marker for cls_rgw bucket operation" << dendl; - return -EIO; - } - - string oid = dir_oid_prefix; - oid.append(bucket.marker); - struct rgw_bucket_dir dir; r = cls_rgw_list_op(io_ctx, oid, start, prefix, num, &dir, is_truncated); if (r < 0) @@ -2992,18 +3030,11 @@ int RGWRados::check_disk_state(librados::IoCtx io_ctx, int RGWRados::cls_bucket_head(rgw_bucket& bucket, struct rgw_bucket_dir_header& header) { librados::IoCtx io_ctx; - int r = open_bucket_ctx(bucket, io_ctx); + string oid; + int r = open_bucket(bucket, 
io_ctx, oid); if (r < 0) return r; - if (bucket.marker.empty()) { - ldout(cct, 0) << "ERROR: empty marker for cls_rgw bucket operation" << dendl; - return -EIO; - } - - string oid = dir_oid_prefix; - oid.append(bucket.marker); - r = cls_rgw_get_dir_header(io_ctx, oid, &header); if (r < 0) return r; diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h index 8b0b48e08af75..6637e56ae2af2 100644 --- a/src/rgw/rgw_rados.h +++ b/src/rgw/rgw_rados.h @@ -209,6 +209,7 @@ class RGWRados int open_gc_pool_ctx(); int open_bucket_ctx(rgw_bucket& bucket, librados::IoCtx& io_ctx); + int open_bucket(rgw_bucket& bucket, librados::IoCtx& io_ctx, string& bucket_oid); /* opens io_ctx for the bucket and fills bucket_oid with dir_oid_prefix plus bucket.marker */ struct GetObjState { librados::IoCtx io_ctx; @@ -610,6 +611,11 @@ public: int list_gc_objs(int *index, string& marker, uint32_t max, std::list& result, bool *truncated); int process_gc(); int defer_gc(void *ctx, rgw_obj& obj); + + int bucket_check_index(rgw_bucket& bucket, + map *existing_stats, + map *calculated_stats); /* reports stored and recalculated per-category stats; both maps are outputs */ + int bucket_rebuild_index(rgw_bucket& bucket); /* asks the index object to rewrite its header from recalculated stats */ private: int process_intent_log(rgw_bucket& bucket, string& oid, time_t epoch, int flags, bool purge); -- 2.39.5