From: J. Eric Ivancich Date: Fri, 21 Jan 2022 20:30:45 +0000 (-0500) Subject: rgw: configurable instrumentation on bucket index transactions X-Git-Tag: v18.0.0~1183^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=cdfc29ff5a88993afd884df2d29b086612b7bc2a;p=ceph.git rgw: configurable instrumentation on bucket index transactions In order to better understand corner cases with bucket index operations, extra instrumentation is now added and controlled by a boolean configuration variable ("rgw_bucket_index_transaction_instrumentation"). When set to true, there is extra logging during all CLS operations involving bucket index transactions. Additionally, all these log entries are tagged with "BITX" to make them easier to find in the logs. This is preferable to setting all OSD logging at a high level due to the log size issues. Signed-off-by: J. Eric Ivancich --- diff --git a/src/cls/rgw/cls_rgw.cc b/src/cls/rgw/cls_rgw.cc index 0ef26edfd25..0bdcd495332 100644 --- a/src/cls/rgw/cls_rgw.cc +++ b/src/cls/rgw/cls_rgw.cc @@ -35,6 +35,13 @@ using ceph::timespan; CLS_VER(1,0) CLS_NAME(rgw) +// special logging for bucket index transaction instrumentation; if +// instrumenting, log at level 0 and include string "BITX" in log +// message to make entries easier to find +#define CLS_LOG_BITX(is_bitx, level, fmt, ...) \ + if (is_bitx) \ + { CLS_LOG(0, "BITX: " fmt, ##__VA_ARGS__); } \ + else { CLS_LOG(level, fmt, ##__VA_ARGS__); } // No UTF-8 character can begin with 0x80, so this is a safe indicator // of a special bucket-index entry for the first byte. Note: although @@ -851,31 +858,44 @@ static int read_key_entry(cls_method_context_t hctx, const cls_rgw_obj_key& key, int rgw_bucket_prepare_op(cls_method_context_t hctx, bufferlist *in, bufferlist *out) { CLS_LOG(10, "entered %s", __func__); + + const ConfigProxy& conf = cls_get_config(hctx); + + // bucket index transaction instrumentation + const bool bitx_inst = + conf->rgw_bucket_index_transaction_instrumentation; + // decode request rgw_cls_obj_prepare_op op; auto iter = in->cbegin(); try { decode(op, iter); } catch (ceph::buffer::error& err) { - CLS_LOG(1, "ERROR: rgw_bucket_prepare_op(): failed to decode request\n"); + CLS_LOG_BITX(bitx_inst, 1, + "ERROR: %s: failed to decode request", __func__); return -EINVAL; } if (op.tag.empty()) { - CLS_LOG(1, "ERROR: tag is empty\n"); + CLS_LOG_BITX(bitx_inst, 1, "ERROR: %s: tag is empty", __func__); return -EINVAL; } - CLS_LOG(1, "rgw_bucket_prepare_op(): request: op=%d name=%s instance=%s tag=%s", - op.op, op.key.name.c_str(), op.key.instance.c_str(), op.tag.c_str()); + CLS_LOG_BITX(bitx_inst, 1, + "%s: request: op=%d name=%s instance=%s tag=%s", __func__, + op.op, op.key.name.c_str(), op.key.instance.c_str(), op.tag.c_str()); // get on-disk state - string idx; + std::string idx; rgw_bucket_dir_entry entry; int rc = read_key_entry(hctx, op.key, &idx, &entry); - if (rc < 0 && rc != -ENOENT) + if (rc < 0 && rc != -ENOENT) { + CLS_LOG_BITX(bitx_inst, 1, + "ERROR: %s could not read key entry, key=%s(%s), rc=%d", + __func__, op.key.name.c_str(), op.key.instance.c_str(), rc); return rc; + } bool noent = (rc == -ENOENT); @@ -898,7 +918,15 @@ int rgw_bucket_prepare_op(cls_method_context_t hctx, bufferlist *in, bufferlist // write out new key to disk bufferlist info_bl; encode(entry, info_bl); - return cls_cxx_map_set_val(hctx, idx, &info_bl); + rc = cls_cxx_map_set_val(hctx, idx, &info_bl); + if (rc < 0) { + CLS_LOG_BITX(bitx_inst, 1, + "ERROR: %s could not set value for key, key=%s, rc=%d", + __func__, idx.c_str(), rc); + return rc; + } + + return 0; } static void unaccount_entry(rgw_bucket_dir_header& header, @@ -1035,25 +1063,33 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist { CLS_LOG(10, "entered %s", __func__); + const ConfigProxy& conf = cls_get_config(hctx); + + // bucket index transaction instrumentation + const bool bitx_inst = + conf->rgw_bucket_index_transaction_instrumentation; + // decode request rgw_cls_obj_complete_op op; auto iter = in->cbegin(); try { decode(op, iter); } catch (ceph::buffer::error& err) { - CLS_LOG(1, "ERROR: rgw_bucket_complete_op(): failed to decode request\n"); + CLS_LOG_BITX(bitx_inst, 1, "ERROR: %s: failed to decode request", __func__); return -EINVAL; } - CLS_LOG(1, "rgw_bucket_complete_op(): request: op=%d name=%s instance=%s ver=%lu:%llu tag=%s", - op.op, op.key.name.c_str(), op.key.instance.c_str(), - (unsigned long)op.ver.pool, (unsigned long long)op.ver.epoch, - op.tag.c_str()); + CLS_LOG_BITX(bitx_inst, 1, + "%s: request: op=%d name=%s instance=%s ver=%lu:%llu tag=%s", + __func__, + op.op, op.key.name.c_str(), op.key.instance.c_str(), + (unsigned long)op.ver.pool, (unsigned long long)op.ver.epoch, + op.tag.c_str()); rgw_bucket_dir_header header; int rc = read_bucket_header(hctx, &header); if (rc < 0) { - CLS_LOG(1, "ERROR: rgw_bucket_complete_op(): failed to read header\n"); + CLS_LOG_BITX(bitx_inst, 1, "ERROR: %s: failed to read header", __func__); return -EINVAL; } @@ -1069,6 +1105,9 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist entry.locator = op.locator; ondisk = false; } else if (rc < 0) { + CLS_LOG_BITX(bitx_inst, 1, + "ERROR: %s: read key entry failed, key=%s(%s), rc=%d", + __func__, op.key.name.c_str(), op.key.instance.c_str(), rc); return rc; } @@ -1080,17 +1119,20 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist if (op.tag.size()) { auto pinter = entry.pending_map.find(op.tag); if (pinter == entry.pending_map.end()) { - CLS_LOG(1, "ERROR: couldn't find tag for pending operation\n"); + CLS_LOG_BITX(bitx_inst, 1, + "ERROR: %s: couldn't find tag for pending operation with tag:\"%s\"", + __func__, op.tag.c_str()); return -EINVAL; } entry.pending_map.erase(pinter); } if (op.tag.size() && op.op == CLS_RGW_OP_CANCEL) { - CLS_LOG(1, "rgw_bucket_complete_op(): cancel requested\n"); + CLS_LOG_BITX(bitx_inst, 1, "%s: cancel requested", __func__); } else if (op.ver.pool == entry.ver.pool && op.ver.epoch && op.ver.epoch <= entry.ver.epoch) { - CLS_LOG(1, "rgw_bucket_complete_op(): skipping request, old epoch\n"); + CLS_LOG_BITX(bitx_inst, 1, + "%s: skipping request, old epoch", __func__); op.op = CLS_RGW_OP_CANCEL; } @@ -1108,6 +1150,9 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist encode(entry, new_key_bl); rc = cls_cxx_map_set_val(hctx, idx, &new_key_bl); if (rc < 0) { + CLS_LOG_BITX(bitx_inst, 1, + "ERROR: %s: unable to set map val, key=%s, rc=%d", + __func__, idx.c_str(), rc); return rc; } } @@ -1123,6 +1168,9 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist } else if (!entry.pending_map.size()) { rc = cls_cxx_map_remove_key(hctx, idx); if (rc < 0) { + CLS_LOG_BITX(bitx_inst, 1, + "ERROR: %s: unable to remove map key, key=%s, rc=%d", + __func__, idx.c_str(), rc); return rc; } } else { @@ -1131,6 +1179,9 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist encode(entry, new_key_bl); rc = cls_cxx_map_set_val(hctx, idx, &new_key_bl); if (rc < 0) { + CLS_LOG_BITX(bitx_inst, 1, + "ERROR: %s: unable to set map val, key=%s, rc=%d", + __func__, idx.c_str(), rc); return rc; } } @@ -1154,6 +1205,9 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist encode(entry, new_key_bl); rc = cls_cxx_map_set_val(hctx, idx, &new_key_bl); if (rc < 0) { + CLS_LOG_BITX(bitx_inst, 1, + "ERROR: %s: unable to log index operation, key=%s(%s), tag=%s, rc=%d", + __func__, op.key.name.c_str(), op.key.instance.c_str(), op.tag.c_str(), rc); return rc; } } // CLS_RGW_OP_ADD @@ -1164,6 +1218,9 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist header.max_marker, op.bilog_flags, NULL, NULL, &op.zones_trace); if (rc < 0) { + CLS_LOG_BITX(bitx_inst, 0, + "%s: log_index_operation failed with rc=%d", + __func__, rc); return rc; } } @@ -1173,12 +1230,24 @@ int rgw_bucket_complete_op(cls_method_context_t hctx, bufferlist *in, bufferlist for (const auto& remove_key : op.remove_objs) { rc = complete_remove_obj(hctx, header, remove_key, default_log_op); if (rc < 0) { + CLS_LOG_BITX(bitx_inst, 1, + "%s: cls_cxx_map_remove_key, failed to remove entry, " + "name=%s instance=%s read_index_entry ret=%d", + __func__, remove_key.name.c_str(), + remove_key.instance.c_str(), rc); continue; // part cleanup errors are not fatal } } - return write_bucket_header(hctx, &header); -} // rgw_bucket_complete_op + rc = write_bucket_header(hctx, &header); + if (rc < 0) { + CLS_LOG_BITX(bitx_inst, 0, + "ERROR: %s: failed to write bucket header ret=%d", + __func__, rc); + } + + return rc; +} template static int write_entry(cls_method_context_t hctx, T& entry, const string& key) @@ -2160,13 +2229,17 @@ int rgw_dir_suggest_changes(cls_method_context_t hctx, const ConfigProxy& conf = cls_get_config(hctx); + // bucket index transaction instrumentation + const bool bitx_inst = + conf->rgw_bucket_index_transaction_instrumentation; + bufferlist header_bl; rgw_bucket_dir_header header; bool header_changed = false; int rc = read_bucket_header(hctx, &header); if (rc < 0) { - CLS_LOG(1, "ERROR: rgw_dir_suggest_changes(): failed to read header\n"); + CLS_LOG_BITX(bitx_inst, 1, "ERROR: %s: failed to read header", __func__); return rc; } @@ -2193,7 +2266,8 @@ int rgw_dir_suggest_changes(cls_method_context_t hctx, decode(op, in_iter); decode(cur_change, in_iter); } catch (ceph::buffer::error& err) { - CLS_LOG(1, "ERROR: rgw_dir_suggest_changes(): failed to decode request\n"); + CLS_LOG_BITX(bitx_inst, 1, + "ERROR: %s: failed to decode request", __func__); return -EINVAL; } @@ -2201,10 +2275,17 @@ int rgw_dir_suggest_changes(cls_method_context_t hctx, string cur_change_key; encode_obj_index_key(cur_change.key, &cur_change_key); int ret = cls_cxx_map_get_val(hctx, cur_change_key, &cur_disk_bl); - if (ret < 0 && ret != -ENOENT) + if (ret < 0 && ret != -ENOENT) { + CLS_LOG_BITX(bitx_inst, 20, + "ERROR: %s: accessing map, key=%s error=%d", __func__, + cur_change_key.c_str(), ret); return -EINVAL; + } if (ret == -ENOENT) { + CLS_LOG_BITX(bitx_inst, 20, + "WARNING: %s: accessing map, key not found key=%s, continuing", + __func__, cur_change_key.c_str()); continue; } @@ -2213,7 +2294,8 @@ int rgw_dir_suggest_changes(cls_method_context_t hctx, try { decode(cur_disk, cur_disk_iter); } catch (ceph::buffer::error& error) { - CLS_LOG(1, "ERROR: rgw_dir_suggest_changes(): failed to decode cur_disk\n"); + CLS_LOG_BITX(bitx_inst, 1, "ERROR: %s: failed to decode cur_disk", + __func__); return -EINVAL; } @@ -2222,18 +2304,25 @@ int rgw_dir_suggest_changes(cls_method_context_t hctx, real_time cur_time = real_clock::now(); auto iter = cur_disk.pending_map.begin(); while(iter != cur_disk.pending_map.end()) { - auto cur_iter = iter++; + auto cur_iter = iter++; // IMPORTANT, cur_iter might be invalidated if (cur_time > (cur_iter->second.timestamp + timespan(tag_timeout))) { + CLS_LOG_BITX(bitx_inst, 0, + "WARNING: %s: expired pending map entry for \"%s\" " + "(pending_state=%d, op=%s) expired and was removed", + __func__, + cur_iter->first.c_str(), + cur_iter->second.state, + std::string(to_string((RGWModifyOp) iter->second.op)).c_str()); cur_disk.pending_map.erase(cur_iter); } - } - } + } // while + } // if - CLS_LOG(20, "cur_disk.pending_map.empty()=%d op=%d cur_disk.exists=%d " - "cur_disk.index_ver=%d cur_change.exists=%d cur_change.index_ver=%d", - cur_disk.pending_map.empty(), (int)op, cur_disk.exists, - (int)cur_disk.index_ver, cur_change.exists, - (int)cur_change.index_ver); + CLS_LOG_BITX(bitx_inst, 20, "cur_disk.pending_map.empty()=%d op=%d cur_disk.exists=%d " + "cur_disk.index_ver=%d cur_change.exists=%d cur_change.index_ver=%d", + cur_disk.pending_map.empty(), (int)op, cur_disk.exists, + (int)cur_disk.index_ver, cur_change.exists, + (int)cur_change.index_ver); if (cur_change.index_ver < cur_disk.index_ver) { // a pending on-disk entry was completed since this suggestion was made, @@ -2245,7 +2334,7 @@ int rgw_dir_suggest_changes(cls_method_context_t hctx, if (cur_disk.pending_map.empty()) { if (cur_disk.exists) { rgw_bucket_category_stats& old_stats = header.stats[cur_disk.meta.category]; - CLS_LOG(10, "total_entries: %" PRId64 " -> %" PRId64 "", old_stats.num_entries, old_stats.num_entries - 1); + CLS_LOG_BITX(bitx_inst, 10, "total_entries: %" PRId64 " -> %" PRId64 "\n", old_stats.num_entries, old_stats.num_entries - 1); old_stats.num_entries--; old_stats.total_size -= cur_disk.meta.accounted_size; old_stats.total_size_rounded -= cls_rgw_get_rounded_size(cur_disk.meta.accounted_size); @@ -2257,22 +2346,29 @@ int rgw_dir_suggest_changes(cls_method_context_t hctx, op &= CEPH_RGW_DIR_SUGGEST_OP_MASK; switch(op) { case CEPH_RGW_REMOVE: - CLS_LOG(10, "CEPH_RGW_REMOVE name=%s instance=%s", cur_change.key.name.c_str(), cur_change.key.instance.c_str()); + CLS_LOG_BITX(bitx_inst, 10, + "CEPH_RGW_REMOVE name=%s instance=%s", + cur_change.key.name.c_str(), cur_change.key.instance.c_str()); ret = cls_cxx_map_remove_key(hctx, cur_change_key); - if (ret < 0) + if (ret < 0) { + CLS_LOG_BITX(bitx_inst, 0, "ERROR: %s: unable to remove key, key=%s, error=%d", + __func__, cur_change_key.c_str(), ret); return ret; + } if (log_op && cur_disk.exists && !header.syncstopped) { ret = log_index_operation(hctx, cur_disk.key, CLS_RGW_OP_DEL, cur_disk.tag, cur_disk.meta.mtime, cur_disk.ver, CLS_RGW_STATE_COMPLETE, header.ver, header.max_marker, 0, NULL, NULL, NULL); if (ret < 0) { - CLS_LOG(0, "ERROR: %s: failed to log operation ret=%d", __func__, ret); + CLS_LOG_BITX(bitx_inst, 0, "ERROR: %s: failed to log operation ret=%d", + __func__, ret); return ret; } } break; case CEPH_RGW_UPDATE: - CLS_LOG(10, "CEPH_RGW_UPDATE name=%s instance=%s total_entries: %" PRId64 " -> %" PRId64 "", - cur_change.key.name.c_str(), cur_change.key.instance.c_str(), stats.num_entries, stats.num_entries + 1); + CLS_LOG_BITX(bitx_inst, 10, + "CEPH_RGW_UPDATE name=%s instance=%s total_entries: %" PRId64 " -> %" PRId64 "\n", + cur_change.key.name.c_str(), cur_change.key.instance.c_str(), stats.num_entries, stats.num_entries + 1); stats.num_entries++; stats.total_size += cur_change.meta.accounted_size; @@ -2283,13 +2379,16 @@ int rgw_dir_suggest_changes(cls_method_context_t hctx, bufferlist cur_state_bl; encode(cur_change, cur_state_bl); ret = cls_cxx_map_set_val(hctx, cur_change_key, &cur_state_bl); - if (ret < 0) + if (ret < 0) { + CLS_LOG_BITX(bitx_inst, 0, "ERROR: %s: unable to set value for key, key=%s, error=%d", + __func__, cur_change_key.c_str(), ret); return ret; + } if (log_op && !header.syncstopped) { ret = log_index_operation(hctx, cur_change.key, CLS_RGW_OP_ADD, cur_change.tag, cur_change.meta.mtime, cur_change.ver, CLS_RGW_STATE_COMPLETE, header.ver, header.max_marker, 0, NULL, NULL, NULL); if (ret < 0) { - CLS_LOG(0, "ERROR: %s: failed to log operation ret=%d", __func__, ret); + CLS_LOG_BITX(bitx_inst, 0, "ERROR: %s: failed to log operation ret=%d", __func__, ret); return ret; } } @@ -2299,7 +2398,13 @@ int rgw_dir_suggest_changes(cls_method_context_t hctx, } // while (!in_iter.end()) if (header_changed) { - return write_bucket_header(hctx, &header); + int ret = write_bucket_header(hctx, &header); + if (ret < 0) { + CLS_LOG_BITX(bitx_inst, 0, + "ERROR: %s: failed to write bucket header ret=%d", + __func__, ret); + } + return ret; } return 0; diff --git a/src/common/options/rgw.yaml.in b/src/common/options/rgw.yaml.in index 39c5d3fb673..1e17bc04a2d 100644 --- a/src/common/options/rgw.yaml.in +++ b/src/common/options/rgw.yaml.in @@ -3317,3 +3317,12 @@ options: - rgw - osd with_legacy: true +- name: rgw_bucket_index_transaction_instrumentation + type: bool + level: dev + default: false + desc: Turns on extra instrumentation surrounding bucket index transactions. + services: + - rgw + - osd + with_legacy: true