From: myoungwon oh Date: Mon, 14 Sep 2020 06:07:01 +0000 (+0900) Subject: osd: add tier_evict for manifest tier X-Git-Tag: v16.1.0~911^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3e429a0362ed5a4f614da142b0def1750eedf283;p=ceph.git osd: add tier_evict for manifest tier Truncate the object size to 0 if the manifest object is composed of chunks. Signed-off-by: Myoungwon Oh --- diff --git a/src/include/rados.h b/src/include/rados.h index fa920826d42d..178717378016 100644 --- a/src/include/rados.h +++ b/src/include/rados.h @@ -327,6 +327,7 @@ extern const char *ceph_osd_state_name(int s); f(TIER_PROMOTE, __CEPH_OSD_OP(WR, DATA, 41), "tier-promote") \ f(UNSET_MANIFEST, __CEPH_OSD_OP(WR, DATA, 42), "unset-manifest") \ f(TIER_FLUSH, __CEPH_OSD_OP(CACHE, DATA, 43), "tier-flush") \ + f(TIER_EVICT, __CEPH_OSD_OP(CACHE, DATA, 44), "tier-evict") \ \ /** attrs **/ \ /* read */ \ diff --git a/src/include/rados/librados.hpp b/src/include/rados/librados.hpp index d4bf6af35623..a3f64b63f4ec 100644 --- a/src/include/rados/librados.hpp +++ b/src/include/rados/librados.hpp @@ -753,6 +753,11 @@ inline namespace v14_2_0 { * updates. */ void tier_flush(); + /** + * evict a manifest tier object to backing tier; will block racing + * updates. + */ + void tier_evict(); }; /* IoCtx : This is a context in which we can perform I/O. 
diff --git a/src/librados/librados_cxx.cc b/src/librados/librados_cxx.cc index 24810410b64c..204078367382 100644 --- a/src/librados/librados_cxx.cc +++ b/src/librados/librados_cxx.cc @@ -638,6 +638,13 @@ void librados::ObjectReadOperation::tier_flush() o->tier_flush(); } +void librados::ObjectReadOperation::tier_evict() +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->tier_evict(); +} + void librados::ObjectWriteOperation::set_redirect(const std::string& tgt_obj, const IoCtx& tgt_ioctx, uint64_t tgt_version, diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 50dd62433720..471273822800 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -2375,7 +2375,8 @@ PrimaryLogPG::cache_result_t PrimaryLogPG::maybe_handle_manifest_detail( op.op == CEPH_OSD_OP_SET_CHUNK || op.op == CEPH_OSD_OP_UNSET_MANIFEST || op.op == CEPH_OSD_OP_TIER_PROMOTE || - op.op == CEPH_OSD_OP_TIER_FLUSH) { + op.op == CEPH_OSD_OP_TIER_FLUSH || + op.op == CEPH_OSD_OP_TIER_EVICT) { return cache_result_t::NOOP; } } @@ -5655,6 +5656,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector& ops) case CEPH_OSD_OP_SET_CHUNK: case CEPH_OSD_OP_TIER_PROMOTE: case CEPH_OSD_OP_TIER_FLUSH: + case CEPH_OSD_OP_TIER_EVICT: break; default: if (op.op & CEPH_OSD_OP_MODE_WR) @@ -7038,6 +7040,48 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector& ops) break; + case CEPH_OSD_OP_TIER_EVICT: + ++ctx->num_write; + result = 0; + { + if (pool.info.is_tier()) { + result = -EINVAL; + break; + } + if (!obs.exists) { + result = -ENOENT; + break; + } + if (get_osdmap()->require_osd_release < ceph_release_t::octopus) { + result = -EOPNOTSUPP; + break; + } + if (!obs.oi.has_manifest()) { + result = -EINVAL; + break; + } + + // The chunks already hold references, so it is enough to truncate the object when the chunks cover its whole size + uint64_t chunk_length = 0; + for (const auto& p : obs.oi.manifest.chunk_map) { + chunk_length += p.second.length; + } + if (chunk_length == obs.oi.size) { + // truncate; iterate by reference so FLAG_MISSING is set on the stored chunk entries rather than on copies + for 
(auto& p : obs.oi.manifest.chunk_map) { + p.second.set_flag(chunk_info_t::FLAG_MISSING); + } + t->truncate(obs.oi.soid, 0); + ctx->delta_stats.num_bytes -= obs.oi.size; + ctx->delta_stats.num_wr++; + oi.size = 0; + ctx->cache_operation = true; + } + osd->logger->inc(l_osd_tier_evict); + } + + break; + case CEPH_OSD_OP_UNSET_MANIFEST: ++ctx->num_write; result = 0; diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h index c30fb20a8a3a..96791d0eb75e 100644 --- a/src/osdc/Objecter.h +++ b/src/osdc/Objecter.h @@ -1557,6 +1557,10 @@ struct ObjectOperation { add_op(CEPH_OSD_OP_TIER_FLUSH); } + void tier_evict() { + add_op(CEPH_OSD_OP_TIER_EVICT); + } + void set_alloc_hint(uint64_t expected_object_size, uint64_t expected_write_size, uint32_t flags) { diff --git a/src/tools/rados/rados.cc b/src/tools/rados/rados.cc index 504c5a014fd4..1357f8751ae4 100644 --- a/src/tools/rados/rados.cc +++ b/src/tools/rados/rados.cc @@ -132,6 +132,7 @@ void usage(ostream& out) " tier-promote promote the object to the base tier\n" " unset-manifest unset redirect or chunked object\n" " tier-flush flush the chunked object\n" +" tier-evict evict the chunked object\n" "\n" "IMPORT AND EXPORT\n" " export [filename]\n" @@ -3820,6 +3821,29 @@ static int rados_tool_common(const std::map < std::string, std::string > &opts, << cpp_strerror(ret) << std::endl; return 1; } + } else if (strcmp(nargs[0], "tier-evict") == 0) { + if (!pool_name || nargs.size() < 2) { + usage(cerr); + return 1; + } + string oid(nargs[1]); + + ObjectReadOperation op; + op.tier_evict(); + librados::AioCompletion *completion = + librados::Rados::aio_create_completion(); + io_ctx.aio_operate(oid.c_str(), completion, &op, + librados::OPERATION_IGNORE_CACHE | + librados::OPERATION_IGNORE_OVERLAY, + NULL); + completion->wait_for_complete(); + ret = completion->get_return_value(); + completion->release(); + if (ret < 0) { + cerr << "error tier-evict " << pool_name << "/" << oid << " : " + << cpp_strerror(ret) << std::endl; + return 
1; + } } else if (strcmp(nargs[0], "export") == 0) { // export [filename] if (!pool_name || nargs.size() > 2) {