}
}
-void PrimaryLogPG::dec_refcount(ObjectContextRef obc, const object_info_t& oi,
- const object_ref_delta_t& refs)
+void PrimaryLogPG::dec_refcount(ObjectContextRef obc, const object_ref_delta_t& refs)
{
for (auto p = refs.begin(); p != refs.end(); ++p) {
- dout(10) << __func__ << ": decrement reference on offset oid: " << p->first << dendl;
- object_locator_t target_oloc(p->first);
- refcount_manifest(obc, obc->obs.oi.soid, target_oloc, p->first,
- SnapContext(), refcount_t::DECREMENT_REF, NULL);
+ int dec_ref_count = p->second;
+ while (dec_ref_count < 0) {
+ dout(10) << __func__ << ": decrement reference on offset oid: " << p->first << dendl;
+ refcount_manifest(obc->obs.oi.soid, p->first,
+ refcount_t::DECREMENT_REF, NULL);
+ dec_ref_count++;
+ }
}
}
-void PrimaryLogPG::dec_all_refcount_head_manifest(object_info_t& oi, OpContext* ctx, const hobject_t &coid)
+void PrimaryLogPG::dec_all_refcount_manifest(object_info_t& oi, OpContext* ctx)
{
SnapSetContext* ssc = ctx->obc->ssc;
ceph_assert(oi.has_manifest());
- ceph_assert(oi.soid.is_head());
// has snapshot
if (ssc && ssc->snapset.clones.size() > 0) {
ceph_assert(ssc);
SnapSet& snapset = ssc->snapset;
// check adjacent clones
- auto s = std::find(snapset.clones.begin(), snapset.clones.end(), coid.snap);
+ auto s = std::find(snapset.clones.begin(), snapset.clones.end(), oi.soid.snap);
int index = std::distance(snapset.clones.begin(), s);
snapid_t l = *snapset.clones.begin(), g = CEPH_NOSNAP;
object_manifest_t* manifest_g = nullptr, * manifest_l = nullptr;
- hobject_t t_oid = coid;
+ hobject_t t_oid = oi.soid;
auto get_context = [this](const hobject_t & oid)
-> ObjectContextRef {
manifest_l = &get_context(t_oid)->obs.oi.manifest;
}
- if (*s != snapset.clones.back() && s != snapset.clones.end()) {
+ if (snapset.clones.size() != 0 && s != snapset.clones.end()
+ && *s != snapset.clones.back()) {
g = snapset.clones[index + 1];
t_oid.snap = g;
manifest_g = &get_context(t_oid)->obs.oi.manifest;
} else if (*s == snapset.clones.back()) {
- manifest_g = &oi.manifest;
+ manifest_g = &get_context(oi.soid.get_head())->obs.oi.manifest;
}
- if (!manifest_g) {
- oi.manifest.calc_refs_to_drop_on_removal(manifest_g, manifest_l, refs);
- } else {
- get_context(coid)->obs.oi.manifest.calc_refs_to_drop_on_removal(manifest_g, manifest_l, refs);
- }
+
+ oi.manifest.calc_refs_to_drop_on_removal(manifest_g, manifest_l, refs);
if (!refs.is_empty()) {
ctx->register_on_commit(
- [oi, ctx, this, refs](){
- dec_refcount(ctx->obc, oi, refs);
+ [ctx, this, refs](){
+ dec_refcount(ctx->obc, refs);
});
}
return;
// no snapshot
if ((oi.flags & object_info_t::FLAG_REDIRECT_HAS_REFERENCE) && oi.manifest.is_redirect()) {
ctx->register_on_commit(
- [oi, ctx, this](){
- object_locator_t target_oloc(oi.manifest.redirect_target);
- refcount_manifest(ctx->obc, oi.soid, target_oloc, oi.manifest.redirect_target,
- SnapContext(), refcount_t::DECREMENT_REF, NULL);
+ [oi, this](){
+ refcount_manifest(oi.soid, oi.manifest.redirect_target,
+ refcount_t::DECREMENT_REF, NULL);
});
} else if (oi.manifest.is_chunked()) {
ctx->register_on_commit(
- [oi, ctx, this](){
+ [oi, this](){
for (auto p : oi.manifest.chunk_map) {
if (p.second.has_reference()) {
- object_locator_t target_oloc(p.second.oid);
- refcount_manifest(ctx->obc, oi.soid, target_oloc, p.second.oid,
- SnapContext(), refcount_t::DECREMENT_REF, NULL);
+ refcount_manifest(oi.soid, p.second.oid,
+ refcount_t::DECREMENT_REF, NULL);
}
}
});
}
}
-void PrimaryLogPG::refcount_manifest(ObjectContextRef obc, hobject_t src_soid, object_locator_t oloc,
- hobject_t tgt_soid, SnapContext snapc, refcount_t type, RefCountCallback* cb)
+void PrimaryLogPG::refcount_manifest(hobject_t src_soid, hobject_t tgt_soid, refcount_t type,
+ RefCountCallback* cb)
{
unsigned flags = CEPH_OSD_FLAG_IGNORE_CACHE | CEPH_OSD_FLAG_IGNORE_OVERLAY |
CEPH_OSD_FLAG_RWORDERED;
Context *c = nullptr;
if (cb) {
C_SetManifestRefCountDone *fin =
- new C_SetManifestRefCountDone(cb, obc->obs.oi.soid);
+ new C_SetManifestRefCountDone(cb, src_soid);
c = new C_OnFinisher(fin, osd->get_objecter_finisher(get_pg_shard()));
}
+ object_locator_t oloc(tgt_soid);
+ ObjectContextRef src_obc = get_object_context(src_soid, false, NULL);
+ ceph_assert(src_obc);
auto tid = osd->objecter->mutate(
- tgt_soid.oid, oloc, obj_op, snapc,
- ceph::real_clock::from_ceph_timespec(obc->obs.oi.mtime),
+ tgt_soid.oid, oloc, obj_op, SnapContext(),
+ ceph::real_clock::from_ceph_timespec(src_obc->obs.oi.mtime),
flags, c);
if (cb) {
- manifest_ops[obc->obs.oi.soid] = std::make_shared<ManifestOp>(cb, tid);
- obc->start_block();
+ manifest_ops[src_soid] = std::make_shared<ManifestOp>(cb, tid);
+ src_obc->start_block();
}
}
if (coi.is_cache_pinned())
ctx->delta_stats.num_objects_pinned--;
if (coi.has_manifest()) {
- dec_all_refcount_head_manifest(head_obc->obs.oi, ctx.get(), coid);
+ dec_all_refcount_manifest(coi, ctx.get());
ctx->delta_stats.num_objects_manifest--;
}
obc->obs.exists = false;
}
if (oi.has_manifest()) {
ctx->delta_stats.num_objects_manifest--;
- dec_all_refcount_head_manifest(oi, ctx.get(), hobject_t());
+ dec_all_refcount_manifest(oi, ctx.get());
}
head_obc->obs.exists = false;
head_obc->obs.oi = object_info_t(head_oid);
ctx->op_finishers[ctx->current_osd_subop_num].reset(
new SetManifestFinisher(osd_op));
RefCountCallback *fin = new RefCountCallback(ctx, osd_op);
- refcount_manifest(ctx->obc, ctx->obc->obs.oi.soid, target_oloc, target,
- SnapContext(), refcount_t::INCREMENT_REF, fin);
+ refcount_manifest(ctx->obc->obs.oi.soid, target,
+ refcount_t::INCREMENT_REF, fin);
result = -EINPROGRESS;
} else {
// finish
ctx->op_finishers[ctx->current_osd_subop_num].reset(
new SetManifestFinisher(osd_op));
RefCountCallback *fin = new RefCountCallback(ctx, osd_op);
- refcount_manifest(ctx->obc, ctx->obc->obs.oi.soid, tgt_oloc, target,
- SnapContext(), refcount_t::INCREMENT_REF, fin);
+ refcount_manifest(ctx->obc->obs.oi.soid, target,
+ refcount_t::INCREMENT_REF, fin);
result = -EINPROGRESS;
} else {
if (op_finisher) {
break;
}
- dec_all_refcount_head_manifest(oi, ctx, hobject_t());
+ dec_all_refcount_manifest(oi, ctx);
oi.clear_flag(object_info_t::FLAG_MANIFEST);
oi.manifest = object_manifest_t();
if (oi.has_manifest()) {
ctx->delta_stats.num_objects_manifest--;
- dec_all_refcount_head_manifest(oi, ctx, hobject_t());
+ dec_all_refcount_manifest(oi, ctx);
}
if (whiteout) {
void handle_manifest_flush(hobject_t oid, ceph_tid_t tid, int r,
uint64_t offset, uint64_t last_offset, epoch_t lpr);
void cancel_manifest_ops(bool requeue, vector<ceph_tid_t> *tids);
- void refcount_manifest(ObjectContextRef obc, hobject_t src_soid, object_locator_t oloc,
- hobject_t tgt_soid, SnapContext snapc, refcount_t type, RefCountCallback* cb);
- void dec_all_refcount_head_manifest(object_info_t& oi, OpContext* ctx, const hobject_t &coid);
- void dec_refcount(ObjectContextRef obc, const object_info_t& oi,
- const object_ref_delta_t& refs);
+ void refcount_manifest(hobject_t src_soid, hobject_t tgt_soid, refcount_t type,
+ RefCountCallback* cb);
+ void dec_all_refcount_manifest(object_info_t& oi, OpContext* ctx);
+ void dec_refcount(ObjectContextRef obc, const object_ref_delta_t& refs);
friend struct C_ProxyChunkRead;
friend class PromoteManifestCallback;
}
}
+TEST_F(LibRadosTwoPoolsPP, ManifestSnapRefcount2) {
+ // skip test if not yet octopus
+ if (_get_required_osd_release(cluster) < "octopus") {
+ cout << "cluster is not yet octopus, skipping test" << std::endl;
+ return;
+ }
+
+ bufferlist inbl;
+ ASSERT_EQ(0, cluster.mon_command(
+ set_pool_str(pool_name, "fingerprint_algorithm", "sha1"),
+ inbl, NULL, NULL));
+ cluster.wait_for_latest_osdmap();
+
+ // create object
+ {
+ bufferlist bl;
+ bl.append("there hiHI");
+ ObjectWriteOperation op;
+ op.write_full(bl);
+ ASSERT_EQ(0, ioctx.operate("foo", &op));
+ }
+ {
+ bufferlist bl;
+ bl.append("there hiHI");
+ ObjectWriteOperation op;
+ op.write_full(bl);
+ ASSERT_EQ(0, cache_ioctx.operate("bar", &op));
+ }
+
+ // wait for maps to settle
+ cluster.wait_for_latest_osdmap();
+
+ // set-chunk (dedup)
+ {
+ ObjectWriteOperation op;
+ op.set_chunk(2, 2, cache_ioctx, "bar", 0,
+ CEPH_OSD_OP_FLAG_WITH_REFERENCE);
+ librados::AioCompletion *completion = cluster.aio_create_completion();
+ ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
+ completion->wait_for_complete();
+ ASSERT_EQ(0, completion->get_return_value());
+ completion->release();
+ }
+ // set-chunk (dedup)
+ {
+ ObjectWriteOperation op;
+ op.set_chunk(6, 2, cache_ioctx, "bar", 0,
+ CEPH_OSD_OP_FLAG_WITH_REFERENCE);
+ librados::AioCompletion *completion = cluster.aio_create_completion();
+ ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
+ completion->wait_for_complete();
+ ASSERT_EQ(0, completion->get_return_value());
+ completion->release();
+ }
+ // set-chunk (dedup)
+ {
+ ObjectWriteOperation op;
+ op.set_chunk(8, 2, cache_ioctx, "bar", 0,
+ CEPH_OSD_OP_FLAG_WITH_REFERENCE);
+ librados::AioCompletion *completion = cluster.aio_create_completion();
+ ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
+ completion->wait_for_complete();
+ ASSERT_EQ(0, completion->get_return_value());
+ completion->release();
+ }
+
+
+ // make all chunks dirty --> flush
+ // foo: [ab] [cd] [ef]
+
+ // make a dirty chunks
+ {
+ bufferlist bl;
+ bl.append("Thabe cd");
+ ObjectWriteOperation op;
+ op.write_full(bl);
+ ASSERT_EQ(0, ioctx.operate("foo", &op));
+ }
+ // flush
+ {
+ ObjectReadOperation op;
+ op.tier_flush();
+ librados::AioCompletion *completion = cluster.aio_create_completion();
+ ASSERT_EQ(0, ioctx.aio_operate(
+ "foo", completion, &op,
+ librados::OPERATION_IGNORE_CACHE, NULL));
+ completion->wait_for_complete();
+ ASSERT_EQ(0, completion->get_return_value());
+ completion->release();
+ }
+
+ // create a snapshot, clone
+ vector<uint64_t> my_snaps(1);
+ ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
+ ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
+ my_snaps));
+
+ // foo: [BB] [BB] [ef]
+ // make a dirty chunks
+ {
+ bufferlist bl;
+ bl.append("ThBBe BB");
+ ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
+ }
+ // flush
+ {
+ ObjectReadOperation op;
+ op.tier_flush();
+ librados::AioCompletion *completion = cluster.aio_create_completion();
+ ASSERT_EQ(0, ioctx.aio_operate(
+ "foo", completion, &op,
+ librados::OPERATION_IGNORE_CACHE, NULL));
+ completion->wait_for_complete();
+ ASSERT_EQ(0, completion->get_return_value());
+ completion->release();
+ }
+
+ // and another
+ my_snaps.resize(2);
+ my_snaps[1] = my_snaps[0];
+ ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
+ ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
+ my_snaps));
+
+ // foo: [ab] [cd] [ef]
+ // make a dirty chunks
+ {
+ bufferlist bl;
+ bl.append("Thabe cd");
+ ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
+ }
+ // flush
+ {
+ ObjectReadOperation op;
+ op.tier_flush();
+ librados::AioCompletion *completion = cluster.aio_create_completion();
+ ASSERT_EQ(0, ioctx.aio_operate(
+ "foo", completion, &op,
+ librados::OPERATION_IGNORE_CACHE, NULL));
+ completion->wait_for_complete();
+ ASSERT_EQ(0, completion->get_return_value());
+ completion->release();
+ }
+
+ /*
+ * snap[1]: [ab] [cd] [ef]
+ * snap[0]: [BB] [BB] [ef]
+ * head: [ab] [cd] [ef]
+ */
+
+ // check chunk's refcount
+ {
+ bufferlist t;
+ SHA1 sha1_gen;
+ int size = strlen("ab");
+ unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
+ char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
+ sha1_gen.Update((const unsigned char *)"ab", size);
+ sha1_gen.Final(fingerprint);
+ buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
+ cache_ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t);
+ chunk_refs_t refs;
+ try {
+ auto iter = t.cbegin();
+ decode(refs, iter);
+ } catch (buffer::error& err) {
+ ASSERT_TRUE(0);
+ }
+ ASSERT_EQ(2u, refs.count());
+ }
+
+ // check chunk's refcount
+ {
+ bufferlist t;
+ SHA1 sha1_gen;
+ int size = strlen("cd");
+ unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
+ char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
+ sha1_gen.Update((const unsigned char *)"cd", size);
+ sha1_gen.Final(fingerprint);
+ buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
+ cache_ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t);
+ chunk_refs_t refs;
+ try {
+ auto iter = t.cbegin();
+ decode(refs, iter);
+ } catch (buffer::error& err) {
+ ASSERT_TRUE(0);
+ }
+ ASSERT_EQ(2u, refs.count());
+ }
+
+ // check chunk's refcount
+ {
+ bufferlist t;
+ SHA1 sha1_gen;
+ int size = strlen("BB");
+ unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
+ char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
+ sha1_gen.Update((const unsigned char *)"BB", size);
+ sha1_gen.Final(fingerprint);
+ buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
+ cache_ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t);
+ chunk_refs_t refs;
+ try {
+ auto iter = t.cbegin();
+ decode(refs, iter);
+ } catch (buffer::error& err) {
+ ASSERT_TRUE(0);
+ }
+ ASSERT_EQ(2u, refs.count());
+ }
+
+ // remove snap
+ ioctx.selfmanaged_snap_remove(my_snaps[0]);
+
+ /*
+ * snap[1]: [ab] [cd] [ef]
+ * head: [ab] [cd] [ef]
+ */
+
+ sleep(10);
+
+ // check chunk's refcount
+ {
+ bufferlist t;
+ SHA1 sha1_gen;
+ int size = strlen("BB");
+ unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
+ char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
+ sha1_gen.Update((const unsigned char *)"BB", size);
+ sha1_gen.Final(fingerprint);
+ buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
+ int r = cache_ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t);
+ ASSERT_EQ(-ENOENT, r);
+ }
+}
+
TEST_F(LibRadosTwoPoolsPP, ManifestFlushSnap) {
// skip test if not yet octopus
if (_get_required_osd_release(cluster) < "octopus") {