]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: add refcounting chunks for snapshotted manifest object
authormyoungwon oh <ohmyoungwon@gmail.com>
Wed, 24 Jul 2019 16:37:37 +0000 (01:37 +0900)
committermyoungwon oh <ohmyoungwon@gmail.com>
Sun, 14 Jun 2020 05:25:49 +0000 (14:25 +0900)
To prevent removing a manifest object in use (has the reference count), the chunk maps in clone is
introduced.

Signed-off-by: Myoungwon Oh <ohmyoungwon@gmail.com>
src/osd/PrimaryLogPG.cc
src/osd/PrimaryLogPG.h
src/osd/osd_types.cc
src/osd/osd_types.h

index 9738568bad576860225637f656d36480a4830889..1746ea8f1ec0e8c13c4adfa93a51d51b53aaa251 100644 (file)
@@ -2588,6 +2588,7 @@ int PrimaryLogPG::do_manifest_flush(OpRequestRef op, ObjectContextRef obc, Flush
       }();
       bufferlist in;
       if (fp_oid != tgt_soid.oid) {
+       // TODO: don't retain reference to dirty extents
        // decrement old chunk's reference count
        ObjectOperation dec_op;
        cls_cas_chunk_put_ref_op put_call;
@@ -3457,24 +3458,95 @@ void PrimaryLogPG::cancel_manifest_ops(bool requeue, vector<ceph_tid_t> *tids)
   }
 }
 
-void PrimaryLogPG::refcount_manifest(ObjectContextRef obc, object_locator_t oloc, hobject_t soid,
-                                     SnapContext snapc, bool get, RefCountCallback *cb, uint64_t offset)
+void PrimaryLogPG::dec_refcount_non_intersection(ObjectContextRef obc, const object_info_t& oi, 
+                                                 set<uint64_t> intersection_set)
+{
+  for (auto c : oi.manifest.chunk_map) {
+    auto iter = intersection_set.find(c.first);
+    if (intersection_set.end() == iter) {
+      dout(10) << __func__ << " need dereference. " << " offset: " <<  c.first 
+             << " length: " << c.second.length << " oid: " << c.second.oid << dendl;
+      object_locator_t target_oloc(c.second.oid);
+      refcount_manifest(obc, obc->obs.oi.soid, target_oloc, c.second.oid, 
+                       SnapContext(), refcount_t::DECREMENT_REF, NULL);
+    }
+  }
+}
+
+void PrimaryLogPG::dec_all_refcount_head_manifest(object_info_t& oi, OpContext* ctx)
+{
+  SnapSetContext* ssc = ctx->obc->ssc;
+  ceph_assert(oi.has_manifest());
+  ceph_assert(oi.soid.is_head());
+  // has snapshot
+  if (ssc && ssc->snapset.clones.size() > 0) {
+    dout(15) << __func__ <<  " has snapset " << dendl;
+    interval_set<uint64_t> &newest_overlap =
+      ssc->snapset.clone_overlap.rbegin()->second;
+    set<uint64_t> refs;
+    ceph_assert(oi.manifest.is_chunked());
+    ceph_assert(!oi.manifest.is_redirect());
+
+    hobject_t clone_oid = oi.soid;
+    clone_oid.snap = ssc->snapset.clone_overlap.rbegin()->first;
+    ObjectContextRef cobc = get_object_context(clone_oid, false, NULL);
+    if (!cobc) {
+      dout(0) << __func__ << ": Can not find clone obc " << clone_oid << dendl;;
+      return;
+    }
+    object_info_t& coi = cobc->obs.oi;
+    oi.manifest.build_intersection_set(coi.manifest.chunk_map, refs, &newest_overlap);
+
+    ctx->register_on_commit(
+      [oi, ctx, this, refs](){
+       dec_refcount_non_intersection(ctx->obc, oi, refs);
+    });
+    return;
+  }
+
+  // no snapshot
+  if ((oi.flags & object_info_t::FLAG_REDIRECT_HAS_REFERENCE) && oi.manifest.is_redirect()) {
+    ctx->register_on_commit(
+      [oi, ctx, this](){
+      object_locator_t target_oloc(oi.manifest.redirect_target);
+      refcount_manifest(ctx->obc, oi.soid, target_oloc, oi.manifest.redirect_target, 
+                       SnapContext(), refcount_t::DECREMENT_REF, NULL);
+    });
+  } else if (oi.manifest.is_chunked()) {
+    ctx->register_on_commit(
+      [oi, ctx, this](){
+      for (auto p : oi.manifest.chunk_map) {
+       if (p.second.has_reference()) {
+         object_locator_t target_oloc(p.second.oid);
+         refcount_manifest(ctx->obc, oi.soid, target_oloc, p.second.oid, 
+                           SnapContext(), refcount_t::DECREMENT_REF, NULL);
+       }
+      }
+    });
+  } else {
+    ceph_abort_msg("unrecognized manifest type");
+  }
+}
+
+void PrimaryLogPG::refcount_manifest(ObjectContextRef obc, hobject_t src_soid, object_locator_t oloc,
+                                    hobject_t tgt_soid, SnapContext snapc, refcount_t type, RefCountCallback* cb)
 {
   unsigned flags = CEPH_OSD_FLAG_IGNORE_CACHE | CEPH_OSD_FLAG_IGNORE_OVERLAY |
                    CEPH_OSD_FLAG_RWORDERED;                      
 
-  dout(10) << __func__ << " Start refcount for " << soid << dendl;
+  dout(10) << __func__ << " Start refcount from " << src_soid 
+          << " to " << tgt_soid << dendl;
     
   ObjectOperation obj_op;
   bufferlist in;
-  if (get) {             
+  if (type == refcount_t::INCREMENT_REF) {             
     cls_cas_chunk_get_ref_op call;
-    call.source = obc->obs.oi.soid;
+    call.source = src_soid;
     ::encode(call, in);                             
     obj_op.call("cas", "chunk_get_ref", in);
-  } else {                    
+  } else if (type == refcount_t::DECREMENT_REF) {                    
     cls_cas_chunk_put_ref_op call;
-    call.source = obc->obs.oi.soid;
+    call.source = src_soid;
     ::encode(call, in);          
     obj_op.call("cas", "chunk_put_ref", in);
   }                                                     
@@ -3487,7 +3559,7 @@ void PrimaryLogPG::refcount_manifest(ObjectContextRef obc, object_locator_t oloc
   }
 
   auto tid = osd->objecter->mutate(
-    soid.oid, oloc, obj_op, snapc,
+    tgt_soid.oid, oloc, obj_op, snapc,
     ceph::real_clock::from_ceph_timespec(obc->obs.oi.mtime),
     flags, c);
   if (cb) {
@@ -4449,8 +4521,31 @@ int PrimaryLogPG::trim_object(
     ctx->delta_stats.num_object_clones--;
     if (coi.is_cache_pinned())
       ctx->delta_stats.num_objects_pinned--;
-    if (coi.has_manifest())
+    if (coi.has_manifest()) {
+      set<uint64_t> refs;
+      for (auto p : snapset.clones) {
+       hobject_t clone_oid = coid;
+       if (clone_oid.snap == p) {
+         continue;
+       }
+       clone_oid.snap = p;
+       ObjectContextRef cobc = get_object_context(clone_oid, false, NULL);
+       if (!cobc) {
+         close_op_ctx(ctx.release());
+         dout(0) << __func__ << ": Can not find obc " << coid << dendl;;
+         return -ENOENT;
+       }
+
+       // check if the references is still used
+       object_info_t& oi = cobc->obs.oi;
+       if (oi.has_manifest()) {
+         coi.manifest.build_intersection_set(oi.manifest.chunk_map, refs, NULL);
+       }
+      }
+
+      dec_refcount_non_intersection(head_obc, coi, refs);
       ctx->delta_stats.num_objects_manifest--;
+    }
     obc->obs.exists = false;
 
     snapset.clones.erase(p);
@@ -4549,8 +4644,10 @@ int PrimaryLogPG::trim_object(
     if (oi.is_cache_pinned()) {
       ctx->delta_stats.num_objects_pinned--;
     }
-    if (coi.has_manifest())
+    if (oi.has_manifest()) {
       ctx->delta_stats.num_objects_manifest--;
+      dec_all_refcount_head_manifest(oi, ctx.get());
+    }
     head_obc->obs.exists = false;
     head_obc->obs.oi = object_info_t(head_oid);
     t->remove(head_oid);
@@ -6637,27 +6734,6 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
       result = 0;
       tracepoint(osd, do_osd_op_pre_delete, soid.oid.name.c_str(), soid.snap.val);
       {
-       if (oi.has_manifest()) {
-         if ((oi.flags & object_info_t::FLAG_REDIRECT_HAS_REFERENCE) && oi.manifest.is_redirect()) {
-           ctx->register_on_commit(
-             [oi, ctx, this](){
-             object_locator_t target_oloc(oi.manifest.redirect_target);
-             refcount_manifest(ctx->obc, target_oloc, oi.manifest.redirect_target, 
-                               SnapContext(), false, NULL, 0);
-           });
-         } else if (oi.manifest.is_chunked()) {
-           ctx->register_on_commit(
-             [oi, ctx, this](){
-             for (auto p : oi.manifest.chunk_map) {
-               if (p.second.has_reference()) {
-                 object_locator_t target_oloc(p.second.oid);
-                 refcount_manifest(ctx->obc, target_oloc, p.second.oid, 
-                                   SnapContext(), false, NULL, p.first);
-               }
-             }
-           });
-         } 
-       } 
        result = _delete_oid(ctx, false, ctx->ignore_cache);
       }
       break;
@@ -6847,8 +6923,8 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
          ctx->op_finishers[ctx->current_osd_subop_num].reset(
            new SetManifestFinisher(osd_op));
          RefCountCallback *fin = new RefCountCallback(ctx, osd_op);
-         refcount_manifest(ctx->obc, target_oloc, target, SnapContext(),
-                           true, fin, 0);
+         refcount_manifest(ctx->obc, ctx->obc->obs.oi.soid, target_oloc, target, 
+                           SnapContext(), refcount_t::INCREMENT_REF, fin);
          result = -EINPROGRESS;
        } else {
          // finish
@@ -6918,6 +6994,10 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
          result = -EOPNOTSUPP;
          break;
        }
+       if (soid.has_snapset()) {
+         result = -EOPNOTSUPP;
+         break;
+       }
 
        object_locator_t tgt_oloc;
        uint64_t src_offset, src_length, tgt_offset;
@@ -6972,8 +7052,8 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
          ctx->op_finishers[ctx->current_osd_subop_num].reset(
            new SetManifestFinisher(osd_op));
          RefCountCallback *fin = new RefCountCallback(ctx, osd_op);
-         refcount_manifest(ctx->obc, tgt_oloc, target, SnapContext(),
-                           true, fin, src_offset);
+         refcount_manifest(ctx->obc, ctx->obc->obs.oi.soid, tgt_oloc, target, 
+                           SnapContext(), refcount_t::INCREMENT_REF, fin);
          result = -EINPROGRESS;
        } else {
          if (op_finisher) {
@@ -6985,7 +7065,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
          chunk_info.set_flag(chunk_info_t::FLAG_MISSING);
          chunk_info.oid = target;
          chunk_info.offset = tgt_offset;
-         chunk_info.length= src_length;
+         chunk_info.length = src_length;
          oi.manifest.chunk_map[src_offset] = chunk_info;
          if (!oi.has_manifest() && !oi.manifest.is_chunked()) 
            ctx->delta_stats.num_objects_manifest++;
@@ -7037,15 +7117,14 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
 
          if (obs.oi.manifest.is_chunked()) {
            src_hoid = obs.oi.soid;
-           cb = new PromoteManifestCallback(ctx->obc, this, ctx);
          } else if (obs.oi.manifest.is_redirect()) {
            object_locator_t src_oloc(obs.oi.manifest.redirect_target);
            my_oloc = src_oloc;
            src_hoid = obs.oi.manifest.redirect_target;
-           cb = new PromoteManifestCallback(ctx->obc, this, ctx);
          } else {
            ceph_abort_msg("unrecognized manifest type");
          }
+         cb = new PromoteManifestCallback(ctx->obc, this, ctx);
           ctx->op_finishers[ctx->current_osd_subop_num].reset(
             new PromoteFinisher(cb));
          unsigned flags = CEPH_OSD_COPY_FROM_FLAG_IGNORE_OVERLAY |
@@ -7130,29 +7209,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
          break;
        }
 
-       if (oi.manifest.is_redirect()) {
-         if ((oi.flags & object_info_t::FLAG_REDIRECT_HAS_REFERENCE)) {
-           ctx->register_on_commit(
-             [oi, ctx, this](){
-             object_locator_t target_oloc(oi.manifest.redirect_target);
-             refcount_manifest(ctx->obc, target_oloc, oi.manifest.redirect_target, 
-                               SnapContext(), false, NULL, 0);
-           });
-         }
-       } else if (oi.manifest.is_chunked()) {
-           ctx->register_on_commit(
-             [oi, ctx, this](){
-             for (auto p : oi.manifest.chunk_map) {
-               if (p.second.flags & chunk_info_t::FLAG_HAS_REFERENCE) {
-                 object_locator_t target_oloc(p.second.oid);
-                 refcount_manifest(ctx->obc, target_oloc, p.second.oid, 
-                                   SnapContext(), false, NULL, p.first);
-               }
-             }
-           });
-       } else {
-         ceph_abort_msg("unrecognized manifest type");
-       }
+       dec_all_refcount_head_manifest(oi, ctx);
 
        oi.clear_flag(object_info_t::FLAG_MANIFEST);
        oi.manifest = object_manifest_t();
@@ -7935,6 +7992,10 @@ inline int PrimaryLogPG::_delete_oid(
     dout(20) << __func__ << " setting whiteout on " << soid << dendl;
     oi.set_flag(object_info_t::FLAG_WHITEOUT);
     ctx->delta_stats.num_whiteouts++;
+    if (oi.has_manifest()) {
+      ctx->delta_stats.num_objects_manifest--;
+      dec_all_refcount_head_manifest(oi, ctx);
+    }
     t->create(soid);
     osd->logger->inc(l_osd_tier_whiteout);
     return 0;
@@ -7954,6 +8015,7 @@ inline int PrimaryLogPG::_delete_oid(
   }
   if (oi.has_manifest()) {
     ctx->delta_stats.num_objects_manifest--;
+    dec_all_refcount_head_manifest(oi, ctx);
   }
   obs.exists = false;
   return 0;
@@ -8205,6 +8267,20 @@ void PrimaryLogPG::make_writeable(OpContext *ctx)
       if (pool.info.is_erasure())
        ctx->clone_obc->attr_cache = ctx->obc->attr_cache;
       snap_oi = &ctx->clone_obc->obs.oi;
+      if (ctx->obc->obs.oi.has_manifest()) {
+       if ((ctx->obc->obs.oi.flags & object_info_t::FLAG_REDIRECT_HAS_REFERENCE) && 
+           ctx->obc->obs.oi.manifest.is_redirect()) {
+         snap_oi->set_flag(object_info_t::FLAG_MANIFEST);
+         snap_oi->manifest.type = object_manifest_t::TYPE_REDIRECT;
+         snap_oi->manifest.redirect_target = ctx->obc->obs.oi.manifest.redirect_target;
+       } else if (ctx->obc->obs.oi.manifest.is_chunked()) {
+         snap_oi->set_flag(object_info_t::FLAG_MANIFEST);
+         snap_oi->manifest.type = object_manifest_t::TYPE_CHUNKED;
+         snap_oi->manifest.chunk_map = ctx->obc->obs.oi.manifest.chunk_map;
+       } else {
+         ceph_abort_msg("unrecognized manifest type");
+       }
+      }
       bool got = ctx->lock_manager.get_write_greedy(
        coid,
        ctx->clone_obc,
@@ -9135,10 +9211,6 @@ void PrimaryLogPG::_copy_some_manifest(ObjectContextRef obc, CopyOpRef cop, uint
     ObjectOperation op;
     op.dup(sub_cop->chunk_ops);
 
-    dout(20) << __func__ << " tgt_oid: " << soid.oid << " tgt_offset: " 
-           << manifest->chunk_map[iter->first].offset
-           << " length: " << length << " pool id: " << oloc.pool << dendl;
-
     if (cop->results.user_version) {
       op.assert_version(cop->results.user_version);
     } else {
@@ -9161,6 +9233,12 @@ void PrimaryLogPG::_copy_some_manifest(ObjectContextRef obc, CopyOpRef cop, uint
       sub_cop->results.user_version ? NULL : &sub_cop->results.user_version);
     fin->tid = tid;
     sub_cop->objecter_tid = tid;
+
+    dout(20) << __func__ << " tgt_oid: " << soid.oid << " tgt_offset: " 
+           << manifest->chunk_map[iter->first].offset
+           << " length: " << length << " pool id: " << oloc.pool 
+           << " tid: " << tid << dendl;
+
     if (last_offset < iter->first) {
       break;
     }
index ebc36e8117091d7ff1265c34e1e9f4743aeb032c..74cf9f5d4bf6cc9d7b70370c3d5785d0bc4dd9d1 100644 (file)
@@ -1476,6 +1476,10 @@ protected:
   friend struct C_ProxyWrite_Commit;
 
   // -- chunkop --
+  enum class refcount_t {
+    INCREMENT_REF,
+    DECREMENT_REF,
+  };
   void do_proxy_chunked_op(OpRequestRef op, const hobject_t& missing_oid, 
                           ObjectContextRef obc, bool write_ordered);
   void do_proxy_chunked_read(OpRequestRef op, ObjectContextRef obc, int op_index,
@@ -1494,9 +1498,12 @@ protected:
                             uint64_t last_offset);
   void handle_manifest_flush(hobject_t oid, ceph_tid_t tid, int r,
                             uint64_t offset, uint64_t last_offset, epoch_t lpr);
-  void cancel_manifest_ops(bool requeue, std::vector<ceph_tid_t> *tids);
-  void refcount_manifest(ObjectContextRef obc, object_locator_t oloc, hobject_t soid,
-                         SnapContext snapc, bool get, RefCountCallback *cb, uint64_t offset);
+  void cancel_manifest_ops(bool requeue, vector<ceph_tid_t> *tids);
+  void refcount_manifest(ObjectContextRef obc, hobject_t src_soid, object_locator_t oloc,
+                        hobject_t tgt_soid, SnapContext snapc, refcount_t type, RefCountCallback* cb);
+  void dec_all_refcount_head_manifest(object_info_t& oi, OpContext* ctx);
+  void dec_refcount_non_intersection(ObjectContextRef obc, const object_info_t& oi, 
+                                    set<uint64_t> intersection_set);
 
   friend struct C_ProxyChunkRead;
   friend class PromoteManifestCallback;
index 557a6b1a06c7e690bc27f010c18d900dc2a1e6b0..3bdd70095c7beab41ee39233352f88bb802e9c3f 100644 (file)
@@ -5810,6 +5810,23 @@ void chunk_info_t::dump(Formatter *f) const
   f->dump_unsigned("flags", flags);
 }
 
+
+bool chunk_info_t::operator==(const chunk_info_t& cit) const
+{
+  if (has_fingerprint()) {
+    if (oid.oid.name == cit.oid.oid.name) {
+      return true;
+    }
+  } else {
+    if (offset == cit.offset && length == cit.length &&
+       oid.oid.name == cit.oid.oid.name) {
+      return true;
+    }
+
+  }
+  return false;
+}
+
 ostream& operator<<(ostream& out, const chunk_info_t& ci)
 {
   return out << "(len: " << ci.length << " oid: " << ci.oid
@@ -5819,6 +5836,25 @@ ostream& operator<<(ostream& out, const chunk_info_t& ci)
 
 // -- object_manifest_t --
 
+void object_manifest_t::build_intersection_set(std::map<uint64_t, chunk_info_t>& map, 
+             set<uint64_t>& intersection, interval_set<uint64_t>* check_intersection)
+{
+  for (auto p : chunk_map) {
+    if (check_intersection) {
+      if (check_intersection->intersects(p.first, p.second.length)) {
+       intersection.insert(p.first);   
+       continue;
+      }
+    }
+
+    for (auto c : map) {
+      if (p.second == c.second) {
+       intersection.insert(p.first);
+      }
+    }
+  }
+}
+
 void object_manifest_t::encode(ceph::buffer::list& bl) const
 {
   ENCODE_START(1, 1, bl);
index 630ff3d07aa0e5c21125dad22339a3c40f891104..c7814bfd887e01acab1ba513e03096829d7815d1 100644 (file)
@@ -5485,6 +5485,7 @@ struct chunk_info_t {
   void decode(ceph::buffer::list::const_iterator &bl);
   void dump(ceph::Formatter *f) const;
   friend std::ostream& operator<<(std::ostream& out, const chunk_info_t& ci);
+  bool operator==(const chunk_info_t& cit) const;
 };
 WRITE_CLASS_ENCODER(chunk_info_t)
 std::ostream& operator<<(std::ostream& out, const chunk_info_t& ci);
@@ -5529,6 +5530,8 @@ struct object_manifest_t {
     redirect_target = hobject_t();
     chunk_map.clear();
   }
+  void build_intersection_set(std::map<uint64_t, chunk_info_t>& map, 
+               set<uint64_t>& intersection, interval_set<uint64_t>* check_intersection);
   static void generate_test_instances(std::list<object_manifest_t*>& o);
   void encode(ceph::buffer::list &bl) const;
   void decode(ceph::buffer::list::const_iterator &bl);