git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
PG: explicitly delay ops on backfill_pos
author: Samuel Just <sam.just@inktank.com>
Mon, 24 Sep 2012 21:33:17 +0000 (14:33 -0700)
committer: Sage Weil <sage@inktank.com>
Thu, 27 Sep 2012 20:21:53 +0000 (13:21 -0700)
Previously, we considered backfill_pos degraded in order to delay
ops since a write to backfill_pos could generate a snap before
backfill_pos, and we assume that (0, backfill_pos) is fully
backfilled.  This is a problem since it's possible that
backfill_pos is a valid object, but not one that currently exists.
For example, it might have been deleted since last_backfill was
last changed.  Instead, we will explicitly delay ops on
backfill_pos in waiting_for_backfill_pos.

This error resulted in #2691 since wait_for_degraded_object also
attempts to recover the object. At this point, the primary would
attempt to recover the object, find that it isn't there, and put
it in the missing set with need=0,0.  Eventually, recover_primary
attempts to recover that object, finds that it has been deleted
in the log, and asserts.

Signed-off-by: Samuel Just <sam.just@inktank.com>
src/osd/PG.h
src/osd/ReplicatedPG.cc
src/osd/ReplicatedPG.h

index 469d293a0dd258eb36d7bc1aef6568036c9bfb48..5390a15f910d471bad8089bbf6153ff2027d7d43 100644 (file)
@@ -634,6 +634,10 @@ protected:
 
   // pg waiters
   bool flushed;
+
+  // Ops waiting on backfill_pos to change
+  list<OpRequestRef> waiting_for_backfill_pos;
+
   list<OpRequestRef> waiting_for_map;
   list<OpRequestRef>            waiting_for_active;
   list<OpRequestRef>            waiting_for_all_missing;
index c5f4e1b5fff3cbf6f3bc08299e11ff4021c655b7..4df7872d9eac60940ecd816b3ab1e59a079ab78f 100644 (file)
@@ -140,12 +140,6 @@ bool ReplicatedPG::is_degraded_object(const hobject_t& soid)
        peer_missing[peer].missing.count(soid))
       return true;
 
-    // If soid == backfill_pos, we may implicitly write to
-    // the largest snap of soid for make_writeable.
-    if (peer == backfill_target &&
-       backfill_pos == soid)
-      return true;
-
     // Object is degraded if after last_backfill AND
     // we have are backfilling it
     if (peer == backfill_target &&
@@ -187,6 +181,16 @@ void ReplicatedPG::wait_for_degraded_object(const hobject_t& soid, OpRequestRef
   op->mark_delayed();
 }
 
+void ReplicatedPG::wait_for_backfill_pos(OpRequestRef op)
+{
+  waiting_for_backfill_pos.push_back(op);
+}
+
+void ReplicatedPG::release_waiting_for_backfill_pos()
+{
+  requeue_ops(waiting_for_backfill_pos);
+}
+
 bool PGLSParentFilter::filter(bufferlist& xattr_data, bufferlist& outdata)
 {
   bufferlist::iterator iter = xattr_data.begin();
@@ -1193,6 +1197,7 @@ void ReplicatedPG::do_scan(OpRequestRef op)
 
       backfill_pos = backfill_info.begin > peer_backfill_info.begin ?
        peer_backfill_info.begin : backfill_info.begin;
+      release_waiting_for_backfill_pos();
       dout(10) << " backfill_pos now " << backfill_pos << dendl;
 
       assert(waiting_on_backfill);
@@ -5912,6 +5917,7 @@ void ReplicatedPG::on_change()
   context_registry_on_change();
 
   // requeue object waiters
+  requeue_ops(waiting_for_backfill_pos);
   requeue_object_waiters(waiting_for_missing_object);
   for (map<hobject_t,list<OpRequestRef> >::iterator p = waiting_for_degraded_object.begin();
        p != waiting_for_degraded_object.end();
@@ -6535,6 +6541,7 @@ int ReplicatedPG::recover_backfill(int max)
     push_backfill_object(i->first, i->second.first, i->second.second, backfill_target);
   }
 
+  release_waiting_for_backfill_pos();
   dout(5) << "backfill_pos is " << backfill_pos << " and pinfo.last_backfill is "
          << pinfo.last_backfill << dendl;
   for (set<hobject_t>::iterator i = backfills_in_flight.begin();
index 9c2836e7e090c7ab4a00a59c0a062521aaad67f4..d8c2d2408e4d8cb7066a4e31d0d6f945140b06df 100644 (file)
@@ -1039,6 +1039,8 @@ public:
   bool is_missing_object(const hobject_t& oid);
   void wait_for_missing_object(const hobject_t& oid, OpRequestRef op);
   void wait_for_all_missing(OpRequestRef op);
+  void wait_for_backfill_pos(OpRequestRef op);
+  void release_waiting_for_backfill_pos();
 
   bool is_degraded_object(const hobject_t& oid);
   void wait_for_degraded_object(const hobject_t& oid, OpRequestRef op);