git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/ReplicatedPG: block requests to cache PGs when they are full
author Sage Weil <sage@inktank.com>
Wed, 12 Feb 2014 00:25:51 +0000 (16:25 -0800)
committer Sage Weil <sage@inktank.com>
Sun, 16 Feb 2014 06:09:40 +0000 (22:09 -0800)
If we are full and get a write request to a new object, put the op on a
wait list.  Wake up when the agent frees up some space.

Note that we do not block writes to existing objects.  That would be a
more aggressive strategy, but it is difficult to know up front whether we
will increase the size of the object or not, so we just leave it be.  I
suspect this strategy is "good enough".

Also note that we do not yet prioritize agent attention to PGs that most
need eviction (e.g., those that are full).

Signed-off-by: Sage Weil <sage@inktank.com>
src/osd/PG.cc
src/osd/PG.h
src/osd/ReplicatedPG.cc

index 588dd5167fa5e58a29e91d5de5fe38b73a6c0d5f..51c0b4271d27675a3ac2536dacf5757f2f3677ef 100644 (file)
@@ -1740,6 +1740,7 @@ static void split_replay_queue(
 void PG::split_ops(PG *child, unsigned split_bits) {
   unsigned match = child->info.pgid.m_seed;
   assert(waiting_for_all_missing.empty());
+  assert(waiting_for_cache_not_full.empty());
   assert(waiting_for_missing_object.empty());
   assert(waiting_for_degraded_object.empty());
   assert(waiting_for_ack.empty());
index 064abb2669863765a7e6e8d2264e07abc3aa3272..43395cb26367038ead17bce1e1f1df0866f2f6ca 100644 (file)
@@ -530,6 +530,7 @@ protected:
 
   // Ops waiting on backfill_pos to change
   list<OpRequestRef>            waiting_for_active;
+  list<OpRequestRef>            waiting_for_cache_not_full;
   list<OpRequestRef>            waiting_for_all_missing;
   map<hobject_t, list<OpRequestRef> > waiting_for_missing_object,
                             waiting_for_degraded_object,
index b9b5c5e8e2bf1a46acd6fb4e56a49fc434e6d250..da44223285c62186091452cc5046da167d2a3934 100644 (file)
@@ -1424,7 +1424,9 @@ bool ReplicatedPG::maybe_handle_cache(OpRequestRef op,
        do_cache_redirect(op, obc);
        return true;
       }
-      // FIXME: do something clever with writes.
+      dout(20) << __func__ << " cache pool full, waiting" << dendl;
+      waiting_for_cache_not_full.push_back(op);
+      return true;
     }
     if (!must_promote && can_skip_promote(op, obc)) {
       return false;
@@ -8783,10 +8785,13 @@ void ReplicatedPG::on_change(ObjectStore::Transaction *t)
       p->second.clear();
   }
 
-  if (is_primary())
+  if (is_primary()) {
+    requeue_ops(waiting_for_cache_not_full);
     requeue_ops(waiting_for_all_missing);
-  else
+  } else {
+    waiting_for_cache_not_full.clear();
     waiting_for_all_missing.clear();
+  }
 
   // this will requeue ops we were working on but didn't finish, and
   // any dups
@@ -10542,6 +10547,9 @@ bool ReplicatedPG::agent_choose_mode()
            << " -> "
            << TierAgentState::get_evict_mode_name(evict_mode)
            << dendl;
+    if (agent_state->evict_mode == TierAgentState::EVICT_MODE_FULL) {
+      requeue_ops(waiting_for_cache_not_full);
+    }
     agent_state->evict_mode = evict_mode;
     changed = true;
   }