git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: simplify pg write mode transitions
author Sage Weil <sage@newdream.net>
Wed, 27 Jan 2010 00:35:11 +0000 (16:35 -0800)
committer Sage Weil <sage@newdream.net>
Wed, 27 Jan 2010 00:35:11 +0000 (16:35 -0800)
There is no reason to track the client in the per-pg AccessMode, since we
only care about read/write races on per-object state, and we need to track
that per object anyway due to the new async commit/apply model.  (Though
this is semi-broken even without that...)

Currently we will move from delayed to rmw, but there is no rmw->delayed
transition, since the logic for that is a bit more complex; it'll be
difficult to determine when it is really a win.

In general, though, RMW is probably a better all around policy!

src/TODO
src/osd/ReplicatedPG.h

index b149f47f5c82a534958e87144522c3b85eefe1ef..1ed636879763274586a3abfad8e3eb8eb7be6241 100644 (file)
--- a/src/TODO
+++ b/src/TODO
@@ -50,6 +50,9 @@ v0.19
 
 - kclient: retry alloc on ENOMEM when reading from connection?
 
+filestore
+- need to check per-object state for conflicting reads and unapplied writes
+- use want_delayed(), want_rmw() to toggle delayed/rmw mode?  or just for rmw->delayed transition?
 
 pending wire format changes
 /- include a __u64 tid in ceph_msg_header
index d558cfbb791e005580a9d83fdb2e675de79ad2b3..0faf48fcdd2437a67cc64c6b34c9cb9424f118cf 100644 (file)
@@ -95,7 +95,6 @@ public:
     }
     state_t state;
     int num_wr;
-    entity_inst_t client;
     list<Message*> waiting;
     bool wake;
 
@@ -107,15 +106,33 @@ public:
        state = IDLE;
     }
 
-    bool try_read(entity_inst_t& c) {
+    bool want_delayed() {
       check_mode();
       switch (state) {
       case IDLE:
+       state = DELAYED;
       case DELAYED:
        return true;
       case RMW:
-       if (c == client)
-         return true;
+       state = RMW_FLUSHING;
+       return true;
+      case DELAYED_FLUSHING:
+      case RMW_FLUSHING:
+       return false;
+      default:
+       assert(0);
+      }
+    }
+    bool want_rmw() {
+      check_mode();
+      switch (state) {
+      case IDLE:
+       state = RMW;
+       return true;
+      case DELAYED:
+       state = DELAYED_FLUSHING;
+       return false;
+      case RMW:
        state = RMW_FLUSHING;
        return false;
       case DELAYED_FLUSHING:
@@ -125,22 +142,28 @@ public:
        assert(0);
       }
     }
-    bool try_write(entity_inst_t& c) {
+
+    bool try_read(entity_inst_t& c) {
       check_mode();
       switch (state) {
       case IDLE:
-       if (g_conf.filestore_journal_writeahead ||
-           g_conf.filestore_journal_parallel) {
-         state = RMW;
-         client = c;
-       } else
-         state = DELAYED;
       case DELAYED:
+      case RMW:
        return true;
+      case DELAYED_FLUSHING:
+      case RMW_FLUSHING:
+       return false;
+      default:
+       assert(0);
+      }
+    }
+    bool try_write(entity_inst_t& c) {
+      check_mode();
+      switch (state) {
+      case IDLE:
+       state = RMW;  /* default to RMW; it's a better all around policy */
+      case DELAYED:
       case RMW:
-       if (c == client)
-         return true;
-       state = RMW_FLUSHING;
        return true;
       case DELAYED_FLUSHING:
       case RMW_FLUSHING:
@@ -154,16 +177,12 @@ public:
       switch (state) {
       case IDLE:
        state = RMW;
-       client = c;
        return true;
       case DELAYED:
        state = DELAYED_FLUSHING;
        return false;
       case RMW:
-       if (c == client)
-         return true;
-       state = RMW_FLUSHING;
-       return false;
+       return true;
       case DELAYED_FLUSHING:
       case RMW_FLUSHING:
        return false;