From 8b42318b89123da536691021b065e20b2b13645c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 26 Jan 2010 16:35:11 -0800 Subject: [PATCH] osd: simplify pg write mode transitions There is no reason to track client on the per-pg AccessMode, since we only care about read/write races on a per-object state, and need to track that per-object anyway due to the new async commit/apply model. (Though this is semi-broken even without that...) Currently we will move from delayed to rmw, but there is no rmw->delayed transition, since the logic for that is a bit more complex; it'll be difficult to determine when it is really a win. In general, though, RMW is probably a better all-around policy! --- src/TODO | 3 +++ src/osd/ReplicatedPG.h | 57 ++++++++++++++++++++++++++++-------------- 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/src/TODO b/src/TODO index b149f47f5c82a..1ed6368797632 100644 --- a/src/TODO +++ b/src/TODO @@ -50,6 +50,9 @@ v0.19 - kclient: retry alloc on ENOMEM when reading from connection? +filestore +- need to check per-object state for conflicting reads and unapplied writes +- use want_delayed(), want_rmw() to toggle delayed/rmw mode? or just for rmw->delayed transition?
pending wire format changes /- include a __u64 tid in ceph_msg_header diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index d558cfbb791e0..0faf48fcdd243 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -95,7 +95,6 @@ public: } state_t state; int num_wr; - entity_inst_t client; list waiting; bool wake; @@ -107,15 +106,33 @@ public: state = IDLE; } - bool try_read(entity_inst_t& c) { + bool want_delayed() { check_mode(); switch (state) { case IDLE: + state = DELAYED; case DELAYED: return true; case RMW: - if (c == client) - return true; + state = RMW_FLUSHING; + return true; + case DELAYED_FLUSHING: + case RMW_FLUSHING: + return false; + default: + assert(0); + } + } + bool want_rmw() { + check_mode(); + switch (state) { + case IDLE: + state = RMW; + return true; + case DELAYED: + state = DELAYED_FLUSHING; + return false; + case RMW: state = RMW_FLUSHING; return false; case DELAYED_FLUSHING: @@ -125,22 +142,28 @@ public: assert(0); } } - bool try_write(entity_inst_t& c) { + + bool try_read(entity_inst_t& c) { check_mode(); switch (state) { case IDLE: - if (g_conf.filestore_journal_writeahead || - g_conf.filestore_journal_parallel) { - state = RMW; - client = c; - } else - state = DELAYED; case DELAYED: + case RMW: return true; + case DELAYED_FLUSHING: + case RMW_FLUSHING: + return false; + default: + assert(0); + } + } + bool try_write(entity_inst_t& c) { + check_mode(); + switch (state) { + case IDLE: + state = RMW; /* default to RMW; it's a better all around policy */ + case DELAYED: case RMW: - if (c == client) - return true; - state = RMW_FLUSHING; return true; case DELAYED_FLUSHING: case RMW_FLUSHING: @@ -154,16 +177,12 @@ public: switch (state) { case IDLE: state = RMW; - client = c; return true; case DELAYED: state = DELAYED_FLUSHING; return false; case RMW: - if (c == client) - return true; - state = RMW_FLUSHING; - return false; + return true; case DELAYED_FLUSHING: case RMW_FLUSHING: return false; -- 2.39.5