From 762eda88a18ba707bd5410f38e21e95c4a6b3a46 Mon Sep 17 00:00:00 2001 From: John Spray Date: Thu, 6 Nov 2014 11:46:29 +0000 Subject: [PATCH] osdc: fix Journaler write error handling Since we started wrapping the write error handler in a finisher, multiple calls to handle_write_error would hit the assert() on the second call before the actual handler had been called (at the other end of the finisher) from the first call. The symptom was that the MDS was intermittently failing to respawn on blacklist, seen in #10011. Signed-off-by: John Spray --- src/osdc/Journaler.cc | 8 ++++++++ src/osdc/Journaler.h | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/osdc/Journaler.cc b/src/osdc/Journaler.cc index b16809e2f3018..a8712e622e58d 100644 --- a/src/osdc/Journaler.cc +++ b/src/osdc/Journaler.cc @@ -1172,10 +1172,17 @@ void Journaler::_finish_trim(int r, uint64_t to) void Journaler::handle_write_error(int r) { + assert(lock.is_locked_by_me()); + lderr(cct) << "handle_write_error " << cpp_strerror(r) << dendl; if (on_write_error) { on_write_error->complete(r); on_write_error = NULL; + called_write_error = true; + } else if (called_write_error) { + /* We don't call error handler more than once, subsequent errors are dropped -- + * this is okay as long as the error handler does something dramatic like respawn */ + lderr(cct) << __func__ << ": multiple write errors, handler already called" << dendl; } else { assert(0 == "unhandled write error"); } @@ -1325,6 +1332,7 @@ void Journaler::set_write_error_handler(Context *c) { Mutex::Locker l(lock); assert(!on_write_error); on_write_error = wrap_finisher(c); + called_write_error = false; } diff --git a/src/osdc/Journaler.h b/src/osdc/Journaler.h index d642609399994..d171c263663ce 100644 --- a/src/osdc/Journaler.h +++ b/src/osdc/Journaler.h @@ -319,6 +319,7 @@ private: // for wait_for_readable() C_OnFinisher *on_readable; C_OnFinisher *on_write_error; + bool called_write_error; void _finish_read(int r, uint64_t offset, bufferlist &bl); // read completion callback void _finish_retry_read(int r); @@ -383,7 +384,7 @@ public: waiting_for_zero(false), read_pos(0), requested_pos(0), received_pos(0), fetch_len(0), temp_fetch_len(0), - on_readable(0), on_write_error(NULL), + on_readable(0), on_write_error(NULL), called_write_error(false), expire_pos(0), trimming_pos(0), trimmed_pos(0) { memset(&layout, 0, sizeof(layout)); -- 2.39.5