From: John Spray
Date: Thu, 6 Nov 2014 11:46:29 +0000 (+0000)
Subject: osdc: fix Journaler write error handling
X-Git-Tag: v0.87.1~57^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9158326eeb69312283a6e8174352f36ea30d0cbf;p=ceph.git

osdc: fix Journaler write error handling

Since we started wrapping the write error handler
in a finisher, multiple calls to handle_write_error
would hit the assert() on the second call before the
actual handler had been called (at the other end of
the finisher) from the first call.

The symptom was that the MDS was intermittently
failing to respawn on blacklist, seen in #10011.

Signed-off-by: John Spray
(cherry picked from commit 762eda88a18ba707bd5410f38e21e95c4a6b3a46)
---

diff --git a/src/osdc/Journaler.cc b/src/osdc/Journaler.cc
index b16809e2f30..a8712e622e5 100644
--- a/src/osdc/Journaler.cc
+++ b/src/osdc/Journaler.cc
@@ -1172,10 +1172,17 @@ void Journaler::_finish_trim(int r, uint64_t to)
 
 void Journaler::handle_write_error(int r)
 {
+  assert(lock.is_locked_by_me());
+
   lderr(cct) << "handle_write_error " << cpp_strerror(r) << dendl;
   if (on_write_error) {
     on_write_error->complete(r);
     on_write_error = NULL;
+    called_write_error = true;
+  } else if (called_write_error) {
+    /* We don't call error handler more than once, subsequent errors are dropped --
+     * this is okay as long as the error handler does something dramatic like respawn */
+    lderr(cct) << __func__ << ": multiple write errors, handler already called" << dendl;
   } else {
     assert(0 == "unhandled write error");
   }
@@ -1325,6 +1332,7 @@ void Journaler::set_write_error_handler(Context *c) {
   Mutex::Locker l(lock);
   assert(!on_write_error);
   on_write_error = wrap_finisher(c);
+  called_write_error = false;
 }
 
 
diff --git a/src/osdc/Journaler.h b/src/osdc/Journaler.h
index d6426093999..d171c263663 100644
--- a/src/osdc/Journaler.h
+++ b/src/osdc/Journaler.h
@@ -319,6 +319,7 @@ private:
   // for wait_for_readable()
   C_OnFinisher *on_readable;
   C_OnFinisher *on_write_error;
+  bool called_write_error;
 
   void _finish_read(int r, uint64_t offset, bufferlist &bl); // read completion callback
   void _finish_retry_read(int r);
@@ -383,7 +384,7 @@ public:
     waiting_for_zero(false),
     read_pos(0), requested_pos(0), received_pos(0),
     fetch_len(0), temp_fetch_len(0),
-    on_readable(0), on_write_error(NULL),
+    on_readable(0), on_write_error(NULL), called_write_error(false),
     expire_pos(0), trimming_pos(0), trimmed_pos(0)
   {
     memset(&layout, 0, sizeof(layout));