]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: API for quiesce callbacks
authorMykola Golub <mgolub@suse.com>
Fri, 27 Mar 2020 06:53:31 +0000 (06:53 +0000)
committerMykola Golub <mgolub@suse.com>
Tue, 5 May 2020 16:02:09 +0000 (17:02 +0100)
The callbacks are fired by librbd before/after creating a
snapshot. The callback users like QEMU could attempt to freeze
the FS before allowing librbd to actually perform the snapshot.

Signed-off-by: Mykola Golub <mgolub@suse.com>
17 files changed:
src/common/options.cc
src/include/rbd/librbd.h
src/include/rbd/librbd.hpp
src/librbd/ImageState.cc
src/librbd/ImageState.h
src/librbd/ImageWatcher.cc
src/librbd/ImageWatcher.h
src/librbd/Operations.cc
src/librbd/WatchNotifyTypes.cc
src/librbd/WatchNotifyTypes.h
src/librbd/librbd.cc
src/librbd/operation/SnapshotCreateRequest.cc
src/librbd/operation/SnapshotCreateRequest.h
src/test/librbd/mock/MockImageState.h
src/test/librbd/mock/MockImageWatcher.h
src/test/librbd/operation/test_mock_SnapshotCreateRequest.cc
src/test/librbd/test_librbd.cc

index c78d9b69d7591e0ac287a94e04967b1b00a98730..85871c2ba08703a561f907b27d33b726768cbe9c 100644 (file)
@@ -7478,6 +7478,11 @@ static std::vector<Option> get_rbd_options() {
     Option("rbd_rwl_path", Option::TYPE_STR, Option::LEVEL_ADVANCED)
     .set_default("/tmp")
     .set_description("location of the persistent write back cache in a DAX-enabled filesystem on persistent memory"),
+
+    Option("rbd_quiesce_notification_attempts", Option::TYPE_UINT, Option::LEVEL_DEV)
+    .set_default(10)
+    .set_min(1)
+    .set_description("the number of quiesce notification attempts"),
   });
 }
 
index d85cd36504b66def6c2599dbf1eae7f450f9ab6e..b90cd3eb5fa6c2c8bf604c33d6fd23fa8f297a9b 100644 (file)
@@ -1366,6 +1366,38 @@ CEPH_RBD_API int rbd_pool_stats_option_add_uint64(rbd_pool_stats_t stats,
                                                   uint64_t* stat_val);
 CEPH_RBD_API int rbd_pool_stats_get(rados_ioctx_t io, rbd_pool_stats_t stats);
 
+/**
+ * Register a quiesce/unquiesce watcher.
+ *
+ * @param image the image to watch
+ * @param quiesce_cb what to do when librbd wants to quiesce
+ * @param unquiesce_cb what to do when librbd wants to unquiesce
+ * @param arg opaque value to pass to the callbacks
+ * @param handle where to store the internal id assigned to this watch
+ * @returns 0 on success, negative error code on failure
+ */
+CEPH_RBD_API int rbd_quiesce_watch(rbd_image_t image,
+                                   rbd_update_callback_t quiesce_cb,
+                                   rbd_update_callback_t unquiesce_cb,
+                                   void *arg, uint64_t *handle);
+
+/**
+ * Notify quiesce is complete
+ *
+ * @param image the image to notify
+ * @returns 0 on success
+ */
+CEPH_RADOS_API void rbd_quiesce_complete(rbd_image_t image);
+
+/**
+ * Unregister a quiesce/unquiesce watcher.
+ *
+ * @param image the image to unwatch
+ * @param handle which watch to unregister
+ * @returns 0 on success, negative error code on failure
+ */
+CEPH_RBD_API int rbd_quiesce_unwatch(rbd_image_t image, uint64_t handle);
+
 #if __GNUC__ >= 4
   #pragma GCC diagnostic pop
 #endif
index ca2d7449baf39fa3a0d34761967e9ac720a72391..8d3944a01e9436e5c0c181ae82e94fd62ca5f6a1 100644 (file)
@@ -489,6 +489,20 @@ public:
   virtual void handle_notify() = 0;
 };
 
+class CEPH_RBD_API QuiesceWatchCtx {
+public:
+  virtual ~QuiesceWatchCtx() {}
+  /**
+   * Callback activated when we want to quiesce.
+   */
+  virtual void handle_quiesce() = 0;
+
+  /**
+   * Callback activated when we want to unquiesce.
+   */
+  virtual void handle_unquiesce() = 0;
+};
+
 class CEPH_RBD_API Image
 {
 public:
@@ -768,6 +782,10 @@ public:
 
   int config_list(std::vector<config_option_t> *options);
 
+  int quiesce_watch(QuiesceWatchCtx *ctx, uint64_t *handle);
+  int quiesce_unwatch(uint64_t handle);
+  void quiesce_complete();
+
 private:
   friend class RBD;
 
index fda3148f3199d298c18b707e50b3ec283322bf6a..889a36609dac192c7055607172ad9cbc86028551 100644 (file)
@@ -76,7 +76,8 @@ public:
   }
 
   void register_watcher(UpdateWatchCtx *watcher, uint64_t *handle) {
-    ldout(m_cct, 20) << __func__ << ": watcher=" << watcher << dendl;
+    ldout(m_cct, 20) << "ImageUpdateWatchers::" << __func__ << ": watcher="
+                     << watcher << dendl;
 
     std::lock_guard locker{m_lock};
     ceph_assert(m_on_shut_down_finish == nullptr);
@@ -229,18 +230,207 @@ private:
   }
 };
 
+class QuiesceWatchers {
+public:
+  explicit QuiesceWatchers(CephContext *cct)
+    : m_cct(cct),
+      m_lock(ceph::make_mutex(util::unique_lock_name(
+        "librbd::QuiesceWatchers::m_lock", this))) {
+    ThreadPool *thread_pool;
+    ImageCtx::get_thread_pool_instance(m_cct, &thread_pool, &m_work_queue);
+  }
+
+  ~QuiesceWatchers() {
+    ceph_assert(m_pending_unregister.empty());
+    ceph_assert(m_on_notify == nullptr);
+  }
+
+  void register_watcher(QuiesceWatchCtx *watcher, uint64_t *handle) {
+    ldout(m_cct, 20) << "QuiesceWatchers::" << __func__ << ": watcher="
+                     << watcher << dendl;
+
+    std::lock_guard locker{m_lock};
+
+    *handle = m_next_handle++;
+    m_watchers[*handle] = watcher;
+  }
+
+  void unregister_watcher(uint64_t handle, Context *on_finish) {
+    int r = 0;
+    {
+      std::lock_guard locker{m_lock};
+      auto it = m_watchers.find(handle);
+      if (it == m_watchers.end()) {
+        r = -ENOENT;
+      } else {
+        if (m_on_notify != nullptr) {
+          ceph_assert(!m_pending_unregister.count(handle));
+          m_pending_unregister[handle] = on_finish;
+          on_finish = nullptr;
+        }
+        m_watchers.erase(it);
+      }
+    }
+
+    if (on_finish) {
+      ldout(m_cct, 20) << "QuiesceWatchers::" << __func__
+                       << ": completing unregister " << handle << dendl;
+      on_finish->complete(r);
+    }
+  }
+
+  void notify_quiesce(Context *on_finish) {
+    std::lock_guard locker{m_lock};
+    if (m_on_notify != nullptr) {
+      m_pending_notify.push_back(on_finish);
+      return;
+    }
+
+    notify(QUIESCE, on_finish);
+  }
+
+  void notify_unquiesce(Context *on_finish) {
+    std::lock_guard locker{m_lock};
+
+    notify(UNQUIESCE, on_finish);
+  }
+
+  void quiesce_complete() {
+    Context *on_notify = nullptr;
+    {
+      std::lock_guard locker{m_lock};
+      ceph_assert(m_on_notify != nullptr);
+      ceph_assert(m_handle_quiesce_cnt > 0);
+
+      m_handle_quiesce_cnt--;
+
+      if (m_handle_quiesce_cnt > 0) {
+        return;
+      }
+
+      std::swap(on_notify, m_on_notify);
+    }
+
+    on_notify->complete(0);
+  }
+
+private:
+  enum EventType {QUIESCE, UNQUIESCE};
+
+  CephContext *m_cct;
+  ContextWQ *m_work_queue;
+
+  ceph::mutex m_lock;
+  std::map<uint64_t, QuiesceWatchCtx*> m_watchers;
+  uint64_t m_next_handle = 0;
+  Context *m_on_notify = nullptr;
+  std::list<Context *> m_pending_notify;
+  std::map<uint64_t, Context*> m_pending_unregister;
+  uint64_t m_handle_quiesce_cnt = 0;
+
+  void notify(EventType event_type, Context *on_finish) {
+    ceph_assert(ceph_mutex_is_locked(m_lock));
+
+    if (m_watchers.empty()) {
+      m_work_queue->queue(on_finish);
+      return;
+    }
+
+    ldout(m_cct, 20) << "QuiesceWatchers::" << __func__ << " event: "
+                     << event_type << dendl;
+
+    Context *ctx = nullptr;
+    if (event_type == UNQUIESCE) {
+      ctx = create_async_context_callback(
+        m_work_queue, create_context_callback<
+          QuiesceWatchers, &QuiesceWatchers::handle_notify_unquiesce>(this));
+    }
+    auto gather_ctx = new C_Gather(m_cct, ctx);
+
+    ceph_assert(m_on_notify == nullptr);
+    ceph_assert(m_handle_quiesce_cnt == 0);
+
+    m_on_notify = on_finish;
+
+    for (auto it : m_watchers) {
+      send_notify(it.first, it.second, event_type, gather_ctx->new_sub());
+    }
+
+    gather_ctx->activate();
+  }
+
+  void send_notify(uint64_t handle, QuiesceWatchCtx *watcher,
+                   EventType event_type, Context *on_finish) {
+    auto ctx = new LambdaContext(
+      [this, handle, watcher, event_type, on_finish](int) {
+        ldout(m_cct, 20) << "QuiesceWatchers::" << __func__ << ": handle="
+                         << handle << ", event_type=" << event_type << dendl;
+        switch (event_type) {
+        case QUIESCE:
+          m_handle_quiesce_cnt++;
+          watcher->handle_quiesce();
+          break;
+        case UNQUIESCE:
+          watcher->handle_unquiesce();
+          break;
+        default:
+          ceph_abort_msgf("invalid event_type %d", event_type);
+        }
+
+        on_finish->complete(0);
+      });
+
+    m_work_queue->queue(ctx);
+  }
+
+  void handle_notify_unquiesce(int r) {
+    ldout(m_cct, 20) << "QuiesceWatchers::" << __func__ << ": r=" << r
+                     << dendl;
+
+    ceph_assert(r == 0);
+
+    std::unique_lock locker{m_lock};
+
+    if (!m_pending_unregister.empty()) {
+      std::map<uint64_t, Context*> pending_unregister;
+      std::swap(pending_unregister, m_pending_unregister);
+      locker.unlock();
+      for (auto &it : pending_unregister) {
+        ldout(m_cct, 20) << "QuiesceWatchers::" << __func__
+                         << ": completing unregister " << it.first << dendl;
+        it.second->complete(0);
+      }
+      locker.lock();
+    }
+
+    Context *on_notify = nullptr;
+    std::swap(on_notify, m_on_notify);
+
+    if (!m_pending_notify.empty()) {
+      auto on_finish = m_pending_notify.front();
+      m_pending_notify.pop_front();
+      notify(QUIESCE, on_finish);
+    }
+
+    locker.unlock();
+    on_notify->complete(0);
+  }
+};
+
 template <typename I>
 ImageState<I>::ImageState(I *image_ctx)
   : m_image_ctx(image_ctx), m_state(STATE_UNINITIALIZED),
     m_lock(ceph::make_mutex(util::unique_lock_name("librbd::ImageState::m_lock", this))),
     m_last_refresh(0), m_refresh_seq(0),
-    m_update_watchers(new ImageUpdateWatchers(image_ctx->cct)) {
+    m_update_watchers(new ImageUpdateWatchers(image_ctx->cct)),
+    m_quiesce_watchers(new QuiesceWatchers(image_ctx->cct)) {
 }
 
 template <typename I>
 ImageState<I>::~ImageState() {
   ceph_assert(m_state == STATE_UNINITIALIZED || m_state == STATE_CLOSED);
   delete m_update_watchers;
+  delete m_quiesce_watchers;
 }
 
 template <typename I>
@@ -751,6 +941,49 @@ void ImageState<I>::send_prepare_lock_unlock() {
   on_ready->complete(0);
 }
 
+template <typename I>
+int ImageState<I>::register_quiesce_watcher(QuiesceWatchCtx *watcher,
+                                                uint64_t *handle) {
+  CephContext *cct = m_image_ctx->cct;
+  ldout(cct, 20) << __func__ << dendl;
+
+  m_quiesce_watchers->register_watcher(watcher, handle);
+
+  ldout(cct, 20) << __func__ << ": handle=" << *handle << dendl;
+  return 0;
+}
+
+template <typename I>
+int ImageState<I>::unregister_quiesce_watcher(uint64_t handle) {
+  CephContext *cct = m_image_ctx->cct;
+  ldout(cct, 20) << __func__ << ": handle=" << handle << dendl;
+
+  C_SaferCond ctx;
+  m_quiesce_watchers->unregister_watcher(handle, &ctx);
+  return ctx.wait();
+}
+
+template <typename I>
+void ImageState<I>::notify_quiesce(Context *on_finish) {
+  CephContext *cct = m_image_ctx->cct;
+  ldout(cct, 20) << __func__ << dendl;
+
+  m_quiesce_watchers->notify_quiesce(on_finish);
+}
+
+template <typename I>
+void ImageState<I>::notify_unquiesce(Context *on_finish) {
+  CephContext *cct = m_image_ctx->cct;
+  ldout(cct, 20) << __func__ << dendl;
+
+  m_quiesce_watchers->notify_unquiesce(on_finish);
+}
+
+template <typename I>
+void ImageState<I>::quiesce_complete() {
+  m_quiesce_watchers->quiesce_complete();
+}
+
 } // namespace librbd
 
 template class librbd::ImageState<librbd::ImageCtx>;
index e51f18e94bdd62ce9c0e0036fd885a73838a57d8..446028805d8db6a81e209f461c7586a32d51164f 100644 (file)
@@ -16,6 +16,8 @@ class RWLock;
 
 namespace librbd {
 
+class QuiesceWatchCtx;
+class QuiesceWatchers;
 class ImageCtx;
 class ImageUpdateWatchers;
 class UpdateWatchCtx;
@@ -51,6 +53,12 @@ public:
   void flush_update_watchers(Context *on_finish);
   void shut_down_update_watchers(Context *on_finish);
 
+  int register_quiesce_watcher(QuiesceWatchCtx *watcher, uint64_t *handle);
+  int unregister_quiesce_watcher(uint64_t handle);
+  void notify_quiesce(Context *on_finish);
+  void notify_unquiesce(Context *on_finish);
+  void quiesce_complete();
+
 private:
   enum State {
     STATE_UNINITIALIZED,
@@ -110,6 +118,7 @@ private:
   uint64_t m_refresh_seq;
 
   ImageUpdateWatchers *m_update_watchers;
+  QuiesceWatchers *m_quiesce_watchers;
 
   uint64_t m_open_flags;
 
index d66dfa7fcf22a582c5c9d1f28549d87a1aeca46c..08bc01e6d7bf745cbeed22c8d04902b6b73d8a19 100644 (file)
@@ -333,6 +333,57 @@ void ImageWatcher<I>::notify_header_update(librados::IoCtx &io_ctx,
   io_ctx.notify2(oid, bl, watcher::Notifier::NOTIFY_TIMEOUT, nullptr);
 }
 
+template <typename I>
+void ImageWatcher<I>::notify_quiesce(uint64_t request_id, Context *on_finish) {
+  ldout(m_image_ctx.cct, 10) << this << " " << __func__ << ": request_id="
+                             << request_id << dendl;
+
+  AsyncRequestId async_request_id(get_client_id(), request_id);
+
+  auto attempts = m_image_ctx.config.template get_val<uint64_t>(
+    "rbd_quiesce_notification_attempts");
+
+  notify_quiesce(async_request_id, attempts, on_finish);
+}
+
+template <typename I>
+void ImageWatcher<I>::notify_quiesce(const AsyncRequestId &async_request_id,
+                                     size_t attempts, Context *on_finish) {
+  ldout(m_image_ctx.cct, 10) << this << " " << __func__ << ": async_request_id="
+                             << async_request_id << " attempts=" << attempts
+                             << dendl;
+
+  ceph_assert(attempts > 0);
+  auto on_notify = new LambdaContext(
+    [this, async_request_id, on_finish, attempts=attempts-1](int r) {
+      if (r == -ETIMEDOUT) {
+        ldout(m_image_ctx.cct, 10) << this << " " << __func__ << ": async_request_id="
+                                   << async_request_id << " timed out" << dendl;
+        if (attempts > 0) {
+          notify_quiesce(async_request_id, attempts, on_finish);
+          return;
+        }
+      }
+      if (r < 0) {
+        lderr(m_image_ctx.cct) << this << " failed to notify quiesce: "
+                               << cpp_strerror(r) << dendl;
+      }
+      on_finish->complete(r);
+    });
+
+  send_notify(new QuiescePayload(async_request_id), on_notify);
+}
+
+template <typename I>
+void ImageWatcher<I>::notify_unquiesce(uint64_t request_id, Context *on_finish) {
+  ldout(m_image_ctx.cct, 10) << this << " " << __func__ << ": request_id="
+                             << request_id << dendl;
+
+  AsyncRequestId async_request_id(get_client_id(), request_id);
+
+  send_notify(new UnquiescePayload(async_request_id), on_finish);
+}
+
 template <typename I>
 void ImageWatcher<I>::schedule_cancel_async_requests() {
   auto ctx = new LambdaContext(
@@ -343,12 +394,15 @@ void ImageWatcher<I>::schedule_cancel_async_requests() {
 template <typename I>
 void ImageWatcher<I>::cancel_async_requests() {
   std::unique_lock l{m_async_request_lock};
-  for (std::map<AsyncRequestId, AsyncRequest>::iterator iter =
-        m_async_requests.begin();
-       iter != m_async_requests.end(); ++iter) {
-    iter->second.first->complete(-ERESTART);
+  for (auto iter = m_async_requests.begin(); iter != m_async_requests.end(); ) {
+    if (iter->second.second == nullptr) {
+      // Quiesce notify request. Skip.
+      iter++;
+    } else {
+      iter->second.first->complete(-ERESTART);
+      iter = m_async_requests.erase(iter);
+    }
   }
-  m_async_requests.clear();
 }
 
 template <typename I>
@@ -577,6 +631,85 @@ int ImageWatcher<I>::prepare_async_request(const AsyncRequestId& async_request_i
   return 0;
 }
 
+template <typename I>
+Context *ImageWatcher<I>::prepare_quiesce_request(
+    const AsyncRequestId &request, C_NotifyAck *ack_ctx) {
+  std::unique_lock locker{m_async_request_lock};
+
+  auto timeout = 2 * watcher::Notifier::NOTIFY_TIMEOUT / 1000;
+
+  if (m_async_pending.count(request) != 0) {
+    auto it = m_async_requests.find(request);
+    if (it != m_async_requests.end()) {
+      delete it->second.first;
+      it->second.first = ack_ctx;
+    } else {
+      m_task_finisher->queue(new C_ResponseMessage(ack_ctx), -ESTALE);
+    }
+    locker.unlock();
+
+    m_task_finisher->reschedule_event_after(Task(TASK_CODE_QUIESCE, request),
+                                            timeout);
+    return nullptr;
+  }
+
+  m_async_pending.insert(request);
+  m_async_requests[request] = AsyncRequest(ack_ctx, nullptr);
+  m_async_op_tracker.start_op();
+
+  auto unquiesce_ctx = new LambdaContext(
+    [this, request](int r) {
+      if (r == 0) {
+        ldout(m_image_ctx.cct, 10) << this << " quiesce request " << request
+                                   << " timed out" << dendl;
+      }
+
+      auto on_finish = new LambdaContext(
+        [this, request](int r) {
+          std::unique_lock async_request_locker{m_async_request_lock};
+          m_async_pending.erase(request);
+        });
+
+      m_image_ctx.state->notify_unquiesce(on_finish);
+    });
+
+  return new LambdaContext(
+    [this, request, unquiesce_ctx, timeout](int r) {
+      ceph_assert(r == 0);
+
+      m_task_finisher->add_event_after(Task(TASK_CODE_QUIESCE, request),
+                                       timeout, unquiesce_ctx);
+
+      auto ctx = remove_async_request(request);
+      ceph_assert(ctx != nullptr);
+      ctx = new C_ResponseMessage(static_cast<C_NotifyAck *>(ctx));
+      ctx->complete(r);
+      m_async_op_tracker.finish_op();
+    });
+}
+
+template <typename I>
+Context *ImageWatcher<I>::prepare_unquiesce_request(const AsyncRequestId &request) {
+  {
+    std::unique_lock async_request_locker{m_async_request_lock};
+    bool found = m_async_pending.erase(request);
+    if (!found) {
+      return nullptr;
+    }
+  }
+
+  bool canceled = m_task_finisher->cancel(Task(TASK_CODE_QUIESCE, request));
+  if (!canceled) {
+    return nullptr;
+  }
+
+  m_async_op_tracker.start_op();
+  return new LambdaContext(
+    [this](int r) {
+      m_async_op_tracker.finish_op();
+    });
+}
+
 template <typename I>
 bool ImageWatcher<I>::handle_payload(const HeaderUpdatePayload &payload,
                                     C_NotifyAck *ack_ctx) {
@@ -1025,6 +1158,39 @@ bool ImageWatcher<I>::handle_payload(const SparsifyPayload &payload,
   return true;
 }
 
+template <typename I>
+bool ImageWatcher<I>::handle_payload(const QuiescePayload &payload,
+                                    C_NotifyAck *ack_ctx) {
+  auto on_finish = prepare_quiesce_request(payload.async_request_id, ack_ctx);
+  if (on_finish == nullptr) {
+    ldout(m_image_ctx.cct, 10) << this << " duplicate quiesce request: "
+                               << payload.async_request_id << dendl;
+    return false;
+  }
+
+  ldout(m_image_ctx.cct, 10) << this << " quiesce request: "
+                             << payload.async_request_id << dendl;
+  m_image_ctx.state->notify_quiesce(on_finish);
+  return false;
+}
+
+template <typename I>
+bool ImageWatcher<I>::handle_payload(const UnquiescePayload &payload,
+                                    C_NotifyAck *ack_ctx) {
+  auto on_finish = prepare_unquiesce_request(payload.async_request_id);
+  if (on_finish == nullptr) {
+    ldout(m_image_ctx.cct, 10) << this
+                               << " duplicate or unknown unquiesce request: "
+                               << payload.async_request_id << dendl;
+    return true;
+  }
+
+  ldout(m_image_ctx.cct, 10) << this << " unquiesce request: "
+                             << payload.async_request_id << dendl;
+  m_image_ctx.state->notify_unquiesce(on_finish);
+  return true;
+}
+
 template <typename I>
 bool ImageWatcher<I>::handle_payload(const UnknownPayload &payload,
                                     C_NotifyAck *ack_ctx) {
@@ -1113,6 +1279,12 @@ void ImageWatcher<I>::process_payload(uint64_t notify_id, uint64_t handle,
   case NOTIFY_OP_SPARSIFY:
     complete = handle_payload(*(static_cast<SparsifyPayload *>(payload)), ctx);
     break;
+  case NOTIFY_OP_QUIESCE:
+    complete = handle_payload(*(static_cast<QuiescePayload *>(payload)), ctx);
+    break;
+  case NOTIFY_OP_UNQUIESCE:
+    complete = handle_payload(*(static_cast<UnquiescePayload *>(payload)), ctx);
+    break;
   default:
     ceph_assert(payload->get_notify_op() == static_cast<NotifyOp>(-1));
     complete = handle_payload(*(static_cast<UnknownPayload *>(payload)), ctx);
@@ -1144,6 +1316,7 @@ void ImageWatcher<I>::handle_notify(uint64_t notify_id, uint64_t handle,
   // if an image refresh is required, refresh before processing the request
   if (notify_message.check_for_refresh() &&
       m_image_ctx.state->is_refresh_required()) {
+
     m_image_ctx.state->refresh(
       new C_ProcessPayload(this, notify_id, handle,
                            std::move(notify_message.payload)));
index f22fc1f6489ca915af848f2185190cbf4f8f7e70..203b6763cf607da44d4255880f7eabc9c46d83ee 100644 (file)
@@ -23,7 +23,6 @@ template <typename> class TaskFinisher;
 
 template <typename ImageCtxT = ImageCtx>
 class ImageWatcher : public Watcher {
-
 public:
   ImageWatcher(ImageCtxT& image_ctx);
   ~ImageWatcher() override;
@@ -71,13 +70,17 @@ public:
   static void notify_header_update(librados::IoCtx &io_ctx,
                                    const std::string &oid);
 
+  void notify_quiesce(uint64_t request_id, Context *on_finish);
+  void notify_unquiesce(uint64_t request_id, Context *on_finish);
+
 private:
   enum TaskCode {
     TASK_CODE_REQUEST_LOCK,
     TASK_CODE_CANCEL_ASYNC_REQUESTS,
     TASK_CODE_REREGISTER_WATCH,
     TASK_CODE_ASYNC_REQUEST,
-    TASK_CODE_ASYNC_PROGRESS
+    TASK_CODE_ASYNC_PROGRESS,
+    TASK_CODE_QUIESCE,
   };
 
   typedef std::pair<Context *, ProgressContext *> AsyncRequest;
@@ -198,6 +201,13 @@ private:
                             bool* new_request, Context** ctx,
                             ProgressContext** prog_ctx);
 
+  Context *prepare_quiesce_request(const watch_notify::AsyncRequestId &request,
+                                   C_NotifyAck *ack_ctx);
+  Context *prepare_unquiesce_request(const watch_notify::AsyncRequestId &request);
+
+  void notify_quiesce(const watch_notify::AsyncRequestId &async_request_id,
+                      size_t attempts, Context *on_finish);
+
   bool handle_payload(const watch_notify::HeaderUpdatePayload& payload,
                       C_NotifyAck *ctx);
   bool handle_payload(const watch_notify::AcquiredLockPayload& payload,
@@ -234,6 +244,10 @@ private:
                       C_NotifyAck *ctx);
   bool handle_payload(const watch_notify::SparsifyPayload& payload,
                       C_NotifyAck *ctx);
+  bool handle_payload(const watch_notify::QuiescePayload& payload,
+                      C_NotifyAck *ctx);
+  bool handle_payload(const watch_notify::UnquiescePayload& payload,
+                      C_NotifyAck *ctx);
   bool handle_payload(const watch_notify::UnknownPayload& payload,
                       C_NotifyAck *ctx);
   void process_payload(uint64_t notify_id, uint64_t handle,
index d53d7637e3f130588e280046efb1e213fa996a67..fce2d1d6c2b883471e9fd4ce3a46ba60d0ee2262 100644 (file)
@@ -768,10 +768,11 @@ void Operations<I>::execute_snap_create(const cls::rbd::SnapshotNamespace &snap_
   }
   m_image_ctx.image_lock.unlock_shared();
 
+  uint64_t request_id = ++m_async_request_seq;
   operation::SnapshotCreateRequest<I> *req =
     new operation::SnapshotCreateRequest<I>(
       m_image_ctx, new C_NotifyUpdate<I>(m_image_ctx, on_finish),
-      snap_namespace, snap_name, journal_op_tid, skip_object_map);
+      snap_namespace, snap_name, journal_op_tid, request_id, skip_object_map);
   req->send();
 }
 
index a4448f77b1862d295b337559b4f74cfdd50ae18b..39599541b61db52e47f588be14d395df290c9ac4 100644 (file)
@@ -356,6 +356,12 @@ void NotifyMessage::decode(bufferlist::const_iterator& iter) {
   case NOTIFY_OP_SPARSIFY:
     payload.reset(new SparsifyPayload());
     break;
+  case NOTIFY_OP_QUIESCE:
+    payload.reset(new QuiescePayload());
+    break;
+  case NOTIFY_OP_UNQUIESCE:
+    payload.reset(new UnquiescePayload());
+    break;
   }
 
   payload->decode(struct_v, iter);
@@ -389,6 +395,8 @@ void NotifyMessage::generate_test_instances(std::list<NotifyMessage *> &o) {
   o.push_back(new NotifyMessage(new UpdateFeaturesPayload(1, true)));
   o.push_back(new NotifyMessage(new MigratePayload(AsyncRequestId(ClientId(0, 1), 2))));
   o.push_back(new NotifyMessage(new SparsifyPayload(AsyncRequestId(ClientId(0, 1), 2), 1)));
+  o.push_back(new NotifyMessage(new QuiescePayload(AsyncRequestId(ClientId(0, 1), 2))));
+  o.push_back(new NotifyMessage(new UnquiescePayload(AsyncRequestId(ClientId(0, 1), 2))));
 }
 
 void ResponseMessage::encode(bufferlist& bl) const {
@@ -470,6 +478,12 @@ std::ostream &operator<<(std::ostream &out,
   case NOTIFY_OP_SPARSIFY:
     out << "Sparsify";
     break;
+  case NOTIFY_OP_QUIESCE:
+    out << "Quiesce";
+    break;
+  case NOTIFY_OP_UNQUIESCE:
+    out << "Unquiesce";
+    break;
   default:
     out << "Unknown (" << static_cast<uint32_t>(op) << ")";
     break;
index e866e066f47bce7dd3be3c150c11668ddcebe4c7..c5fc125774391f5fa88118b6cc37970a0b4529e4 100644 (file)
@@ -68,6 +68,8 @@ enum NotifyOp {
   NOTIFY_OP_UPDATE_FEATURES    = 15,
   NOTIFY_OP_MIGRATE            = 16,
   NOTIFY_OP_SPARSIFY           = 17,
+  NOTIFY_OP_QUIESCE            = 18,
+  NOTIFY_OP_UNQUIESCE          = 19,
 };
 
 struct Payload {
@@ -403,6 +405,30 @@ struct SparsifyPayload : public AsyncRequestPayloadBase {
   void dump(Formatter *f) const override;
 };
 
+struct QuiescePayload : public AsyncRequestPayloadBase {
+  QuiescePayload() {}
+  QuiescePayload(const AsyncRequestId &id) : AsyncRequestPayloadBase(id) {}
+
+  NotifyOp get_notify_op() const override {
+    return NOTIFY_OP_QUIESCE;
+  }
+  bool check_for_refresh() const override {
+    return false;
+  }
+};
+
+struct UnquiescePayload : public AsyncRequestPayloadBase {
+  UnquiescePayload() {}
+  UnquiescePayload(const AsyncRequestId &id) : AsyncRequestPayloadBase(id) {}
+
+  NotifyOp get_notify_op() const override {
+    return NOTIFY_OP_UNQUIESCE;
+  }
+  bool check_for_refresh() const override {
+    return false;
+  }
+};
+
 struct UnknownPayload : public Payload {
   NotifyOp get_notify_op() const override {
     return static_cast<NotifyOp>(-1);
index 18e18cb625c204b88234b3557ca5200ff940920d..239e078f28261dc9ceb6205b22bc714399b7a9df 100644 (file)
@@ -187,6 +187,24 @@ struct C_UpdateWatchCB : public librbd::UpdateWatchCtx {
   }
 };
 
+struct C_QuiesceWatchCB : public librbd::QuiesceWatchCtx {
+  rbd_update_callback_t quiesce_cb;
+  rbd_update_callback_t unquiesce_cb;
+  void *arg;
+  uint64_t handle = 0;
+
+  C_QuiesceWatchCB(rbd_update_callback_t quiesce_cb,
+                   rbd_update_callback_t unquiesce_cb, void *arg) :
+    quiesce_cb(quiesce_cb), unquiesce_cb(unquiesce_cb), arg(arg) {
+  }
+  void handle_quiesce() override {
+    quiesce_cb(arg);
+  }
+  void handle_unquiesce() override {
+    unquiesce_cb(arg);
+  }
+};
+
 void group_image_status_cpp_to_c(const librbd::group_image_info_t &cpp_info,
                                 rbd_group_image_info_t *c_info) {
   c_info->name = strdup(cpp_info.name.c_str());
@@ -3005,6 +3023,23 @@ namespace librbd {
     return librbd::api::Config<>::list(ictx, options);
   }
 
+  int Image::quiesce_watch(QuiesceWatchCtx *wctx, uint64_t *handle) {
+    ImageCtx *ictx = (ImageCtx *)ctx;
+    int r = ictx->state->register_quiesce_watcher(wctx, handle);
+    return r;
+  }
+
+  int Image::quiesce_unwatch(uint64_t handle) {
+    ImageCtx *ictx = (ImageCtx *)ctx;
+    int r = ictx->state->unregister_quiesce_watcher(handle);
+    return r;
+  }
+
+  void Image::quiesce_complete() {
+    ImageCtx *ictx = (ImageCtx *)ctx;
+    ictx->state->quiesce_complete();
+  }
+
 } // namespace librbd
 
 extern "C" void rbd_version(int *major, int *minor, int *extra)
@@ -7093,3 +7128,34 @@ extern "C" void rbd_config_image_list_cleanup(rbd_config_option_t *options,
     config_option_cleanup(options[i]);
   }
 }
+
+extern "C" int rbd_quiesce_watch(rbd_image_t image,
+                                 rbd_update_callback_t quiesce_cb,
+                                 rbd_update_callback_t unquiesce_cb,
+                                 void *arg, uint64_t *handle)
+{
+  librbd::ImageCtx *ictx = (librbd::ImageCtx *)image;
+  auto wctx = new C_QuiesceWatchCB(quiesce_cb, unquiesce_cb, arg);
+  int r = ictx->state->register_quiesce_watcher(wctx, &wctx->handle);
+  if (r < 0) {
+    delete wctx;
+    return r;
+  }
+  *handle = reinterpret_cast<uint64_t>(wctx);
+  return 0;
+}
+
+extern "C" int rbd_quiesce_unwatch(rbd_image_t image, uint64_t handle)
+{
+  librbd::ImageCtx *ictx = (librbd::ImageCtx *)image;
+  auto *wctx = reinterpret_cast<C_QuiesceWatchCB *>(handle);
+  int r = ictx->state->unregister_quiesce_watcher(wctx->handle);
+  delete wctx;
+  return r;
+}
+
+extern "C" void rbd_quiesce_complete(rbd_image_t image)
+{
+  librbd::ImageCtx *ictx = (librbd::ImageCtx *)image;
+  ictx->state->quiesce_complete();
+}
index e51e4389506c5a33a4e5e77cbeb76019a962260f..a8649153a1db28851430f2e3115167764130b828 100644 (file)
@@ -7,6 +7,7 @@
 #include "common/errno.h"
 #include "librbd/ExclusiveLock.h"
 #include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
 #include "librbd/ObjectMap.h"
 #include "librbd/Utils.h"
 #include "librbd/io/ImageRequestWQ.h"
@@ -29,10 +30,11 @@ SnapshotCreateRequest<I>::SnapshotCreateRequest(I &image_ctx,
                                                const cls::rbd::SnapshotNamespace &snap_namespace,
                                                 const std::string &snap_name,
                                                 uint64_t journal_op_tid,
+                                                uint64_t request_id,
                                                 bool skip_object_map)
   : Request<I>(image_ctx, on_finish, journal_op_tid),
     m_snap_namespace(snap_namespace), m_snap_name(snap_name),
-    m_skip_object_map(skip_object_map), m_ret_val(0), m_snap_id(CEPH_NOSNAP) {
+    m_request_id(request_id), m_skip_object_map(skip_object_map) {
 }
 
 template <typename I>
@@ -46,7 +48,36 @@ void SnapshotCreateRequest<I>::send_op() {
     return;
   }
 
+  send_notify_quiesce();
+}
+
+template <typename I>
+void SnapshotCreateRequest<I>::send_notify_quiesce() {
+  I &image_ctx = this->m_image_ctx;
+
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << dendl;
+
+  image_ctx.image_watcher->notify_quiesce(
+    m_request_id, create_async_context_callback(
+      image_ctx, create_context_callback<SnapshotCreateRequest<I>,
+      &SnapshotCreateRequest<I>::handle_notify_quiesce>(this)));
+}
+
+template <typename I>
+Context *SnapshotCreateRequest<I>::handle_notify_quiesce(int *result) {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+  if (*result < 0) {
+    lderr(cct) << "failed to notify quiesce: " << cpp_strerror(*result)
+               << dendl;
+    return this->create_context_finisher(*result);
+  }
+
   send_suspend_requests();
+  return nullptr;
 }
 
 template <typename I>
@@ -73,7 +104,7 @@ Context *SnapshotCreateRequest<I>::handle_suspend_requests(int *result) {
 template <typename I>
 void SnapshotCreateRequest<I>::send_suspend_aio() {
   I &image_ctx = this->m_image_ctx;
-  ceph_assert(ceph_mutex_is_locked(image_ctx.owner_lock));
+  std::shared_lock owner_locker{image_ctx.owner_lock};
 
   CephContext *cct = image_ctx.cct;
   ldout(cct, 5) << this << " " << __func__ << dendl;
@@ -91,8 +122,9 @@ Context *SnapshotCreateRequest<I>::handle_suspend_aio(int *result) {
 
   if (*result < 0) {
     lderr(cct) << "failed to block writes: " << cpp_strerror(*result) << dendl;
-    image_ctx.io_work_queue->unblock_writes();
-    return this->create_context_finisher(*result);
+    save_result(result);
+    send_notify_unquiesce();
+    return nullptr;
   }
 
   send_append_op_event();
@@ -120,10 +152,11 @@ Context *SnapshotCreateRequest<I>::handle_append_op_event(int *result) {
   ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
 
   if (*result < 0) {
-    image_ctx.io_work_queue->unblock_writes();
     lderr(cct) << "failed to commit journal entry: " << cpp_strerror(*result)
                << dendl;
-    return this->create_context_finisher(*result);
+    save_result(result);
+    send_notify_unquiesce();
+    return nullptr;
   }
 
   send_allocate_snap_id();
@@ -151,11 +184,11 @@ Context *SnapshotCreateRequest<I>::handle_allocate_snap_id(int *result) {
                 << "snap_id=" << m_snap_id << dendl;
 
   if (*result < 0) {
-    save_result(result);
-    image_ctx.io_work_queue->unblock_writes();
     lderr(cct) << "failed to allocate snapshot id: " << cpp_strerror(*result)
                << dendl;
-    return this->create_context_finisher(*result);
+    save_result(result);
+    send_notify_unquiesce();
+    return nullptr;
   }
 
   send_create_snap();
@@ -245,9 +278,10 @@ Context *SnapshotCreateRequest<I>::handle_create_object_map(int *result) {
     lderr(cct) << this << " " << __func__ << ": failed to snapshot object map: "
                << cpp_strerror(*result) << dendl;
 
+    save_result(result);
     update_snap_context();
-    image_ctx.io_work_queue->unblock_writes();
-    return this->create_context_finisher(*result);
+    send_notify_unquiesce();
+    return nullptr;
   }
 
   return send_create_image_state();
@@ -260,8 +294,8 @@ Context *SnapshotCreateRequest<I>::send_create_image_state() {
     &m_snap_namespace);
   if (mirror_ns == nullptr || !mirror_ns->is_primary()) {
     update_snap_context();
-    image_ctx.io_work_queue->unblock_writes();
-    return this->create_context_finisher(0);
+    send_notify_unquiesce();
+    return nullptr;
   }
 
   CephContext *cct = image_ctx.cct;
@@ -282,14 +316,14 @@ Context *SnapshotCreateRequest<I>::handle_create_image_state(int *result) {
   ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
 
   update_snap_context();
-  image_ctx.io_work_queue->unblock_writes();
   if (*result < 0) {
     lderr(cct) << this << " " << __func__ << ": failed to snapshot object map: "
                << cpp_strerror(*result) << dendl;
-    return this->create_context_finisher(*result);
+    save_result(result);
   }
 
-  return this->create_context_finisher(0);
+  send_notify_unquiesce();
+  return nullptr;
 }
 
 template <typename I>
@@ -313,10 +347,38 @@ Context *SnapshotCreateRequest<I>::handle_release_snap_id(int *result) {
   CephContext *cct = image_ctx.cct;
   ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
 
-  ceph_assert(m_ret_val < 0);
-  *result = m_ret_val;
+  send_notify_unquiesce();
+  return nullptr;
+}
+
+template <typename I>
+void SnapshotCreateRequest<I>::send_notify_unquiesce() {
+  I &image_ctx = this->m_image_ctx;
+
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << dendl;
 
   image_ctx.io_work_queue->unblock_writes();
+
+  image_ctx.image_watcher->notify_unquiesce(
+    m_request_id, create_context_callback<
+      SnapshotCreateRequest<I>,
+      &SnapshotCreateRequest<I>::handle_notify_unquiesce>(this));
+}
+
+template <typename I>
+Context *SnapshotCreateRequest<I>::handle_notify_unquiesce(int *result) {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+  if (*result < 0) {
+    lderr(cct) << "failed to notify unquiesce: " << cpp_strerror(*result)
+               << dendl;
+    // ignore error
+  }
+
+  *result = m_ret_val;
   return this->create_context_finisher(m_ret_val);
 }
 
index 1754f26bfba288cce458584f8e08104b67b86c7d..ea6d67449c9c63f1dc0e1ff54c4e9d7c6716984e 100644 (file)
@@ -28,33 +28,38 @@ public:
    *            <start>
    *               |
    *               v
+   *           STATE_NOTIFY_QUIESCE
+   *               |
+   *               v
    *           STATE_SUSPEND_REQUESTS
    *               |
    *               v
-   *           STATE_SUSPEND_AIO * * * * * * * * * * * * *
-   *               |                                     *
-   *               v                                     *
-   *           STATE_APPEND_OP_EVENT (skip if journal    *
-   *               |                  disabled)          *
-   *   (retry)     v                                     *
-   *   . . . > STATE_ALLOCATE_SNAP_ID                    *
-   *   .           |                                     *
-   *   .           v                                     *
-   *   . . . . STATE_CREATE_SNAP * * * * * * * * * * *   *
-   *               |                                 *   *
-   *               v                                 *   *
-   *           STATE_CREATE_OBJECT_MAP (skip if      *   *
-   *               |                    disabled)    *   *
-   *               v                                 *   *
-   *           STATE_CREATE_IMAGE_STATE (skip if     *   *
-   *               |                     not mirror  *   *
-   *               |                     snapshot)   *   *
-   *               |                                 v   *
-   *               |              STATE_RELEASE_SNAP_ID  *
-   *               |                     |               *
-   *               |                     v               *
-   *               \----------------> <finish> < * * * * *
-   *
+   *           STATE_SUSPEND_AIO * * * * * * * * * * * * * * *
+   *               |                                         *
+   *               v                                         *
+   *           STATE_APPEND_OP_EVENT (skip if journal        *
+   *               |                  disabled)              *
+   *   (retry)     v                                         *
+   *   . . . > STATE_ALLOCATE_SNAP_ID                        *
+   *   .           |                                         *
+   *   .           v                                         *
+   *   . . . . STATE_CREATE_SNAP * * * * * * * * * * *       *
+   *               |                                 *       *
+   *               v                                 *       *
+   *           STATE_CREATE_OBJECT_MAP (skip if      *       *
+   *               |                    disabled)    *       *
+   *               v                                 *       *
+   *           STATE_CREATE_IMAGE_STATE (skip if     *       *
+   *               |                     not mirror  *       *
+   *               |                     snapshot)   *       *
+   *               |                                 v       *
+   *               |              STATE_RELEASE_SNAP_ID      *
+   *               |                     |                   *
+   *               |                     v                   *
+   *               \------------> STATE_NOTIFY_UNQUIESCE < * *
+   *                                     |
+   *                                     v
+   *                                  <finish>
    * @endverbatim
    *
    * The _CREATE_STATE state may repeat back to the _ALLOCATE_SNAP_ID state
@@ -63,10 +68,9 @@ public:
    * (if enabled) and bubble the originating error code back to the client.
    */
   SnapshotCreateRequest(ImageCtxT &image_ctx, Context *on_finish,
-                       const cls::rbd::SnapshotNamespace &snap_namespace,
-                       const std::string &snap_name,
-                       uint64_t journal_op_tid,
-                        bool skip_object_map);
+                        const cls::rbd::SnapshotNamespace &snap_namespace,
+                        const std::string &snap_name, uint64_t journal_op_tid,
+                        uint64_t request_id, bool skip_object_map);
 
 protected:
   void send_op() override;
@@ -83,14 +87,18 @@ protected:
 private:
   cls::rbd::SnapshotNamespace m_snap_namespace;
   std::string m_snap_name;
+  uint64_t m_request_id;
   bool m_skip_object_map;
 
-  int m_ret_val;
+  int m_ret_val = 0;
 
-  uint64_t m_snap_id;
+  uint64_t m_snap_id = CEPH_NOSNAP;
   uint64_t m_size;
   ParentImageInfo m_parent_info;
 
+  void send_notify_quiesce();
+  Context *handle_notify_quiesce(int *result);
+
   void send_suspend_requests();
   Context *handle_suspend_requests(int *result);
 
@@ -115,6 +123,9 @@ private:
   void send_release_snap_id();
   Context *handle_release_snap_id(int *result);
 
+  void send_notify_unquiesce();
+  Context *handle_notify_unquiesce(int *result);
+
   void update_snap_context();
 
   void save_result(int *result) {
index b9c8c8d07590b02c3016a984b1fea95b0d742402..9b9dc8b879a55e5eacbcc475f56266df2f46b7cf 100644 (file)
@@ -29,7 +29,7 @@ struct MockImageState {
   MOCK_METHOD0(handle_prepare_lock_complete, void());
 
   MOCK_METHOD2(register_update_watcher, int(UpdateWatchCtx *, uint64_t *));
-  MOCK_METHOD2(unregister_update_watcher, void(uint64_t, Context *));  
+  MOCK_METHOD2(unregister_update_watcher, void(uint64_t, Context *));
 };
 
 } // namespace librbd
index c7e9427923738a0d97ba0667861a24b84a834ece..8044c9e97653f158d22cf1763668011ed39f67b1 100644 (file)
@@ -22,6 +22,9 @@ struct MockImageWatcher {
   MOCK_METHOD0(notify_acquired_lock, void());
   MOCK_METHOD0(notify_released_lock, void());
   MOCK_METHOD0(notify_request_lock, void());
+  
+  MOCK_METHOD2(notify_quiesce, void(uint64_t, Context *));
+  MOCK_METHOD2(notify_unquiesce, void(uint64_t, Context *));
 };
 
 } // namespace librbd
index 47506a96da29ab1a2fedfc1581279dfa29a05116..e32eda54bc675464d2b93feab961635b6db95c4c 100644 (file)
@@ -62,6 +62,12 @@ public:
   typedef SnapshotCreateRequest<MockImageCtx> MockSnapshotCreateRequest;
   typedef mirror::snapshot::SetImageStateRequest<MockImageCtx> MockSetImageStateRequest;
 
+  void expect_notify_quiesce(MockImageCtx &mock_image_ctx, int r) {
+    EXPECT_CALL(*mock_image_ctx.image_watcher, notify_quiesce(_, _))
+      .WillOnce(WithArg<1>(
+                  CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue)));
+  }
+
   void expect_block_writes(MockImageCtx &mock_image_ctx) {
     EXPECT_CALL(*mock_image_ctx.io_work_queue, block_writes(_))
                   .WillOnce(CompleteContext(0, mock_image_ctx.image_ctx->op_work_queue));
@@ -136,6 +142,12 @@ public:
     EXPECT_CALL(*mock_image_ctx.io_work_queue, unblock_writes())
                   .Times(1);
   }
+
+  void expect_notify_unquiesce(MockImageCtx &mock_image_ctx, int r) {
+    EXPECT_CALL(*mock_image_ctx.image_watcher, notify_unquiesce(_, _))
+      .WillOnce(WithArg<1>(
+                  CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue)));
+  }
 };
 
 TEST_F(TestMockOperationSnapshotCreateRequest, Success) {
@@ -160,6 +172,7 @@ TEST_F(TestMockOperationSnapshotCreateRequest, Success) {
   expect_op_work_queue(mock_image_ctx);
 
   ::testing::InSequence seq;
+  expect_notify_quiesce(mock_image_ctx, 0);
   expect_block_writes(mock_image_ctx);
   expect_allocate_snap_id(mock_image_ctx, 0);
   expect_snap_create(mock_image_ctx, 0);
@@ -168,11 +181,12 @@ TEST_F(TestMockOperationSnapshotCreateRequest, Success) {
     expect_update_snap_context(mock_image_ctx);
   }
   expect_unblock_writes(mock_image_ctx);
+  expect_notify_unquiesce(mock_image_ctx, -EINVAL);
 
   C_SaferCond cond_ctx;
   MockSnapshotCreateRequest *req = new MockSnapshotCreateRequest(
     mock_image_ctx, &cond_ctx, cls::rbd::UserSnapshotNamespace(),
-      "snap1", 0, false);
+      "snap1", 0, 0, false);
   {
     std::shared_lock owner_locker{mock_image_ctx.owner_lock};
     req->send();
@@ -180,6 +194,28 @@ TEST_F(TestMockOperationSnapshotCreateRequest, Success) {
   ASSERT_EQ(0, cond_ctx.wait());
 }
 
+TEST_F(TestMockOperationSnapshotCreateRequest, NotifyQuiesceError) {
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockImageCtx mock_image_ctx(*ictx);
+
+  expect_op_work_queue(mock_image_ctx);
+
+  ::testing::InSequence seq;
+  expect_notify_quiesce(mock_image_ctx, -EINVAL);
+
+  C_SaferCond cond_ctx;
+  MockSnapshotCreateRequest *req = new MockSnapshotCreateRequest(
+    mock_image_ctx, &cond_ctx, cls::rbd::UserSnapshotNamespace(),
+    "snap1", 0, 0, false);
+  {
+    std::shared_lock owner_locker{mock_image_ctx.owner_lock};
+    req->send();
+  }
+  ASSERT_EQ(-EINVAL, cond_ctx.wait());
+}
+
 TEST_F(TestMockOperationSnapshotCreateRequest, AllocateSnapIdError) {
   librbd::ImageCtx *ictx;
   ASSERT_EQ(0, open_image(m_image_name, &ictx));
@@ -195,14 +231,16 @@ TEST_F(TestMockOperationSnapshotCreateRequest, AllocateSnapIdError) {
   expect_op_work_queue(mock_image_ctx);
 
   ::testing::InSequence seq;
+  expect_notify_quiesce(mock_image_ctx, 0);
   expect_block_writes(mock_image_ctx);
   expect_allocate_snap_id(mock_image_ctx, -EINVAL);
   expect_unblock_writes(mock_image_ctx);
+  expect_notify_unquiesce(mock_image_ctx, 0);
 
   C_SaferCond cond_ctx;
   MockSnapshotCreateRequest *req = new MockSnapshotCreateRequest(
     mock_image_ctx, &cond_ctx, cls::rbd::UserSnapshotNamespace(),
-      "snap1", 0, false);
+    "snap1", 0, 0, false);
   {
     std::shared_lock owner_locker{mock_image_ctx.owner_lock};
     req->send();
@@ -229,6 +267,7 @@ TEST_F(TestMockOperationSnapshotCreateRequest, CreateSnapStale) {
   expect_verify_lock_ownership(mock_image_ctx);
   expect_op_work_queue(mock_image_ctx);
 
+  expect_notify_quiesce(mock_image_ctx, 0);
   expect_block_writes(mock_image_ctx);
   expect_allocate_snap_id(mock_image_ctx, -ESTALE);
   expect_snap_create(mock_image_ctx, -ESTALE);
@@ -237,11 +276,12 @@ TEST_F(TestMockOperationSnapshotCreateRequest, CreateSnapStale) {
     expect_update_snap_context(mock_image_ctx);
   }
   expect_unblock_writes(mock_image_ctx);
+  expect_notify_unquiesce(mock_image_ctx, 0);
 
   C_SaferCond cond_ctx;
   MockSnapshotCreateRequest *req = new MockSnapshotCreateRequest(
     mock_image_ctx, &cond_ctx, cls::rbd::UserSnapshotNamespace(),
-      "snap1", 0, false);
+    "snap1", 0, 0, false);
   {
     std::shared_lock owner_locker{mock_image_ctx.owner_lock};
     req->send();
@@ -263,16 +303,18 @@ TEST_F(TestMockOperationSnapshotCreateRequest, CreateSnapError) {
   expect_verify_lock_ownership(mock_image_ctx);
   expect_op_work_queue(mock_image_ctx);
 
+  expect_notify_quiesce(mock_image_ctx, 0);
   expect_block_writes(mock_image_ctx);
   expect_allocate_snap_id(mock_image_ctx, 0);
   expect_snap_create(mock_image_ctx, -EINVAL);
   expect_release_snap_id(mock_image_ctx, 0);
   expect_unblock_writes(mock_image_ctx);
+  expect_notify_unquiesce(mock_image_ctx, 0);
 
   C_SaferCond cond_ctx;
   MockSnapshotCreateRequest *req = new MockSnapshotCreateRequest(
     mock_image_ctx, &cond_ctx, cls::rbd::UserSnapshotNamespace(),
-      "snap1", 0, false);
+    "snap1", 0, 0, false);
   {
     std::shared_lock owner_locker{mock_image_ctx.owner_lock};
     req->send();
@@ -294,16 +336,18 @@ TEST_F(TestMockOperationSnapshotCreateRequest, ReleaseSnapIdError) {
   expect_verify_lock_ownership(mock_image_ctx);
   expect_op_work_queue(mock_image_ctx);
 
+  expect_notify_quiesce(mock_image_ctx, 0);
   expect_block_writes(mock_image_ctx);
   expect_allocate_snap_id(mock_image_ctx, 0);
   expect_snap_create(mock_image_ctx, -EINVAL);
   expect_release_snap_id(mock_image_ctx, -ESTALE);
   expect_unblock_writes(mock_image_ctx);
+  expect_notify_unquiesce(mock_image_ctx, 0);
 
   C_SaferCond cond_ctx;
   MockSnapshotCreateRequest *req = new MockSnapshotCreateRequest(
     mock_image_ctx, &cond_ctx, cls::rbd::UserSnapshotNamespace(),
-      "snap1", 0, false);
+    "snap1", 0, 0, false);
   {
     std::shared_lock owner_locker{mock_image_ctx.owner_lock};
     req->send();
@@ -331,16 +375,18 @@ TEST_F(TestMockOperationSnapshotCreateRequest, SkipObjectMap) {
   expect_op_work_queue(mock_image_ctx);
 
   ::testing::InSequence seq;
+  expect_notify_quiesce(mock_image_ctx, 0);
   expect_block_writes(mock_image_ctx);
   expect_allocate_snap_id(mock_image_ctx, 0);
   expect_snap_create(mock_image_ctx, 0);
   expect_update_snap_context(mock_image_ctx);
   expect_unblock_writes(mock_image_ctx);
+  expect_notify_unquiesce(mock_image_ctx, 0);
 
   C_SaferCond cond_ctx;
   MockSnapshotCreateRequest *req = new MockSnapshotCreateRequest(
     mock_image_ctx, &cond_ctx, cls::rbd::UserSnapshotNamespace(),
-      "snap1", 0, true);
+    "snap1", 0, 0, true);
   {
     std::shared_lock owner_locker{mock_image_ctx.owner_lock};
     req->send();
@@ -370,6 +416,7 @@ TEST_F(TestMockOperationSnapshotCreateRequest, SetImageState) {
   expect_op_work_queue(mock_image_ctx);
 
   ::testing::InSequence seq;
+  expect_notify_quiesce(mock_image_ctx, 0);
   expect_block_writes(mock_image_ctx);
   expect_allocate_snap_id(mock_image_ctx, 0);
   expect_snap_create(mock_image_ctx, 0);
@@ -378,13 +425,14 @@ TEST_F(TestMockOperationSnapshotCreateRequest, SetImageState) {
   expect_set_image_state(mock_image_ctx, mock_set_image_state_request, 0);
   expect_update_snap_context(mock_image_ctx);
   expect_unblock_writes(mock_image_ctx);
+  expect_notify_unquiesce(mock_image_ctx, 0);
 
   C_SaferCond cond_ctx;
   MockSnapshotCreateRequest *req = new MockSnapshotCreateRequest(
     mock_image_ctx, &cond_ctx,
     cls::rbd::MirrorSnapshotNamespace{
       cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY, {}, "", CEPH_NOSNAP},
-    "snap1", 0, false);
+    "snap1", 0, 0, false);
   {
     std::shared_lock owner_locker{mock_image_ctx.owner_lock};
     req->send();
index 2c83af9839b4cef897bdb3fde054af9ef9331502..e0b1b2de1c1821c7fa4bd61db023723b69721e27 100644 (file)
@@ -8175,6 +8175,257 @@ TEST_F(TestLibRBD, SnapRemoveWithChildMissing)
   rados_ioctx_destroy(ioctx1);
 }
 
+TEST_F(TestLibRBD, QuiesceWatch)
+{
+  rados_ioctx_t ioctx;
+  rados_ioctx_create(_cluster, m_pool_name.c_str(), &ioctx);
+
+  int order = 0;
+  std::string name = get_temp_image_name();
+  uint64_t size = 2 << 20;
+  ASSERT_EQ(0, create_image(ioctx, name.c_str(), size, &order));
+
+  uint64_t handle1, handle2;
+  rbd_image_t image1, image2;
+  ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image1, NULL));
+  ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image2, NULL));
+
+  struct Watcher {
+    static void quiesce_cb(void *arg) {
+      Watcher *watcher = static_cast<Watcher *>(arg);
+      watcher->handle_quiesce();
+    }
+    static void unquiesce_cb(void *arg) {
+      Watcher *watcher = static_cast<Watcher *>(arg);
+      watcher->handle_unquiesce();
+    }
+
+    rbd_image_t &image;
+    size_t quiesce_count = 0;
+    size_t unquiesce_count = 0;
+
+    Watcher(rbd_image_t &image) : image(image) {
+    }
+
+    void handle_quiesce() {
+      ASSERT_EQ(quiesce_count, unquiesce_count);
+      quiesce_count++;
+      rbd_quiesce_complete(image);
+    }
+    void handle_unquiesce() {
+      unquiesce_count++;
+      ASSERT_EQ(quiesce_count, unquiesce_count);
+    }
+  } watcher1(image1), watcher2(image2);
+
+  ASSERT_EQ(0, rbd_quiesce_watch(image1, Watcher::quiesce_cb,
+                                 Watcher::unquiesce_cb, &watcher1, &handle1));
+  ASSERT_EQ(0, rbd_quiesce_watch(image2, Watcher::quiesce_cb,
+                                 Watcher::unquiesce_cb, &watcher2, &handle2));
+
+  ASSERT_EQ(0, rbd_snap_create(image1, "snap1"));
+  ASSERT_EQ(1U, watcher1.quiesce_count);
+  ASSERT_EQ(1U, watcher1.unquiesce_count);
+  ASSERT_EQ(1U, watcher2.quiesce_count);
+  ASSERT_EQ(1U, watcher2.unquiesce_count);
+
+  ASSERT_EQ(0, rbd_snap_create(image2, "snap2"));
+  ASSERT_EQ(2U, watcher1.quiesce_count);
+  ASSERT_EQ(2U, watcher1.unquiesce_count);
+  ASSERT_EQ(2U, watcher2.quiesce_count);
+  ASSERT_EQ(2U, watcher2.unquiesce_count);
+
+  ASSERT_EQ(0, rbd_quiesce_unwatch(image1, handle1));
+
+  ASSERT_EQ(0, rbd_snap_create(image1, "snap3"));
+  ASSERT_EQ(2U, watcher1.quiesce_count);
+  ASSERT_EQ(2U, watcher1.unquiesce_count);
+  ASSERT_EQ(3U, watcher2.quiesce_count);
+  ASSERT_EQ(3U, watcher2.unquiesce_count);
+
+  ASSERT_EQ(0, rbd_quiesce_unwatch(image2, handle2));
+
+  ASSERT_EQ(0, rbd_snap_remove(image1, "snap1"));
+  ASSERT_EQ(0, rbd_snap_remove(image1, "snap2"));
+  ASSERT_EQ(0, rbd_snap_remove(image1, "snap3"));
+  ASSERT_EQ(0, rbd_close(image1));
+  ASSERT_EQ(0, rbd_close(image2));
+  ASSERT_EQ(0, rbd_remove(ioctx, name.c_str()));
+  rados_ioctx_destroy(ioctx);
+}
+
+TEST_F(TestLibRBD, QuiesceWatchPP)
+{
+  librbd::RBD rbd;
+  librados::IoCtx ioctx;
+  ASSERT_EQ(0, _rados.ioctx_create(m_pool_name.c_str(), ioctx));
+  std::string name = get_temp_image_name();
+  int order = 0;
+  uint64_t size = 2 << 20;
+  ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), size, &order));
+
+  {
+    librbd::Image image1, image2;
+    ASSERT_EQ(0, rbd.open(ioctx, image1, name.c_str(), NULL));
+    ASSERT_EQ(0, rbd.open(ioctx, image2, name.c_str(), NULL));
+
+    struct Watcher : public librbd::QuiesceWatchCtx {
+      librbd::Image &image;
+      size_t quiesce_count = 0;
+      size_t unquiesce_count = 0;
+
+      Watcher(librbd::Image &image) : image(image) {
+      }
+
+      void handle_quiesce() override {
+        ASSERT_EQ(quiesce_count, unquiesce_count);
+        quiesce_count++;
+        image.quiesce_complete();
+      }
+      void handle_unquiesce() override {
+        unquiesce_count++;
+        ASSERT_EQ(quiesce_count, unquiesce_count);
+      }
+    } watcher1(image1), watcher2(image2);
+    uint64_t handle1, handle2;
+
+    ASSERT_EQ(0, image1.quiesce_watch(&watcher1, &handle1));
+    ASSERT_EQ(0, image2.quiesce_watch(&watcher2, &handle2));
+
+    ASSERT_EQ(0, image1.snap_create("snap1"));
+    ASSERT_EQ(1U, watcher1.quiesce_count);
+    ASSERT_EQ(1U, watcher1.unquiesce_count);
+    ASSERT_EQ(1U, watcher2.quiesce_count);
+    ASSERT_EQ(1U, watcher2.unquiesce_count);
+
+    ASSERT_EQ(0, image2.snap_create("snap2"));
+    ASSERT_EQ(2U, watcher1.quiesce_count);
+    ASSERT_EQ(2U, watcher1.unquiesce_count);
+    ASSERT_EQ(2U, watcher2.quiesce_count);
+    ASSERT_EQ(2U, watcher2.unquiesce_count);
+
+    ASSERT_EQ(0, image1.quiesce_unwatch(handle1));
+
+    ASSERT_EQ(0, image1.snap_create("snap3"));
+    ASSERT_EQ(2U, watcher1.quiesce_count);
+    ASSERT_EQ(2U, watcher1.unquiesce_count);
+    ASSERT_EQ(3U, watcher2.quiesce_count);
+    ASSERT_EQ(3U, watcher2.unquiesce_count);
+
+    ASSERT_EQ(0, image2.quiesce_unwatch(handle2));
+
+    ASSERT_EQ(0, image1.snap_remove("snap1"));
+    ASSERT_EQ(0, image1.snap_remove("snap2"));
+    ASSERT_EQ(0, image1.snap_remove("snap3"));
+  }
+
+  ASSERT_EQ(0, rbd.remove(ioctx, name.c_str()));
+  ioctx.close();
+}
+
+TEST_F(TestLibRBD, QuiesceWatchTimeout)
+{
+  REQUIRE(!is_librados_test_stub(_rados));
+
+  ASSERT_EQ(0, _rados.conf_set("rbd_quiesce_notification_attempts", "2"));
+
+  librbd::RBD rbd;
+  librados::IoCtx ioctx;
+  ASSERT_EQ(0, _rados.ioctx_create(m_pool_name.c_str(), ioctx));
+  std::string name = get_temp_image_name();
+  int order = 0;
+  uint64_t size = 2 << 20;
+  ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), size, &order));
+
+  {
+    librbd::Image image;
+    ASSERT_EQ(0, rbd.open(ioctx, image, name.c_str(), NULL));
+
+    struct Watcher : public librbd::QuiesceWatchCtx {
+      librbd::Image &image;
+      mutex m_lock;
+      condition_variable m_cond;
+      size_t quiesce_count = 0;
+      size_t unquiesce_count = 0;
+
+      Watcher(librbd::Image &image) : image(image) {
+      }
+
+      void handle_quiesce() override {
+        lock_guard<mutex> locker(m_lock);
+        quiesce_count++;
+        m_cond.notify_one();
+      }
+
+      void handle_unquiesce() override {
+        lock_guard<mutex> locker(m_lock);
+        unquiesce_count++;
+        m_cond.notify_one();
+      }
+
+      void wait_for_quiesce_count(size_t count) {
+        unique_lock<mutex> locker(m_lock);
+        ASSERT_TRUE(m_cond.wait_for(locker, seconds(60),
+                                    [this, count] {
+                                      return this->quiesce_count == count;
+                                    }));
+      }
+
+      void wait_for_unquiesce_count(size_t count) {
+        unique_lock<mutex> locker(m_lock);
+        ASSERT_TRUE(m_cond.wait_for(locker, seconds(60),
+                                    [this, count] {
+                                      return this->unquiesce_count == count;
+                                    }));
+      }
+    } watcher(image);
+    uint64_t handle;
+
+    ASSERT_EQ(0, image.quiesce_watch(&watcher, &handle));
+
+    thread quiesce1([&image, &watcher]() {
+      watcher.wait_for_quiesce_count(1);
+      sleep(8);
+      image.quiesce_complete();
+    });
+
+    ASSERT_EQ(0, image.snap_create("snap1"));
+    quiesce1.join();
+    ASSERT_EQ(1U, watcher.quiesce_count);
+    watcher.wait_for_unquiesce_count(1);
+    ASSERT_EQ(1U, watcher.unquiesce_count);
+
+    thread quiesce2([&image, &watcher]() {
+      watcher.wait_for_quiesce_count(2);
+      sleep(13);
+      image.quiesce_complete();
+    });
+
+    ASSERT_EQ(-ETIMEDOUT, image.snap_create("snap2"));
+    quiesce2.join();
+    ASSERT_EQ(2U, watcher.quiesce_count);
+    watcher.wait_for_unquiesce_count(2);
+    ASSERT_EQ(2U, watcher.unquiesce_count);
+
+    thread quiesce3([&image, &watcher]() {
+      watcher.wait_for_quiesce_count(3);
+      image.quiesce_complete();
+    });
+
+    ASSERT_EQ(0, image.snap_create("snap2"));
+    quiesce3.join();
+    ASSERT_EQ(3U, watcher.quiesce_count);
+    watcher.wait_for_unquiesce_count(3);
+    ASSERT_EQ(3U, watcher.unquiesce_count);
+
+    ASSERT_EQ(0, image.snap_remove("snap1"));
+    ASSERT_EQ(0, image.snap_remove("snap2"));
+  }
+
+  ASSERT_EQ(0, rbd.remove(ioctx, name.c_str()));
+  ioctx.close();
+}
+
 // poorman's ceph_assert()
 namespace ceph {
   void __ceph_assert_fail(const char *assertion, const char *file, int line,