bool AbstractAioObjectWrite::should_complete(int r)
{
- ldout(m_ictx->cct, 20) << get_write_type() << " " << this << " " << m_oid
+ ldout(m_ictx->cct, 20) << get_op_type() << " " << this << " " << m_oid
<< " " << m_object_off << "~" << m_object_len
<< " should_complete: r = " << r << dendl;
return true;
}
- send_write();
+ send_write_op();
finished = false;
break;
break;
}
- finished = send_post();
+ finished = send_post_object_map_update();
break;
case LIBRBD_AIO_WRITE_COPYUP:
complete(r);
finished = false;
} else {
- finished = send_post();
+ finished = send_post_object_map_update();
}
break;
case LIBRBD_AIO_WRITE_FLAT:
ldout(m_ictx->cct, 20) << "WRITE_FLAT" << dendl;
- finished = send_post();
+ finished = send_post_object_map_update();
break;
case LIBRBD_AIO_WRITE_ERROR:
}
void AbstractAioObjectWrite::send() {
- ldout(m_ictx->cct, 20) << "send " << get_write_type() << " " << this <<" "
+ ldout(m_ictx->cct, 20) << "send " << get_op_type() << " " << this <<" "
<< m_oid << " " << m_object_off << "~"
<< m_object_len << dendl;
- send_pre();
-}
-
-void AbstractAioObjectWrite::send_pre() {
{
RWLock::RLocker snap_lock(m_ictx->snap_lock);
if (m_ictx->object_map == nullptr) {
// should have been flushed prior to releasing lock
assert(m_ictx->exclusive_lock->is_lock_owner());
m_object_exist = m_ictx->object_map->object_may_exist(m_object_no);
+ }
+ }
+ send_write();
+}
+
+void AbstractAioObjectWrite::send_pre_object_map_update() {
+ ldout(m_ictx->cct, 20) << __func__ << dendl;
+
+ {
+ RWLock::RLocker snap_lock(m_ictx->snap_lock);
+ if (m_ictx->object_map != nullptr) {
uint8_t new_state;
pre_object_map_update(&new_state);
-
RWLock::WLocker object_map_locker(m_ictx->object_map_lock);
- ldout(m_ictx->cct, 20) << "send_pre " << this << " " << m_oid << " "
+ ldout(m_ictx->cct, 20) << __func__ << this << " " << m_oid << " "
<< m_object_off << "~" << m_object_len
<< dendl;
m_state = LIBRBD_AIO_WRITE_PRE;
}
}
- // no object map update required
- send_write();
+ send_write_op();
}
-bool AbstractAioObjectWrite::send_post() {
+bool AbstractAioObjectWrite::send_post_object_map_update() {
RWLock::RLocker snap_locker(m_ictx->snap_lock);
if (m_ictx->object_map == nullptr || !post_object_map_update()) {
return true;
assert(m_ictx->exclusive_lock->is_lock_owner());
RWLock::WLocker object_map_locker(m_ictx->object_map_lock);
- ldout(m_ictx->cct, 20) << "send_post " << this << " " << m_oid << " "
+ ldout(m_ictx->cct, 20) << __func__ << this << " " << m_oid << " "
<< m_object_off << "~" << m_object_len << dendl;
m_state = LIBRBD_AIO_WRITE_POST;
m_state = LIBRBD_AIO_WRITE_GUARD;
handle_write_guard();
} else {
- send_write_op(true);
+ send_pre_object_map_update();
}
}
m_ictx->copyup_list_lock.Unlock();
}
}
-void AbstractAioObjectWrite::send_write_op(bool write_guard)
+void AbstractAioObjectWrite::send_write_op()
{
m_state = LIBRBD_AIO_WRITE_FLAT;
- if (write_guard)
+ if (m_guard) {
guard_write();
+ }
+
add_write_ops(&m_write);
assert(m_write.size() != 0);
<< " object exist " << m_object_exist
<< " write_full " << write_full << dendl;
if (write_full && !has_parent()) {
- send_write_op(false);
- } else {
- AbstractAioObjectWrite::send_write();
+ m_guard = false;
}
+
+ AbstractAioObjectWrite::send_write();
}
void AioObjectRemove::guard_write() {
void AioObjectRemove::send_write() {
ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid << " "
<< m_object_off << "~" << m_object_len << dendl;
- send_write_op(true);
+ send_pre_object_map_update();
}
+
void AioObjectTruncate::send_write() {
ldout(m_ictx->cct, 20) << "send_write " << this << " " << m_oid
<< " truncate " << m_object_off << dendl;
return m_has_parent;
}
+ virtual bool is_op_payload_empty() const {
+ return false;
+ }
+
+ virtual const char *get_op_type() const = 0;
+ virtual bool pre_object_map_update(uint8_t *new_state) = 0;
+
protected:
bool compute_parent_extents();
ExtentMap &get_extent_map() {
return m_ext_map;
}
+
+ const char *get_op_type() const {
+ return "read";
+ }
+
+ bool pre_object_map_update(uint8_t *new_state) {
+ return false;
+ }
+
private:
Extents m_buffer_extents;
bool m_tried_parent;
* Writes go through the following state machine to deal with
* layering and the object map:
*
- * <start>
- * . |
- * . |
- * . \---> LIBRBD_AIO_WRITE_PRE
- * . | |
- * . . . . . . | . . . . | . . . . . . . . . . .
- * . | -or- | .
- * . | | v
- * . | \----------------> LIBRBD_AIO_WRITE_FLAT . . .
- * . | | .
- * v v need copyup | .
+ * <start>
+ * |
+ * |\
+ * | \ -or-
+ * | ---------------------------------> LIBRBD_AIO_WRITE_PRE
+ * | . |
+ * | . |
+ * | . v
+ * | . . . . > LIBRBD_AIO_WRITE_FLAT. . .
+ * | | .
+ * | | .
+ * | | .
+ * v need copyup (copyup performs pre) | .
* LIBRBD_AIO_WRITE_GUARD -----------> LIBRBD_AIO_WRITE_COPYUP | .
* . | | . | .
* . | | . | .
uint64_t m_snap_seq;
std::vector<librados::snap_t> m_snaps;
bool m_object_exist;
+ bool m_guard = true;
virtual void add_write_ops(librados::ObjectWriteOperation *wr) = 0;
- virtual const char* get_write_type() const = 0;
virtual void guard_write();
- virtual void pre_object_map_update(uint8_t *new_state) = 0;
virtual bool post_object_map_update() {
return false;
}
virtual void send_write();
- virtual void send_write_op(bool write_guard);
+ virtual void send_write_op();
virtual void handle_write_guard();
+ void send_pre_object_map_update();
+
private:
- void send_pre();
- bool send_post();
+ bool send_post_object_map_update();
void send_copyup();
};
m_write_data(data), m_op_flags(op_flags) {
}
-protected:
- virtual void add_write_ops(librados::ObjectWriteOperation *wr);
+ bool is_op_payload_empty() const {
+ return (m_write_data.length() == 0);
+ }
- virtual const char* get_write_type() const {
+ virtual const char *get_op_type() const {
return "write";
}
- virtual void pre_object_map_update(uint8_t *new_state) {
+ virtual bool pre_object_map_update(uint8_t *new_state) {
*new_state = OBJECT_EXISTS;
+ return true;
}
+
+protected:
+ virtual void add_write_ops(librados::ObjectWriteOperation *wr);
+
virtual void send_write();
private:
m_object_state(OBJECT_NONEXISTENT) {
}
-protected:
- virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
- if (has_parent()) {
- wr->truncate(0);
- } else {
- wr->remove();
- }
- }
-
- virtual const char* get_write_type() const {
+ virtual const char* get_op_type() const {
if (has_parent()) {
return "remove (trunc)";
}
return "remove";
}
- virtual void pre_object_map_update(uint8_t *new_state) {
+
+ virtual bool pre_object_map_update(uint8_t *new_state) {
if (has_parent()) {
m_object_state = OBJECT_EXISTS;
} else {
m_object_state = OBJECT_PENDING;
}
*new_state = m_object_state;
+ return true;
}
virtual bool post_object_map_update() {
virtual void guard_write();
virtual void send_write();
+protected:
+ virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
+ if (has_parent()) {
+ wr->truncate(0);
+ } else {
+ wr->remove();
+ }
+ }
+
private:
uint8_t m_object_state;
};
: AbstractAioObjectWrite(ictx, oid, object_no, 0, 0, snapc, completion,
true), m_post_object_map_update(post_object_map_update) { }
-protected:
- virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
- wr->remove();
- }
-
- virtual const char* get_write_type() const {
+ virtual const char* get_op_type() const {
return "remove (trim)";
}
- virtual void pre_object_map_update(uint8_t *new_state) {
+ virtual bool pre_object_map_update(uint8_t *new_state) {
*new_state = OBJECT_PENDING;
+ return true;
}
virtual bool post_object_map_update() {
return m_post_object_map_update;
}
+protected:
+ virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
+ wr->remove();
+ }
+
private:
bool m_post_object_map_update;
};
completion, true) {
}
-protected:
- virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
- wr->truncate(m_object_off);
- }
-
- virtual const char* get_write_type() const {
+ virtual const char* get_op_type() const {
return "truncate";
}
- virtual void pre_object_map_update(uint8_t *new_state) {
+ virtual bool pre_object_map_update(uint8_t *new_state) {
if (!m_object_exist && !has_parent())
*new_state = OBJECT_NONEXISTENT;
else
*new_state = OBJECT_EXISTS;
+ return true;
}
+
virtual void send_write();
+
+protected:
+ virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
+ wr->truncate(m_object_off);
+ }
};
class AioObjectZero : public AbstractAioObjectWrite {
snapc, completion, true) {
}
-protected:
- virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
- wr->zero(m_object_off, m_object_len);
- }
-
- virtual const char* get_write_type() const {
+ virtual const char* get_op_type() const {
return "zero";
}
- virtual void pre_object_map_update(uint8_t *new_state) {
+ virtual bool pre_object_map_update(uint8_t *new_state) {
*new_state = OBJECT_EXISTS;
+ return true;
+ }
+
+protected:
+ virtual void add_write_ops(librados::ObjectWriteOperation *wr) {
+ wr->zero(m_object_off, m_object_len);
}
};
return false;
}
+bool CopyupRequest::is_copyup_required() {
+ bool noop = true;
+ for (const AioObjectRequest<> *req : m_pending_requests) {
+ if (!req->is_op_payload_empty()) {
+ noop = false;
+ break;
+ }
+ }
+
+ return (m_copyup_data.is_zero() && noop);
+}
+
void CopyupRequest::send()
{
m_state = STATE_READ_FROM_PARENT;
ldout(cct, 20) << "READ_FROM_PARENT" << dendl;
remove_from_list();
if (r >= 0 || r == -ENOENT) {
- return send_object_map();
+ if (is_copyup_required()) {
+ ldout(cct, 20) << __func__ << " " << this << " nop, skipping" << dendl;
+ return true;
+ }
+
+ return send_object_map_head();
}
break;
+ case STATE_OBJECT_MAP_HEAD:
+ ldout(cct, 20) << "OBJECT_MAP_HEAD" << dendl;
+ assert(r == 0);
+ return send_object_map();
+
case STATE_OBJECT_MAP:
ldout(cct, 20) << "OBJECT_MAP" << dendl;
assert(r == 0);
m_ictx->copyup_list.erase(it);
}
-bool CopyupRequest::send_object_map() {
+bool CopyupRequest::send_object_map_head() {
+ CephContext *cct = m_ictx->cct;
+ ldout(cct, 20) << __func__ << " " << this << dendl;
+
+ m_state = STATE_OBJECT_MAP_HEAD;
+
{
+ RWLock::RLocker owner_locker(m_ictx->owner_lock);
RWLock::RLocker snap_locker(m_ictx->snap_lock);
if (m_ictx->object_map != nullptr) {
bool copy_on_read = m_pending_requests.empty();
assert(m_ictx->exclusive_lock->is_lock_owner());
RWLock::WLocker object_map_locker(m_ictx->object_map_lock);
- if (copy_on_read &&
- (*m_ictx->object_map)[m_object_no] != OBJECT_EXISTS) {
- // CoW already updates the HEAD object map
- m_snap_ids.push_back(CEPH_NOSNAP);
- }
if (!m_ictx->snaps.empty()) {
m_snap_ids.insert(m_snap_ids.end(), m_ictx->snaps.begin(),
m_ictx->snaps.end());
}
+ if (copy_on_read &&
+ (*m_ictx->object_map)[m_object_no] != OBJECT_EXISTS) {
+ m_snap_ids.insert(m_snap_ids.begin(), CEPH_NOSNAP);
+ object_map_locker.unlock();
+ snap_locker.unlock();
+ owner_locker.unlock();
+ return send_object_map();
+ }
+
+ bool may_update = false;
+ uint8_t new_state, current_state;
+
+ vector<AioObjectRequest<> *>::reverse_iterator r_it = m_pending_requests.rbegin();
+ for (; r_it != m_pending_requests.rend(); ++r_it) {
+ AioObjectRequest<> *req = *r_it;
+ if (!req->pre_object_map_update(&new_state)) {
+ continue;
+ }
+
+ current_state = (*m_ictx->object_map)[m_object_no];
+ ldout(cct, 20) << __func__ << " " << req->get_op_type() << " object no "
+ << m_object_no << " current state "
+ << stringify(static_cast<uint32_t>(current_state))
+ << " new state " << stringify(static_cast<uint32_t>(new_state))
+ << dendl;
+ may_update = true;
+ break;
+ }
+
+ if (may_update && (new_state != current_state) &&
+ m_ictx->object_map->aio_update<CopyupRequest>(
+ CEPH_NOSNAP, m_object_no, new_state, current_state, this)) {
+ return false;
+ }
}
}
+ return send_object_map();
+}
+
+bool CopyupRequest::send_object_map() {
// avoid possible recursive lock attempts
if (m_snap_ids.empty()) {
// no object map update required
} else {
// update object maps for HEAD and all existing snapshots
ldout(m_ictx->cct, 20) << __func__ << " " << this
- << ": oid " << m_oid
- << dendl;
+ << ": oid " << m_oid << dendl;
m_state = STATE_OBJECT_MAP;
RWLock::RLocker owner_locker(m_ictx->owner_lock);
*
* @verbatim
*
- * <start>
- * |
- * v
- * STATE_READ_FROM_PARENT
- * . . |
- * . . v
- * . . STATE_OBJECT_MAP . .
- * . . | .
- * . . v .
- * . . . > STATE_COPYUP .
- * . | .
- * . v .
- * . . . . > <finish> < . . . . .
+ * <start>
+ * |
+ * v
+ * . . .STATE_READ_FROM_PARENT. . .
+ * . . | .
+ * . . v .
+ * . . STATE_OBJECT_MAP_HEAD v (copy on read /
+ * . . | . no HEAD rev. update)
+ * v v v .
+ * . . STATE_OBJECT_MAP. . . . .
+ * . . |
+ * . . v
+ * . . . . > STATE_COPYUP
+ * . |
+ * . v
+ * . . . . > <finish>
*
* @endverbatim
*
*/
enum State {
STATE_READ_FROM_PARENT,
- STATE_OBJECT_MAP,
+ STATE_OBJECT_MAP_HEAD, // only update the HEAD revision
+ STATE_OBJECT_MAP, // update HEAD+snaps (if any)
STATE_COPYUP
};
void remove_from_list();
+ bool send_object_map_head();
bool send_object_map();
bool send_copyup();
+ bool is_copyup_required();
};
} // namespace librbd