From 5559a5fe0f593c0e7af47c6d8a8359dbcbfd1f08 Mon Sep 17 00:00:00 2001 From: Guang Yang Date: Mon, 15 Sep 2014 11:41:06 +0000 Subject: [PATCH] For pgls OP, get/put budget on per list session basis, instead of per OP basis, which could lead to deadlock. Signed-off-by: Guang Yang (yguang@yahoo-inc.com) (cherry picked from commit 0f884fdb31a26f241401ce2a9329dc0f2c4eccc7) Conflicts: src/librados/IoCtxImpl.cc In firefly, return value of objecter->pg_read() is not assigned to c->tid. src/osdc/Objecter.cc src/osdc/Objecter.h There is no _op_submit_with_budget() function in firefly. There is no Objecter::_finish_op() function in firefly. In firefly, _take_op_budget() is called take_op_budget(). --- src/librados/IoCtxImpl.cc | 4 ++-- src/osdc/Objecter.cc | 29 ++++++++++++++++++++---- src/osdc/Objecter.h | 46 ++++++++++++++++++++++++++++++++------- 3 files changed, 65 insertions(+), 14 deletions(-) diff --git a/src/librados/IoCtxImpl.cc b/src/librados/IoCtxImpl.cc index 6fc22ade16c04..887b390eca1d5 100644 --- a/src/librados/IoCtxImpl.cc +++ b/src/librados/IoCtxImpl.cc @@ -815,7 +815,7 @@ int librados::IoCtxImpl::hit_set_list(uint32_t hash, AioCompletionImpl *c, ::ObjectOperation rd; rd.hit_set_ls(pls, NULL); object_locator_t oloc(poolid); - objecter->pg_read(hash, oloc, rd, NULL, 0, onack, NULL); + objecter->pg_read(hash, oloc, rd, NULL, 0, onack, NULL, NULL); return 0; } @@ -831,7 +831,7 @@ int librados::IoCtxImpl::hit_set_get(uint32_t hash, AioCompletionImpl *c, ::ObjectOperation rd; rd.hit_set_get(utime_t(stamp, 0), pbl, 0); object_locator_t oloc(poolid); - objecter->pg_read(hash, oloc, rd, NULL, 0, onack, NULL); + objecter->pg_read(hash, oloc, rd, NULL, 0, onack, NULL, NULL); return 0; } diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc index fcf211f71a243..a72f40e690927 100644 --- a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@ -1220,7 +1220,7 @@ public: } }; -ceph_tid_t Objecter::op_submit(Op *op) +ceph_tid_t Objecter::op_submit(Op *op, int *ctx_budget) { assert(client_lock.is_locked()); assert(initialized); @@ -1236,7 +1236,14 @@ ceph_tid_t Objecter::op_submit(Op *op) // throttle. before we look at any state, because // take_op_budget() may drop our lock while it blocks. - take_op_budget(op); + if (!op->ctx_budgeted || (ctx_budget && (*ctx_budget == -1))) { + int op_budget = take_op_budget(op); + // take and pass out the budget for the first OP + // in the context session + if (ctx_budget && (*ctx_budget == -1)) { + *ctx_budget = op_budget; + } + } return _op_submit(op); } @@ -1641,7 +1648,7 @@ void Objecter::finish_op(Op *op) ldout(cct, 15) << "finish_op " << op->tid << dendl; op->session_item.remove_myself(); - if (op->budgeted) + if (!op->ctx_budgeted && op->budgeted) put_op_budget(op); ops.erase(op->tid); @@ -1946,6 +1953,10 @@ void Objecter::list_objects(ListContext *list_context, Context *onfinish) } } if (list_context->at_end_of_pool) { + // release the listing context's budget once all + // OPs (in the session) are finished + put_list_context_budget(list_context); + onfinish->complete(0); return; } @@ -1974,7 +1985,7 @@ void Objecter::list_objects(ListContext *list_context, Context *onfinish) C_List *onack = new C_List(list_context, onfinish, this); object_locator_t oloc(list_context->pool_id, list_context->nspace); pg_read(list_context->current_pg, oloc, op, - &list_context->bl, 0, onack, &onack->epoch); + &list_context->bl, 0, onack, &onack->epoch, &list_context->ctx_budget); } void Objecter::_list_reply(ListContext *list_context, int r, @@ -2020,6 +2031,9 @@ void Objecter::_list_reply(ListContext *list_context, int r, } if (!list_context->list.empty()) { ldout(cct, 20) << " returning results so far" << dendl; + // release the listing context's budget once all + // OPs (in the session) are finished + put_list_context_budget(list_context); final_finish->complete(0); return; } @@ -2028,6 +2042,13 @@ void Objecter::_list_reply(ListContext *list_context, int r, list_objects(list_context, final_finish); } +void Objecter::put_list_context_budget(ListContext *list_context) { + if (list_context->ctx_budget >= 0) { + ldout(cct, 10) << " release listing context's budget " << list_context->ctx_budget << dendl; + put_op_budget_bytes(list_context->ctx_budget); + list_context->ctx_budget = -1; + } + } //snapshots diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h index db60095644ed3..ee9cea55faa7a 100644 --- a/src/osdc/Objecter.h +++ b/src/osdc/Objecter.h @@ -1142,6 +1142,11 @@ public: epoch_t last_force_resend; + /// true if the throttle budget is get/put on a series of OPs, instead of + /// per OP basis, when this flag is set, the budget is acquired before sending + /// the very first OP of the series and released upon receiving the last OP reply. + bool ctx_budgeted; + Op(const object_t& o, const object_locator_t& ol, vector& op, int f, Context *ac, Context *co, version_t *ov) : session(NULL), session_item(this), incarnation(0), @@ -1156,7 +1161,8 @@ public: map_dne_bound(0), budgeted(false), should_resend(true), - last_force_resend(0) { + last_force_resend(0), + ctx_budgeted(false) { ops.swap(op); /* initialize out_* to match op vector */ @@ -1259,11 +1265,24 @@ public: bufferlist extra_info; + // The budget associated with this context, once it is set (>= 0), + // the budget is not get/released on OP basis, instead the budget + // is acquired before sending the first OP and released upon receiving + // the last op reply. + int ctx_budget; + ListContext() : current_pg(0), current_pg_epoch(0), starting_pg_num(0), at_end_of_pool(false), at_end_of_pg(false), pool_id(0), - pool_snap_seq(0), max_entries(0) {} + pool_snap_seq(0), + max_entries(0), + nspace(), + bl(), + list(), + filter(), + extra_info(), + ctx_budget(-1) {} bool at_end() const { return at_end_of_pool; @@ -1529,7 +1548,7 @@ public: */ int calc_op_budget(Op *op); void throttle_op(Op *op, int op_size=0); - void take_op_budget(Op *op) { + int take_op_budget(Op *op) { int op_budget = calc_op_budget(op); if (keep_balanced_budget) { throttle_op(op, op_budget); @@ -1538,13 +1557,19 @@ public: op_throttle_ops.take(1); } op->budgeted = true; + return op_budget; + } + void put_op_budget_bytes(int op_budget) { + assert(op_budget >= 0); + op_throttle_bytes.put(op_budget); + op_throttle_ops.put(1); } void put_op_budget(Op *op) { assert(op->budgeted); int op_budget = calc_op_budget(op); - op_throttle_bytes.put(op_budget); - op_throttle_ops.put(1); + put_op_budget_bytes(op_budget); } + void put_list_context_budget(ListContext *list_context); Throttle op_throttle_bytes, op_throttle_ops; public: @@ -1615,7 +1640,7 @@ private: // public interface public: - ceph_tid_t op_submit(Op *op); + ceph_tid_t op_submit(Op *op, int *ctx_budget = NULL); bool is_active() { return !(ops.empty() && linger_ops.empty() && poolstat_ops.empty() && statfs_ops.empty()); } @@ -1719,7 +1744,8 @@ public: ObjectOperation& op, bufferlist *pbl, int flags, Context *onack, - epoch_t *reply_epoch) { + epoch_t *reply_epoch, + int *ctx_budget) { Op *o = new Op(object_t(), oloc, op.ops, flags | global_op_flags | CEPH_OSD_FLAG_READ, onack, NULL, NULL); @@ -1732,7 +1758,11 @@ public: o->out_handler.swap(op.out_handler); o->out_rval.swap(op.out_rval); o->reply_epoch = reply_epoch; - return op_submit(o); + if (ctx_budget) { + // budget is tracked by listing context + o->ctx_budgeted = true; + } + return op_submit(o, ctx_budget); } ceph_tid_t linger_mutate(const object_t& oid, const object_locator_t& oloc, ObjectOperation& op, -- 2.39.5