From: Mike Ryan Date: Tue, 4 Sep 2012 23:37:54 +0000 (-0700) Subject: pg: store scrubber state in its own object X-Git-Tag: v0.53~180^2~1^2~4 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=a08a28d01901f25bb3fcf30d2a0e284427448510;p=ceph.git pg: store scrubber state in its own object Signed-off-by: Mike Ryan --- diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 7fea23ba5a39..3e8135b47507 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -78,15 +78,6 @@ PG::PG(OSDService *o, OSDMapRef curmap, pg_stats_valid(false), osr(osd->osr_registry.lookup_or_create(p, (stringify(p)))), finish_sync_event(NULL), - finalizing_scrub(false), - scrub_block_writes(false), - scrub_active(false), - scrub_reserved(false), scrub_reserve_failed(false), - scrub_waiting_on(0), - active_rep_scrub(0), - scrub_is_chunky(false), - scrub_errors(0), - scrub_fixed(0), active_pushes(0), recovery_state(this) { @@ -985,7 +976,7 @@ void PG::clear_primary_state() log.reset_recovery_pointers(); - scrub_reserved_peers.clear(); + scrubber.reserved_peers.clear(); osd->recovery_wq.dequeue(this); osd->snap_trim_wq.dequeue(this); } @@ -2689,22 +2680,22 @@ void PG::requeue_ops(list &ls) /* * when holding pg and sched_scrub_lock, then the states are: * scheduling: - * scrub_reserved = true + * scrubber.reserved = true * scrub_rserved_peers includes whoami * osd->scrub_pending++ * scheduling, replica declined: - * scrub_reserved = true - * scrub_reserved_peers includes -1 + * scrubber.reserved = true + * scrubber.reserved_peers includes -1 * osd->scrub_pending++ * pending: - * scrub_reserved = true - * scrub_reserved_peers.size() == acting.size(); + * scrubber.reserved = true + * scrubber.reserved_peers.size() == acting.size(); * pg on scrub_wq * osd->scrub_pending++ * scrubbing: - * scrub_reserved = false; - * scrub_reserved_peers empty - * osd->scrub_active++ + * scrubber.reserved = false; + * scrubber.reserved_peers empty + * osd->scrubber.active++ */ // returns false if waiting for a reply @@ -2722,30 +2713,30 @@ bool PG::sched_scrub() } bool ret = false; - if (!scrub_reserved) { - assert(scrub_reserved_peers.empty()); + if (!scrubber.reserved) { + assert(scrubber.reserved_peers.empty()); if (osd->inc_scrubs_pending()) { dout(20) << "sched_scrub: reserved locally, reserving replicas" << dendl; - scrub_reserved = true; - scrub_reserved_peers.insert(osd->whoami); + scrubber.reserved = true; + scrubber.reserved_peers.insert(osd->whoami); scrub_reserve_replicas(); } else { dout(20) << "sched_scrub: failed to reserve locally" << dendl; } } - if (scrub_reserved) { - if (scrub_reserve_failed) { + if (scrubber.reserved) { + if (scrubber.reserve_failed) { dout(20) << "sched_scrub: failed, a peer declined" << dendl; clear_scrub_reserved(); scrub_unreserve_replicas(); ret = true; - } else if (scrub_reserved_peers.size() == acting.size()) { + } else if (scrubber.reserved_peers.size() == acting.size()) { dout(20) << "sched_scrub: success, reserved self and replicas" << dendl; queue_scrub(); ret = true; } else { - // none declined, since scrub_reserved is set - dout(20) << "sched_scrub: reserved " << scrub_reserved_peers << ", waiting for replicas" << dendl; + // none declined, since scrubber.reserved is set + dout(20) << "sched_scrub: reserved " << scrubber.reserved_peers << ", waiting for replicas" << dendl; } } @@ -2772,37 +2763,37 @@ void PG::sub_op_scrub_map(OpRequestRef op) dout(10) << " got osd." << from << " scrub map" << dendl; bufferlist::iterator p = m->get_data().begin(); - if (scrub_is_chunky) { // chunky scrub - scrub_received_maps[from].decode(p, info.pgid.pool()); - dout(10) << "map version is " << scrub_received_maps[from].valid_through << dendl; + if (scrubber.is_chunky) { // chunky scrub + scrubber.received_maps[from].decode(p, info.pgid.pool()); + dout(10) << "map version is " << scrubber.received_maps[from].valid_through << dendl; } else { // classic scrub - if (scrub_received_maps.count(from)) { + if (scrubber.received_maps.count(from)) { ScrubMap incoming; incoming.decode(p, info.pgid.pool()); dout(10) << "from replica " << from << dendl; dout(10) << "map version is " << incoming.valid_through << dendl; - scrub_received_maps[from].merge_incr(incoming); + scrubber.received_maps[from].merge_incr(incoming); } else { - scrub_received_maps[from].decode(p, info.pgid.pool()); + scrubber.received_maps[from].decode(p, info.pgid.pool()); } } - --scrub_waiting_on; - scrub_waiting_on_whom.erase(from); + --scrubber.waiting_on; + scrubber.waiting_on_whom.erase(from); - if (scrub_waiting_on == 0) { - if (scrub_is_chunky) { // chunky scrub + if (scrubber.waiting_on == 0) { + if (scrubber.is_chunky) { // chunky scrub osd->scrub_wq.queue(this); } else { // classic scrub - if (finalizing_scrub) { // incremental lists received + if (scrubber.finalizing) { // incremental lists received osd->scrub_finalize_wq.queue(this); } else { // initial lists received - scrub_block_writes = true; + scrubber.block_writes = true; if (last_update_applied == info.last_update) { - finalizing_scrub = true; + scrubber.finalizing = true; scrub_gather_replica_maps(); - ++scrub_waiting_on; - scrub_waiting_on_whom.insert(osd->whoami); + ++scrubber.waiting_on; + scrubber.waiting_on_whom.insert(osd->whoami); osd->scrub_wq.queue(this); } } @@ -2866,17 +2857,17 @@ void PG::sub_op_scrub_reserve(OpRequestRef op) assert(m->get_header().type == MSG_OSD_SUBOP); dout(7) << "sub_op_scrub_reserve" << dendl; - if (scrub_reserved) { + if (scrubber.reserved) { dout(10) << "Ignoring reserve request: Already reserved" << dendl; return; } op->mark_started(); - scrub_reserved = osd->inc_scrubs_pending(); + scrubber.reserved = osd->inc_scrubs_pending(); MOSDSubOpReply *reply = new MOSDSubOpReply(m, 0, get_osdmap()->get_epoch(), CEPH_OSD_FLAG_ACK); - ::encode(scrub_reserved, reply->get_data()); + ::encode(scrubber.reserved, reply->get_data()); osd->cluster_messenger->send_message(reply, m->get_connection()); } @@ -2886,7 +2877,7 @@ void PG::sub_op_scrub_reserve_reply(OpRequestRef op) assert(reply->get_header().type == MSG_OSD_SUBOPREPLY); dout(7) << "sub_op_scrub_reserve_reply" << dendl; - if (!scrub_reserved) { + if (!scrubber.reserved) { dout(10) << "ignoring obsolete scrub reserve reply" << dendl; return; } @@ -2898,16 +2889,16 @@ void PG::sub_op_scrub_reserve_reply(OpRequestRef op) bool reserved; ::decode(reserved, p); - if (scrub_reserved_peers.find(from) != scrub_reserved_peers.end()) { + if (scrubber.reserved_peers.find(from) != scrubber.reserved_peers.end()) { dout(10) << " already had osd." << from << " reserved" << dendl; } else { if (reserved) { dout(10) << " osd." << from << " scrub reserve = success" << dendl; - scrub_reserved_peers.insert(from); + scrubber.reserved_peers.insert(from); } else { /* One decline stops this pg from being scheduled for scrubbing. */ dout(10) << " osd." << from << " scrub reserve = fail" << dendl; - scrub_reserve_failed = true; + scrubber.reserve_failed = true; } sched_scrub(); } @@ -2932,7 +2923,7 @@ void PG::sub_op_scrub_stop(OpRequestRef op) dout(7) << "sub_op_scrub_stop" << dendl; // see comment in sub_op_scrub_reserve - scrub_reserved = false; + scrubber.reserved = false; MOSDSubOpReply *reply = new MOSDSubOpReply(m, 0, get_osdmap()->get_epoch(), CEPH_OSD_FLAG_ACK); osd->cluster_messenger->send_message(reply, m->get_connection()); @@ -2941,11 +2932,11 @@ void PG::sub_op_scrub_stop(OpRequestRef op) void PG::clear_scrub_reserved() { osd->scrub_wq.dequeue(this); - scrub_reserved_peers.clear(); - scrub_reserve_failed = false; + scrubber.reserved_peers.clear(); + scrubber.reserve_failed = false; - if (scrub_reserved) { - scrub_reserved = false; + if (scrubber.reserved) { + scrubber.reserved = false; osd->dec_scrubs_pending(); } } @@ -3118,7 +3109,7 @@ void PG::repair_object(const hobject_t& soid, ScrubMap::object *po, int bad_peer * If msg->scrub_from is not set, replica_scrub calls build_scrubmap to * build a complete map (with the pg lock dropped). * - * If msg->scrub_from is set, replica_scrub sets finalizing_scrub. + * If msg->scrub_from is set, replica_scrub sets scrubber.finalizing. * Similarly to scrub, if last_update_applied is behind info.last_update * replica_scrub returns to be requeued by sub_op_modify_applied. * replica_scrub then builds an incremental scrub map with the @@ -3132,13 +3123,13 @@ void PG::repair_object(const hobject_t& soid, ScrubMap::object *po, int bad_peer */ void PG::replica_scrub(MOSDRepScrub *msg) { - assert(!active_rep_scrub); + assert(!scrubber.active_rep_scrub); dout(7) << "replica_scrub" << dendl; if (msg->map_epoch < info.history.same_interval_since) { - if (finalizing_scrub) { + if (scrubber.finalizing) { dout(10) << "scrub pg changed, aborting" << dendl; - finalizing_scrub = 0; + scrubber.finalizing = 0; } else { dout(10) << "replica_scrub discarding old replica_scrub from " << msg->map_epoch << " < " << info.history.same_interval_since @@ -3152,14 +3143,14 @@ void PG::replica_scrub(MOSDRepScrub *msg) if (msg->chunky) { // chunky scrub if (last_update_applied < msg->scrub_to) { dout(10) << "waiting for last_update_applied to catch up" << dendl; - active_rep_scrub = msg; + scrubber.active_rep_scrub = msg; msg->get(); return; } if (active_pushes > 0) { dout(10) << "waiting for active pushes to finish" << dendl; - active_rep_scrub = msg; + scrubber.active_rep_scrub = msg; msg->get(); return; } @@ -3168,19 +3159,19 @@ void PG::replica_scrub(MOSDRepScrub *msg) } else { if (msg->scrub_from > eversion_t()) { - if (finalizing_scrub) { + if (scrubber.finalizing) { assert(last_update_applied == info.last_update); assert(last_update_applied == msg->scrub_to); } else { - finalizing_scrub = 1; + scrubber.finalizing = 1; if (last_update_applied != msg->scrub_to) { - active_rep_scrub = msg; + scrubber.active_rep_scrub = msg; msg->get(); return; } } build_inc_scrub_map(map, msg->scrub_from); - finalizing_scrub = 0; + scrubber.finalizing = 0; } else { build_scrub_map(map); } @@ -3225,25 +3216,25 @@ void PG::scrub() return; } - // when we're starting a scrub, we need to determine which type of scrub to do - if (!scrub_active) { + // when the scrub is not active, we need to determine which type of scrub to do + if (!scrubber.active) { OSDMapRef curmap = osd->get_osdmap(); - scrub_is_chunky = true; + scrubber.is_chunky = true; for (unsigned i=1; icluster_messenger->get_connection(curmap->get_cluster_inst(acting[i])); if (!(con->features & CEPH_FEATURE_CHUNKY_SCRUB)) { dout(20) << "OSD " << acting[i] << " does not support chunky scrubs, falling back to classic" << dendl; - scrub_is_chunky = false; + scrubber.is_chunky = false; break; } } - dout(10) << "starting a new " << (scrub_is_chunky ? "chunky" : "classic") << " scrub" << dendl; + dout(10) << "starting a new " << (scrubber.is_chunky ? "chunky" : "classic") << " scrub" << dendl; } - if (scrub_is_chunky) { + if (scrubber.is_chunky) { chunky_scrub(); } else { classic_scrub(); @@ -3271,8 +3262,8 @@ void PG::scrub() * * Relevant variables: * - * scrub_waiting_on (int) - * scrub_waiting_on_whom + * scrubber.waiting_on (int) + * scrubber.waiting_on_whom * Number of people who still need to build an initial/incremental scrub map. * This is decremented in sub_op_scrub_map. * @@ -3284,7 +3275,7 @@ void PG::scrub() * * This is checked in op_applied. * - * scrub_block_writes + * scrubber.block_writes * Flag to determine if writes are blocked. * * finalizing scrub @@ -3293,30 +3284,30 @@ void PG::scrub() */ void PG::classic_scrub() { - if (!scrub_active) { + if (!scrubber.active) { dout(10) << "scrub start" << dendl; - scrub_active = true; + scrubber.active = true; update_stats(); - scrub_received_maps.clear(); - scrub_epoch_start = info.history.same_interval_since; + scrubber.received_maps.clear(); + scrubber.epoch_start = info.history.same_interval_since; osd->sched_scrub_lock.Lock(); - if (scrub_reserved) { + if (scrubber.reserved) { --(osd->scrubs_pending); assert(osd->scrubs_pending >= 0); - scrub_reserved = false; - scrub_reserved_peers.clear(); + scrubber.reserved = false; + scrubber.reserved_peers.clear(); } ++(osd->scrubs_active); osd->sched_scrub_lock.Unlock(); - /* scrub_waiting_on == 0 iff all replicas have sent the requested maps and + /* scrubber.waiting_on == 0 iff all replicas have sent the requested maps and * the primary has done a final scrub (which in turn can only happen if * last_update_applied == info.last_update) */ - scrub_waiting_on = acting.size(); - scrub_waiting_on_whom.insert(acting.begin(), acting.end()); + scrubber.waiting_on = acting.size(); + scrubber.waiting_on_whom.insert(acting.begin(), acting.end()); // request maps from replicas for (unsigned i=1; iwhoami); + --scrubber.waiting_on; + scrubber.waiting_on_whom.erase(osd->whoami); - if (scrub_waiting_on == 0) { + if (scrubber.waiting_on == 0) { // the replicas have completed their scrub map, so lock out writes - scrub_block_writes = true; + scrubber.block_writes = true; } else { dout(10) << "wait for replicas to build initial scrub map" << dendl; return; @@ -3350,35 +3341,35 @@ void PG::classic_scrub() return; } - // fall through if last_update_applied == info.last_update and scrub_waiting_on == 0 + // fall through if last_update_applied == info.last_update and scrubber.waiting_on == 0 // request incrementals from replicas scrub_gather_replica_maps(); - ++scrub_waiting_on; - scrub_waiting_on_whom.insert(osd->whoami); + ++scrubber.waiting_on; + scrubber.waiting_on_whom.insert(osd->whoami); } dout(10) << "clean up scrub" << dendl; assert(last_update_applied == info.last_update); - finalizing_scrub = true; + scrubber.finalizing = true; - if (scrub_epoch_start != info.history.same_interval_since) { + if (scrubber.epoch_start != info.history.same_interval_since) { dout(10) << "scrub pg changed, aborting" << dendl; scrub_clear_state(); scrub_unreserve_replicas(); return; } - if (primary_scrubmap.valid_through != log.head) { + if (scrubber.primary_scrubmap.valid_through != log.head) { ScrubMap incr; - build_inc_scrub_map(incr, primary_scrubmap.valid_through); - primary_scrubmap.merge_incr(incr); + build_inc_scrub_map(incr, scrubber.primary_scrubmap.valid_through); + scrubber.primary_scrubmap.merge_incr(incr); } - --scrub_waiting_on; - scrub_waiting_on_whom.erase(osd->whoami); - if (scrub_waiting_on == 0) { + --scrubber.waiting_on; + scrubber.waiting_on_whom.erase(osd->whoami); + if (scrubber.waiting_on == 0) { assert(last_update_applied == info.last_update); osd->scrub_finalize_wq.queue(this); } @@ -3400,50 +3391,50 @@ void PG::classic_scrub() * * This logic is encoded in the very linear state machine: * - * +---------------------+ - * ____________v_____________ | - * | | | - * | SCRUB_INACTIVE | | - * |__________________________| | - * | | - * | +-------------+ | - * ____________v___v_________ | | - * | | | | - * | SCRUB_NEW_CHUNK | | | - * |__________________________| | | - * | | | - * ___________v_____________ | | - * | | | | - * | SCRUB_WAIT_PUSHES | | | - * |_________________________| | | - * | | | - * ____________v_____________ | | - * | | | | - * | SCRUB_WAIT_LAST_UPDATE | | | - * |__________________________| | | - * | | | - * ____________v_____________ | | - * | | | | - * | SCRUB_BUILD_MAP | | | - * |__________________________| | | - * | | | - * ____________v_____________ | | - * | | | | - * | SCRUB_WAIT_REPLICAS | | | - * |__________________________| | | - * | | | - * ____________v_____________ | | - * | | | | - * | SCRUB_COMPARE_MAPS | | | - * |__________________________| | | - * | | | | - * | +-------------+ | - * ____________v_____________ | - * | | | - * | SCRUB_FINISH | | - * |__________________________| | - * | | - * +---------------------+ + * +------------------+ + * _________v__________ | + * | | | + * | INACTIVE | | + * |____________________| | + * | | + * | +----------+ | + * _________v___v______ | | + * | | | | + * | NEW_CHUNK | | | + * |____________________| | | + * | | | + * _________v__________ | | + * | | | | + * | WAIT_PUSHES | | | + * |____________________| | | + * | | | + * _________v__________ | | + * | | | | + * | WAIT_LAST_UPDATE | | | + * |____________________| | | + * | | | + * _________v__________ | | + * | | | | + * | BUILD_MAP | | | + * |____________________| | | + * | | | + * _________v__________ | | + * | | | | + * | WAIT_REPLICAS | | | + * |____________________| | | + * | | | + * _________v__________ | | + * | | | | + * | COMPARE_MAPS | | | + * |____________________| | | + * | | | | + * | +----------+ | + * _________v__________ | + * | | | + * | FINISH | | + * |____________________| | + * | | + * +------------------+ * * The primary determines the last update from the subset by walking the log. If * it sees a log entry pertaining to a file in the chunk, it tells the replicas @@ -3452,13 +3443,13 @@ void PG::classic_scrub() * * In contrast to classic_scrub, chunky_scrub is entirely handled by scrub_wq. * - * scrub_state encodes the current state of the scrub (refer to state diagram + * scrubber.state encodes the current state of the scrub (refer to state diagram * for details). */ void PG::chunky_scrub() { // check for map changes - if (is_chunky_scrub_active()) { - if (scrub_epoch_start != info.history.same_interval_since) { + if (scrubber.is_chunky_scrub_active()) { + if (scrubber.epoch_start != info.history.same_interval_since) { dout(10) << "scrub pg changed, aborting" << dendl; scrub_clear_state(); scrub_unreserve_replicas(); @@ -3470,34 +3461,34 @@ void PG::chunky_scrub() { int ret; while (!done) { - dout(20) << "scrub state " << scrub_string(scrub_state) << dendl; + dout(20) << "scrub state " << Scrubber::state_string(scrubber.state) << dendl; - switch (scrub_state) { - case SCRUB_INACTIVE: + switch (scrubber.state) { + case PG::Scrubber::INACTIVE: dout(10) << "scrub start" << dendl; update_stats(); - scrub_epoch_start = info.history.same_interval_since; - scrub_active = true; + scrubber.epoch_start = info.history.same_interval_since; + scrubber.active = true; osd->sched_scrub_lock.Lock(); - if (scrub_reserved) { + if (scrubber.reserved) { --(osd->scrubs_pending); assert(osd->scrubs_pending >= 0); - scrub_reserved = false; - scrub_reserved_peers.clear(); + scrubber.reserved = false; + scrubber.reserved_peers.clear(); } ++(osd->scrubs_active); osd->sched_scrub_lock.Unlock(); - scrub_start = hobject_t(); - scrub_state = SCRUB_NEW_CHUNK; + scrubber.start = hobject_t(); + scrubber.state = PG::Scrubber::NEW_CHUNK; break; - case SCRUB_NEW_CHUNK: - primary_scrubmap = ScrubMap(); - scrub_received_maps.clear(); + case PG::Scrubber::NEW_CHUNK: + scrubber.primary_scrubmap = ScrubMap(); + scrubber.received_maps.clear(); { @@ -3509,16 +3500,16 @@ void PG::chunky_scrub() { // list, and so forth. bool boundary_found = false; - hobject_t start = scrub_start; + hobject_t start = scrubber.start; while (!boundary_found) { vector objects; ret = osd->store->collection_list_partial(coll, start, 5, 5, 0, - &objects, &scrub_end); + &objects, &scrubber.end); assert(ret >= 0); // in case we don't find a boundary: start again at the end - start = scrub_end; + start = scrubber.end; // special case: reached end of file store, implicitly a boundary if (objects.size() == 0) { @@ -3526,58 +3517,58 @@ void PG::chunky_scrub() { } // search backward from the end looking for a boundary - objects.push_back(scrub_end); + objects.push_back(scrubber.end); while (!boundary_found && objects.size() > 1) { hobject_t end = objects.back(); objects.pop_back(); if (objects.back().get_filestore_key() != end.get_filestore_key()) { - scrub_end = end; + scrubber.end = end; boundary_found = true; } } } } - scrub_block_writes = true; + scrubber.block_writes = true; // walk the log to find the latest update that affects our chunk - scrub_subset_last_update = eversion_t(); + scrubber.subset_last_update = eversion_t(); for (list::iterator p = log.log.begin(); p != log.log.end(); ++p) { - if (p->soid >= scrub_start && p->soid < scrub_end) - scrub_subset_last_update = p->version; + if (p->soid >= scrubber.start && p->soid < scrubber.end) + scrubber.subset_last_update = p->version; } - // ask replicas to wait until last_update_applied >= scrub_subset_last_update and then scan - scrub_waiting_on_whom.insert(osd->whoami); - ++scrub_waiting_on; + // ask replicas to wait until last_update_applied >= scrubber.subset_last_update and then scan + scrubber.waiting_on_whom.insert(osd->whoami); + ++scrubber.waiting_on; // request maps from replicas for (unsigned i=1; i= scrub_subset_last_update) { - scrub_state = SCRUB_BUILD_MAP; + case PG::Scrubber::WAIT_LAST_UPDATE: + if (last_update_applied >= scrubber.subset_last_update) { + scrubber.state = PG::Scrubber::BUILD_MAP; } else { // will be requeued by op_applied dout(15) << "wait for writes to flush" << dendl; @@ -3585,11 +3576,11 @@ void PG::chunky_scrub() { } break; - case SCRUB_BUILD_MAP: - assert(last_update_applied >= scrub_subset_last_update); + case PG::Scrubber::BUILD_MAP: + assert(last_update_applied >= scrubber.subset_last_update); // build my own scrub map - ret = build_scrub_map_chunk(primary_scrubmap, scrub_start, scrub_end); + ret = build_scrub_map_chunk(scrubber.primary_scrubmap, scrubber.start, scrubber.end); if (ret < 0) { dout(5) << "error building scrub map: " << ret << ", aborting" << dendl; scrub_clear_state(); @@ -3597,48 +3588,48 @@ void PG::chunky_scrub() { return; } - --scrub_waiting_on; - scrub_waiting_on_whom.erase(osd->whoami); + --scrubber.waiting_on; + scrubber.waiting_on_whom.erase(osd->whoami); - scrub_state = SCRUB_WAIT_REPLICAS; + scrubber.state = PG::Scrubber::WAIT_REPLICAS; break; - case SCRUB_WAIT_REPLICAS: - if (scrub_waiting_on > 0) { + case PG::Scrubber::WAIT_REPLICAS: + if (scrubber.waiting_on > 0) { // will be requeued by sub_op_scrub_map dout(10) << "wait for replicas to build scrub map" << dendl; done = true; } else { - scrub_state = SCRUB_COMPARE_MAPS; + scrubber.state = PG::Scrubber::COMPARE_MAPS; } break; - case SCRUB_COMPARE_MAPS: - assert(last_update_applied >= scrub_subset_last_update); - assert(scrub_waiting_on == 0); + case PG::Scrubber::COMPARE_MAPS: + assert(last_update_applied >= scrubber.subset_last_update); + assert(scrubber.waiting_on == 0); scrub_compare_maps(); - scrub_block_writes = false; + scrubber.block_writes = false; // requeue the writes from the chunk that just finished requeue_ops(waiting_for_active); - if (scrub_end < hobject_t::get_max()) { + if (scrubber.end < hobject_t::get_max()) { // schedule another leg of the scrub - scrub_start = scrub_end; + scrubber.start = scrubber.end; - scrub_state = SCRUB_NEW_CHUNK; + scrubber.state = PG::Scrubber::NEW_CHUNK; osd->scrub_wq.queue(this); done = true; } else { - scrub_state = SCRUB_FINISH; + scrubber.state = PG::Scrubber::FINISH; } break; - case SCRUB_FINISH: + case PG::Scrubber::FINISH: scrub_finish(); - scrub_state = SCRUB_INACTIVE; + scrubber.state = PG::Scrubber::INACTIVE; done = true; break; @@ -3661,45 +3652,29 @@ void PG::scrub_clear_state() requeue_ops(waiting_for_active); - finalizing_scrub = false; - scrub_block_writes = false; - scrub_active = false; - scrub_waiting_on = 0; - scrub_waiting_on_whom.clear(); - if (active_rep_scrub) { - active_rep_scrub->put(); - active_rep_scrub = NULL; - } - scrub_received_maps.clear(); - - scrub_state = SCRUB_INACTIVE; - scrub_start = hobject_t(); - scrub_end = hobject_t(); - scrub_subset_last_update = eversion_t(); - scrub_errors = 0; - scrub_fixed = 0; + scrubber.reset(); // type-specific state clear _scrub_clear_state(); } bool PG::scrub_gather_replica_maps() { - assert(scrub_waiting_on == 0); + assert(scrubber.waiting_on == 0); assert(_lock.is_locked()); - for (map::iterator p = scrub_received_maps.begin(); - p != scrub_received_maps.end(); + for (map::iterator p = scrubber.received_maps.begin(); + p != scrubber.received_maps.end(); p++) { - if (scrub_received_maps[p->first].valid_through != log.head) { - scrub_waiting_on++; - scrub_waiting_on_whom.insert(p->first); + if (scrubber.received_maps[p->first].valid_through != log.head) { + scrubber.waiting_on++; + scrubber.waiting_on_whom.insert(p->first); // Need to request another incremental map _request_scrub_map_classic(p->first, p->second.valid_through); } } - if (scrub_waiting_on > 0) { + if (scrubber.waiting_on > 0) { return false; } else { return true; @@ -3821,12 +3796,12 @@ void PG::scrub_compare_maps() { map maps; dout(2) << "scrub osd." << acting[0] << " has " - << primary_scrubmap.objects.size() << " items" << dendl; - maps[0] = &primary_scrubmap; + << scrubber.primary_scrubmap.objects.size() << " items" << dendl; + maps[0] = &scrubber.primary_scrubmap; for (unsigned i=1; iclog.error(oss); else osd->clog.info(oss); @@ -5242,15 +5217,15 @@ boost::statechart::result PG::RecoveryState::Active::react(const QueryState& q) { q.f->open_object_section("scrub"); - q.f->dump_stream("scrub_epoch_start") << pg->scrub_epoch_start; - q.f->dump_int("scrub_active", pg->scrub_active); - q.f->dump_int("scrub_block_writes", pg->scrub_block_writes); - q.f->dump_int("finalizing_scrub", pg->finalizing_scrub); - q.f->dump_int("scrub_waiting_on", pg->scrub_waiting_on); + q.f->dump_stream("scrubber.epoch_start") << pg->scrubber.epoch_start; + q.f->dump_int("scrubber.active", pg->scrubber.active); + q.f->dump_int("scrubber.block_writes", pg->scrubber.block_writes); + q.f->dump_int("scrubber.finalizing", pg->scrubber.finalizing); + q.f->dump_int("scrubber.waiting_on", pg->scrubber.waiting_on); { - q.f->open_array_section("scrub_waiting_on_whom"); - for (set::iterator p = pg->scrub_waiting_on_whom.begin(); - p != pg->scrub_waiting_on_whom.end(); + q.f->open_array_section("scrubber.waiting_on_whom"); + for (set::iterator p = pg->scrubber.waiting_on_whom.begin(); + p != pg->scrubber.waiting_on_whom.end(); ++p) { q.f->dump_int("osd", *p); } @@ -5347,7 +5322,7 @@ boost::statechart::result PG::RecoveryState::ReplicaActive::react(const QuerySta q.f->open_object_section("state"); q.f->dump_string("name", state_name); q.f->dump_stream("enter_time") << enter_time; - q.f->dump_int("finalizing_scrub", pg->finalizing_scrub); + q.f->dump_int("scrubber.finalizing", pg->scrubber.finalizing); q.f->close_section(); return forward_event(); } diff --git a/src/osd/PG.h b/src/osd/PG.h index e2f240f7ded4..5c39cd89f101 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -773,54 +773,93 @@ public: // -- scrub -- - set scrub_reserved_peers; - map scrub_received_maps; - bool finalizing_scrub; - bool scrub_block_writes; - bool scrub_active; - bool scrub_reserved, scrub_reserve_failed; - int scrub_waiting_on; - set scrub_waiting_on_whom; - epoch_t scrub_epoch_start; - ScrubMap primary_scrubmap; - MOSDRepScrub *active_rep_scrub; - - bool scrub_is_chunky; - - hobject_t scrub_start; - hobject_t scrub_end; - eversion_t scrub_subset_last_update; - int scrub_errors; - int scrub_fixed; + struct Scrubber { + Scrubber() : + reserved(false), reserve_failed(false), + block_writes(false), active(false), waiting_on(0), + errors(0), fixed(0), active_rep_scrub(0), + finalizing(false), is_chunky(false), state(INACTIVE) + { + } - int active_pushes; + // metadata + set reserved_peers; + bool reserved, reserve_failed; + epoch_t epoch_start; + + // common to both scrubs + bool block_writes; + bool active; + int waiting_on; + set waiting_on_whom; + int errors; + int fixed; + ScrubMap primary_scrubmap; + map received_maps; + MOSDRepScrub *active_rep_scrub; + + // classic scrub + bool finalizing; + + // chunky scrub + bool is_chunky; + hobject_t start, end; + eversion_t subset_last_update; + + // chunky scrub state + enum State { + INACTIVE, + NEW_CHUNK, + WAIT_PUSHES, + WAIT_LAST_UPDATE, + BUILD_MAP, + WAIT_REPLICAS, + COMPARE_MAPS, + FINISH, + } state; + + static const char *state_string(const PG::Scrubber::State& state) { + const char *ret = NULL; + switch( state ) + { + case INACTIVE: ret = "INACTIVE"; break; + case NEW_CHUNK: ret = "NEW_CHUNK"; break; + case WAIT_PUSHES: ret = "WAIT_PUSHES"; break; + case WAIT_LAST_UPDATE: ret = "WAIT_LAST_UPDATE"; break; + case BUILD_MAP: ret = "BUILD_MAP"; break; + case WAIT_REPLICAS: ret = "WAIT_REPLICAS"; break; + case COMPARE_MAPS: ret = "COMPARE_MAPS"; break; + case FINISH: ret = "FINISH"; break; + } + return ret; + } - enum ScrubState { - SCRUB_INACTIVE, - SCRUB_NEW_CHUNK, - SCRUB_WAIT_PUSHES, - SCRUB_WAIT_LAST_UPDATE, - SCRUB_BUILD_MAP, - SCRUB_WAIT_REPLICAS, - SCRUB_COMPARE_MAPS, - SCRUB_FINISH, - } scrub_state; - - static const char *scrub_string(const PG::ScrubState& state) { - const char *ret = NULL; - switch( state ) - { - case SCRUB_INACTIVE: ret = "SCRUB_INACTIVE"; break; - case SCRUB_NEW_CHUNK: ret = "SCRUB_NEW_CHUNK"; break; - case SCRUB_WAIT_PUSHES: ret = "SCRUB_WAIT_PUSHES"; break; - case SCRUB_WAIT_LAST_UPDATE: ret = "SCRUB_WAIT_LAST_UPDATE"; break; - case SCRUB_BUILD_MAP: ret = "SCRUB_BUILD_MAP"; break; - case SCRUB_WAIT_REPLICAS: ret = "SCRUB_WAIT_REPLICAS"; break; - case SCRUB_COMPARE_MAPS: ret = "SCRUB_COMPARE_MAPS"; break; - case SCRUB_FINISH: ret = "SCRUB_FINISH"; break; + bool is_chunky_scrub_active() const { return state != INACTIVE; } + + // clear all state + void reset() { + finalizing = false; + block_writes = false; + active = false; + waiting_on = 0; + waiting_on_whom.clear(); + if (active_rep_scrub) { + active_rep_scrub->put(); + active_rep_scrub = NULL; + } + received_maps.clear(); + + state = PG::Scrubber::INACTIVE; + start = hobject_t(); + end = hobject_t(); + subset_last_update = eversion_t(); + errors = 0; + fixed = 0; } - return ret; - } + + } scrubber; + + int active_pushes; void repair_object(const hobject_t& soid, ScrubMap::object *po, int bad_peer, int ok_peer); bool _compare_scrub_objects(ScrubMap::object &auth, @@ -1481,8 +1520,6 @@ public: bool is_scrubbing() const { return state_test(PG_STATE_SCRUBBING); } - bool is_chunky_scrub_active() const { return scrub_state != SCRUB_INACTIVE; } - bool is_empty() const { return info.last_update == eversion_t(0,0); } void init(int role, vector& up, vector& acting, pg_history_t& history, diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 11a75b6ab6ed..01cab81e52b9 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -619,10 +619,10 @@ void ReplicatedPG::do_op(OpRequestRef op) CEPH_NOSNAP, m->get_pg().ps(), info.pgid.pool()); - if (scrub_block_writes && m->may_write()) { + if (scrubber.block_writes && m->may_write()) { // classic (non chunk) scrubs block all writes // chunky scrubs only block writes to a range - if (!scrub_is_chunky || (head >= scrub_start && head < scrub_end)) { + if (!scrubber.is_chunky || (head >= scrubber.start && head < scrubber.end)) { dout(20) << __func__ << ": waiting for scrub" << dendl; waiting_for_active.push_back(op); op->mark_delayed(); @@ -1444,7 +1444,7 @@ void ReplicatedPG::snap_trimmer() dout(10) << "snap_trimmer entry" << dendl; if (is_primary()) { entity_inst_t nobody; - if (!mode.try_write(nobody) || scrub_active) { + if (!mode.try_write(nobody) || scrubber.active) { dout(10) << " can't write, requeueing" << dendl; queue_snap_trim(); unlock(); @@ -3353,9 +3353,9 @@ int ReplicatedPG::prepare_transaction(OpContext *ctx) pending_backfill_updates[soid].stats.add(ctx->delta_stats, ctx->obc->obs.oi.category); } - if (scrub_active && scrub_is_chunky) { - assert(soid < scrub_start || soid >= scrub_end); - if (soid < scrub_start) + if (scrubber.active && scrubber.is_chunky) { + assert(soid < scrubber.start || soid >= scrubber.end); + if (soid < scrubber.start) scrub_cstat.add(ctx->delta_stats, ctx->obc->obs.oi.category); } @@ -3469,18 +3469,18 @@ void ReplicatedPG::op_applied(RepGather *repop) last_update_applied = repop->v; // chunky scrub - if (scrub_active && scrub_is_chunky) { - if (last_update_applied == scrub_subset_last_update) { + if (scrubber.active && scrubber.is_chunky) { + if (last_update_applied == scrubber.subset_last_update) { osd->scrub_wq.queue(this); } // classic scrub - } else if (last_update_applied == info.last_update && scrub_block_writes) { + } else if (last_update_applied == info.last_update && scrubber.block_writes) { dout(10) << "requeueing scrub for cleanup" << dendl; - finalizing_scrub = true; + scrubber.finalizing = true; scrub_gather_replica_maps(); - ++scrub_waiting_on; - scrub_waiting_on_whom.insert(osd->whoami); + ++scrubber.waiting_on; + scrubber.waiting_on_whom.insert(osd->whoami); osd->scrub_wq.queue(this); } } @@ -4379,10 +4379,10 @@ void ReplicatedPG::sub_op_modify_applied(RepModify *rm) assert(info.last_update >= m->version); assert(last_update_applied < m->version); last_update_applied = m->version; - if (active_rep_scrub) { - if (last_update_applied == active_rep_scrub->scrub_to) { - osd->rep_scrub_wq.queue(active_rep_scrub); - active_rep_scrub = 0; + if (scrubber.active_rep_scrub) { + if (last_update_applied == scrubber.active_rep_scrub->scrub_to) { + osd->rep_scrub_wq.queue(scrubber.active_rep_scrub); + scrubber.active_rep_scrub = 0; } } } else { @@ -5410,7 +5410,7 @@ void ReplicatedPG::_applied_recovered_object(ObjectStore::Transaction *t, Object --active_pushes; // requeue an active chunky scrub waiting on recovery ops - if (active_pushes == 0 && is_chunky_scrub_active()) { + if (active_pushes == 0 && scrubber.is_chunky_scrub_active()) { osd->scrub_wq.queue(this); } @@ -5428,9 +5428,9 @@ void ReplicatedPG::_applied_recovered_object_replica(ObjectStore::Transaction *t // requeue an active chunky scrub waiting on recovery ops if (active_pushes == 0 && - active_rep_scrub && active_rep_scrub->chunky) { - osd->rep_scrub_wq.queue(active_rep_scrub); - active_rep_scrub = 0; + scrubber.active_rep_scrub && scrubber.active_rep_scrub->chunky) { + osd->rep_scrub_wq.queue(scrubber.active_rep_scrub); + scrubber.active_rep_scrub = 0; } unlock(); @@ -6619,7 +6619,7 @@ void ReplicatedPG::_scrub(ScrubMap& scrubmap) if (p->second.attrs.count(SS_ATTR) == 0) { osd->clog.error() << mode << " " << info.pgid << " " << soid << " no '" << SS_ATTR << "' attr"; - ++scrub_errors; + ++scrubber.errors; continue; } bufferlist bl; @@ -6631,7 +6631,7 @@ void ReplicatedPG::_scrub(ScrubMap& scrubmap) if (head != hobject_t()) { osd->clog.error() << mode << " " << info.pgid << " " << head << " missing clones"; - ++scrub_errors; + ++scrubber.errors; } // what will be next? @@ -6664,7 +6664,7 @@ void ReplicatedPG::_scrub(ScrubMap& scrubmap) if (p->second.attrs.count(OI_ATTR) == 0) { osd->clog.error() << mode << " " << info.pgid << " " << soid << " no '" << OI_ATTR << "' attr"; - ++scrub_errors; + ++scrubber.errors; continue; } bufferlist bv; @@ -6675,7 +6675,7 @@ void ReplicatedPG::_scrub(ScrubMap& scrubmap) osd->clog.error() << mode << " " << info.pgid << " " << soid << " on disk size (" << p->second.size << ") does not match object info size (" << oi.size << ")"; - ++scrub_errors; + ++scrubber.errors; } dout(20) << mode << " " << soid << " " << oi << dendl; @@ -6690,7 +6690,7 @@ void ReplicatedPG::_scrub(ScrubMap& scrubmap) if (!snapset.head_exists) { osd->clog.error() << mode << " " << info.pgid << " " << soid << " snapset.head_exists=false, but object exists"; - ++scrub_errors; + ++scrubber.errors; continue; } } else if (soid.snap) { @@ -6702,7 +6702,7 @@ void ReplicatedPG::_scrub(ScrubMap& scrubmap) if (soid.snap != *curclone) { osd->clog.error() << mode << " " << info.pgid << " " << soid << " expected clone " << *curclone; - ++scrub_errors; + ++scrubber.errors; assert(soid.snap == *curclone); } @@ -6753,10 +6753,10 @@ void ReplicatedPG::_scrub_finish() << scrub_cstat.sum.num_objects << "/" << info.stats.stats.sum.num_objects << " objects, " << scrub_cstat.sum.num_object_clones << "/" << info.stats.stats.sum.num_object_clones << " clones, " << scrub_cstat.sum.num_bytes << "/" << info.stats.stats.sum.num_bytes << " bytes.\n"; - ++scrub_errors; + ++scrubber.errors; if (repair) { - ++scrub_fixed; + ++scrubber.fixed; info.stats.stats = scrub_cstat; update_stats(); share_pg_info(); @@ -6809,7 +6809,7 @@ boost::statechart::result ReplicatedPG::NotTrimming::react(const SnapTrim&) } else if (!pg->is_primary() || !pg->is_active() || !pg->is_clean()) { dout(10) << "NotTrimming not primary, active, clean" << dendl; return discard_event(); - } else if (pg->scrub_active) { + } else if (pg->scrubber.active) { dout(10) << "NotTrimming finalizing scrub" << dendl; pg->queue_snap_trim(); return discard_event();