From 076f33afb393f1b733d14c1eff99a9818b086d4f Mon Sep 17 00:00:00 2001 From: Ma Jianpeng Date: Wed, 30 Jul 2014 11:03:17 +0800 Subject: [PATCH] ECBackend: Don't directly use get_recovery_chunk_size() in RecoveryOp::WRITING state. We cannot guarantee that conf->osd_recovery_max_chunk don't change when recoverying a erasure object. If change between RecoveryOp::READING and RecoveryOp::WRITING, it can cause this bug: 2014-07-30 10:12:09.599220 7f7ff26c0700 -1 osd/ECBackend.cc: In function 'void ECBackend::continue_recovery_op(ECBackend::RecoveryOp&, RecoveryMessages*)' thread 7f7ff26c0700 time 2014-07-30 10:12:09.596837 osd/ECBackend.cc: 529: FAILED assert(pop.data.length() == sinfo.aligned_logical_offset_to_chunk_offset( after_progress.data_recovered_to - op.recovery_progress.data_recovered_to)) ceph version 0.83-383-g3cfda57 (3cfda577b15039cb5c678b79bef3e561df826ed1) 1: (ECBackend::continue_recovery_op(ECBackend::RecoveryOp&,RecoveryMessages*)+0x1a50) [0x928070] 2: (ECBackend::handle_recovery_read_complete(hobject_t const&, boost::tuples::tuple, std::allocator > >, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type, boost::tuples::null_type>&, boost::optional, std::allocator > > >, RecoveryMessages*)+0x90c) [0x92952c] 3: (OnRecoveryReadComplete::finish(std::pair&)+0x121) [0x938481] 4: (GenContext&>::complete(std::pair&)+0x9) [0x929d69] 5: (ECBackend::complete_read_op(ECBackend::ReadOp&,RecoveryMessages*)+0x63) [0x91c6e3] 6: (ECBackend::handle_sub_read_reply(pg_shard_t, ECSubReadReply&,RecoveryMessages*)+0x96d) [0x920b4d] 7: (ECBackend::handle_message(std::tr1::shared_ptr)+0x17e)[0x92884e] 8: (ReplicatedPG::do_request(std::tr1::shared_ptr&,ThreadPool::TPHandle&)+0x23b) [0x7b34db] 9: (OSD::dequeue_op(boost::intrusive_ptr,std::tr1::shared_ptr, ThreadPool::TPHandle&)+0x428) [0x638d58] 10: (OSD::ShardedOpWQ::_process(unsigned int,ceph::heartbeat_handle_d*)+0x346) [0x6392f6] 11: (ShardedThreadPool::shardedthreadpool_worker(unsigned int)+0x8ce)[0xa5caae] 12: (ShardedThreadPool::WorkThreadSharded::entry()+0x10) [0xa5ed00] 13: (()+0x8182) [0x7f800b5d3182] 14: (clone()+0x6d) [0x7f800997430d] NOTE: a copy of the executable, or `objdump -rdS ` is needed to interpret this. So we only get the get_recovery_chunk_size() at RecoverOp::READING and record it using RecoveryOp::extent_requested. Signed-off-by: Ma Jianpeng --- src/osd/ECBackend.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index 9fa036ff7dfda..0c15076f4ca46 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -470,6 +470,7 @@ void ECBackend::continue_recovery_op( assert(!op.recovery_progress.data_complete); set want(op.missing_on_shards.begin(), op.missing_on_shards.end()); set to_read; + uint64_t recovery_max_chunk = get_recovery_chunk_size(); int r = get_min_avail_to_read_shards( op.hoid, want, true, &to_read); if (r != 0) { @@ -486,11 +487,11 @@ void ECBackend::continue_recovery_op( this, op.hoid, op.recovery_progress.data_recovered_to, - get_recovery_chunk_size(), + recovery_max_chunk, to_read, op.recovery_progress.first); op.extent_requested = make_pair(op.recovery_progress.data_recovered_to, - get_recovery_chunk_size()); + recovery_max_chunk); dout(10) << __func__ << ": IDLE return " << op << dendl; return; } @@ -500,7 +501,7 @@ void ECBackend::continue_recovery_op( assert(op.returned_data.size()); op.state = RecoveryOp::WRITING; ObjectRecoveryProgress after_progress = op.recovery_progress; - after_progress.data_recovered_to += get_recovery_chunk_size(); + after_progress.data_recovered_to += op.extent_requested.second; after_progress.first = false; if (after_progress.data_recovered_to >= op.obc->obs.oi.size) { after_progress.data_recovered_to = -- 2.47.3