From d25befe730f542408a5ae353d3e1ea8a92b20753 Mon Sep 17 00:00:00 2001 From: sageweil Date: Thu, 13 Dec 2007 20:50:16 +0000 Subject: [PATCH] fixed zero; some checksum fixes git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@2207 29311d96-e01e-0410-9327-a35deaab8ce9 --- branches/ebofs/config.cc | 3 - branches/ebofs/config.h | 1 - branches/ebofs/ebofs/BufferCache.cc | 89 ++++++++++++++------------- branches/ebofs/ebofs/BufferCache.h | 8 +-- branches/ebofs/ebofs/Ebofs.cc | 94 ++++++++++++++--------------- branches/ebofs/ebofs/Onode.h | 3 +- 6 files changed, 99 insertions(+), 99 deletions(-) diff --git a/branches/ebofs/config.cc b/branches/ebofs/config.cc index b4c108d0b53eb..bb86b35b3c116 100644 --- a/branches/ebofs/config.cc +++ b/branches/ebofs/config.cc @@ -328,7 +328,6 @@ md_config_t g_conf = { ebofs_cloneable: false, ebofs_verify: false, ebofs_commit_ms: 1000, // 0 = no forced commit timeout (for debugging/tracing) - ebofs_idle_commit_ms: 0, // 0 = no idle detection. UGLY HACK. use bdev_idle_kick_after_ms instead. ebofs_oc_size: 10000, // onode cache ebofs_cc_size: 10000, // cnode cache ebofs_bc_size: (5 *256), // 4k blocks, *256 for MB @@ -810,8 +809,6 @@ void parse_config_options(std::vector& args) g_conf.ebofs_verify = atoi(args[++i]); else if (strcmp(args[i], "--ebofs_commit_ms") == 0) g_conf.ebofs_commit_ms = atoi(args[++i]); - else if (strcmp(args[i], "--ebofs_idle_commit_ms") == 0) - g_conf.ebofs_idle_commit_ms = atoi(args[++i]); else if (strcmp(args[i], "--ebofs_oc_size") == 0) g_conf.ebofs_oc_size = atoi(args[++i]); else if (strcmp(args[i], "--ebofs_cc_size") == 0) diff --git a/branches/ebofs/config.h b/branches/ebofs/config.h index 13a49b1d39237..6ce585a84437a 100644 --- a/branches/ebofs/config.h +++ b/branches/ebofs/config.h @@ -291,7 +291,6 @@ struct md_config_t { bool ebofs_cloneable; bool ebofs_verify; int ebofs_commit_ms; - int ebofs_idle_commit_ms; int ebofs_oc_size; int ebofs_cc_size; off_t ebofs_bc_size; diff --git a/branches/ebofs/ebofs/BufferCache.cc b/branches/ebofs/ebofs/BufferCache.cc index ad5380977dccc..c9c8dafca164b 100644 --- a/branches/ebofs/ebofs/BufferCache.cc +++ b/branches/ebofs/ebofs/BufferCache.cc @@ -38,7 +38,9 @@ void do_apply_partial(bufferlist& bl, map& pm) #undef dout +#undef derr #define dout(x) if (x <= g_conf.debug_ebofs) *_dout << dbeginl << g_clock.now() << " ebofs." << *this << "." +#define derr(x) if (x <= g_conf.debug_ebofs) *_derr << dbeginl << g_clock.now() << " ebofs." << *this << "." void BufferHead::add_partial(off_t off, bufferlist& p) @@ -129,7 +131,9 @@ void BufferHead::apply_partial() #undef dout +#undef derr #define dout(x) if (x <= g_conf.debug_ebofs) *_dout << dbeginl << g_clock.now() << " ebofs.oc." +#define derr(x) if (x <= g_conf.debug_ebofs) *_derr << dbeginl << g_clock.now() << " ebofs.oc." @@ -182,8 +186,9 @@ void ObjectCache::rx_finish(ioh_t ioh, block_t start, block_t length, bufferlist csum_t got[bh->length()]; for (unsigned i=0; ilength(); i++) { got[i] = calc_csum(&bh->data[i*EBOFS_BLOCK_SIZE], EBOFS_BLOCK_SIZE); - if (false && rand() % 10 == 0) { + if (rand() % 10 == 0) { dout(0) << "rx_finish HACK INJECTING bad csum" << dendl; + derr(0) << "rx_finish HACK INJECTING bad csum" << dendl; got[i] = 0; } if (got[i] != want[i]) { @@ -201,7 +206,8 @@ void ObjectCache::rx_finish(ioh_t ioh, block_t start, block_t length, bufferlist unsigned e; for (e=s; eoc->on->object_id << " over " << s << "~" << (e-s) << dendl; + derr(0) << "rx_finish bad csum in " << bh->oc->on->object_id << " over " << s << "~" << (e-s) << dendl; if (s) { BufferHead *middle = bc->split(bh, ostart+s); @@ -1259,36 +1265,37 @@ void BufferCache::rx_finish(ObjectCache *oc, // finish any partials? // note: these are partials that were re-written after a commit, // or for whom the OC was destroyed (eg truncated after a commit) - map >::iterator sp = partial_write.lower_bound(diskstart); - while (sp != partial_write.end()) { - if (sp->first >= diskstart+length) break; - assert(sp->first >= diskstart); - - block_t pblock = sp->first; - map writes; - writes.swap( sp->second ); - - map >::iterator t = sp; - sp++; - partial_write.erase(t); - - for (map::iterator p = writes.begin(); - p != writes.end(); - p++) { - dout(10) << "rx_finish partial from " << pblock << " -> " << p->first - << " for epoch " << p->second.epoch - << dendl; - // make the combined block - bufferlist combined; - bufferptr bp = buffer::create_page_aligned(EBOFS_BLOCK_SIZE); - combined.push_back( bp ); - combined.copy_in((pblock-diskstart)*EBOFS_BLOCK_SIZE, (pblock-diskstart+1)*EBOFS_BLOCK_SIZE, bl); - do_apply_partial( combined, p->second.partial ); - - // write it! - dev.write( pblock, 1, combined, - new C_OC_PartialTxFinish( this, p->second.epoch ), - "finish_partials"); + if (length == 1) { + map::iterator sp = partial_write.find(diskstart); + if (sp != partial_write.end()) { + block_t pblock = diskstart; + + // verify csum + csum_t actual = calc_csum(bl.c_str(), bl.length()); + if (actual != sp->second.csum) { + dout(0) << "rx_finish bad csum on partial block " << pblock << dendl; + derr(0) << "rx_finish bad csum on partial block " << pblock << dendl; + } + + for (map::iterator p = sp->second.writes.begin(); + p != sp->second.writes.end(); + p++) { + dout(10) << "rx_finish partial from " << pblock << " -> " << p->first + << " for epoch " << p->second.epoch + << dendl; + // make the combined block + bufferlist combined; + bufferptr bp = buffer::create_page_aligned(EBOFS_BLOCK_SIZE); + combined.push_back( bp ); + combined.copy_in((pblock-diskstart)*EBOFS_BLOCK_SIZE, (pblock-diskstart+1)*EBOFS_BLOCK_SIZE, bl); + do_apply_partial( combined, p->second.partial ); + + // write it! + dev.write( pblock, 1, combined, + new C_OC_PartialTxFinish( this, p->second.epoch ), + "finish_partials"); + } + partial_write.erase(sp); } } @@ -1399,30 +1406,30 @@ void BufferCache::queue_partial(block_t from, block_t to, << " in epoch " << epoch << dendl; - if (partial_write[from].count(to)) { + if (partial_write[from].writes.count(to)) { // this should be in the same epoch. - assert( partial_write[from][to].epoch == epoch); + assert( partial_write[from].writes[to].epoch == epoch); assert(0); // actually.. no! } else { inc_unflushed( EBOFS_BC_FLUSH_PARTIAL, epoch ); } - partial_write[from][to].partial = partial; - partial_write[from][to].epoch = epoch; + partial_write[from].writes[to].partial = partial; + partial_write[from].writes[to].epoch = epoch; } void BufferCache::cancel_partial(block_t from, block_t to, version_t epoch) { assert(partial_write.count(from)); - assert(partial_write[from].count(to)); - assert(partial_write[from][to].epoch == epoch); + assert(partial_write[from].writes.count(to)); + assert(partial_write[from].writes[to].epoch == epoch); dout(10) << "cancel_partial " << from << " -> " << to - << " (was epoch " << partial_write[from][to].epoch << ")" + << " (was epoch " << partial_write[from].writes[to].epoch << ")" << dendl; - partial_write[from].erase(to); - if (partial_write[from].empty()) + partial_write[from].writes.erase(to); + if (partial_write[from].writes.empty()) partial_write.erase(from); dec_unflushed( EBOFS_BC_FLUSH_PARTIAL, epoch ); diff --git a/branches/ebofs/ebofs/BufferCache.h b/branches/ebofs/ebofs/BufferCache.h index fc8424057f1a0..3959a7e644090 100644 --- a/branches/ebofs/ebofs/BufferCache.h +++ b/branches/ebofs/ebofs/BufferCache.h @@ -260,6 +260,7 @@ inline ostream& operator<<(ostream& out, BufferHead& bh) if (bh.is_rx()) out << " rx"; if (bh.is_tx()) out << " tx"; if (bh.is_partial()) out << " partial"; + if (bh.is_corrupt()) out << " corrupt"; // include epoch modified? if (bh.is_dirty() || bh.is_tx() || bh.is_partial()) @@ -425,17 +426,16 @@ class BufferCache { * * really, at most there will only ever be two of these, for current+previous epochs. */ - class PartialWrite { - public: + struct PartialWrite { map partial; // partial dirty content overlayed onto incoming data version_t epoch; }; - class WriteSet { + struct PartialWriteSet { csum_t csum; // expected csum map writes; }; - map > partial_write; // queued writes w/ partial content + map partial_write; // queued writes w/ partial content map > shadow_partials; public: diff --git a/branches/ebofs/ebofs/Ebofs.cc b/branches/ebofs/ebofs/Ebofs.cc index 504d8df749d8f..e8bacbe6989bb 100644 --- a/branches/ebofs/ebofs/Ebofs.cc +++ b/branches/ebofs/ebofs/Ebofs.cc @@ -437,41 +437,9 @@ int Ebofs::commit_thread_entry() // wait for kick, or timeout if (g_conf.ebofs_commit_ms) { - if (g_conf.ebofs_idle_commit_ms > 0) { - // *** this is an ugly ugly hack **** - // do not use - // periodically check for idle block device - utime_t idle_wait(0, g_conf.ebofs_idle_commit_ms*1000); - dout(20) << "commit_thread sleeping (up to) " << g_conf.ebofs_commit_ms << " ms, " - << idle_wait << " ms if idle" << dendl; - utime_t now = g_clock.now(); - utime_t stop = now; - stop += (double)g_conf.ebofs_commit_ms / 1000.0; - do { - utime_t wait = MIN(stop - now, idle_wait); - if (commit_cond.WaitInterval(ebofs_lock, wait) != ETIMEDOUT) { - dout(20) << "commit_thread i got kicked" << dendl; - break; // we got kicked - } - if (dev.is_idle()) { - dout(20) << "commit_thread bdev is idle, early commit" << dendl; - break; // dev is idle - } - now = g_clock.now(); - dout(20) << "commit_thread now=" << now << ", stop at " << stop << dendl; - - // hack hack - //if (!left) g_conf.debug_ebofs = 10; - // /hack hack - } while (now < stop); - dout(20) << "commit_thread done with idle loop" << dendl; - - } else { - // normal wait+timeout - dout(20) << "commit_thread sleeping (up to) " << g_conf.ebofs_commit_ms << " ms" << dendl; - commit_cond.WaitInterval(ebofs_lock, utime_t(0, g_conf.ebofs_commit_ms*1000)); - } - + // normal wait+timeout + dout(20) << "commit_thread sleeping (up to) " << g_conf.ebofs_commit_ms << " ms" << dendl; + commit_cond.WaitInterval(ebofs_lock, utime_t(0, g_conf.ebofs_commit_ms*1000)); } else { // DEBUG.. wait until kicked dout(10) << "commit_thread no commit_ms, waiting until kicked" << dendl; @@ -490,6 +458,7 @@ int Ebofs::commit_thread_entry() dout(10) << "commit_thread not dirty" << dendl; } else { + // --- this all happens in one go, from here --- super_epoch++; dirty = false; @@ -525,15 +494,18 @@ int Ebofs::commit_thread_entry() // (async) write btree nodes nodepool.commit_start( dev, super_epoch ); - - // blockdev barrier (prioritize our writes!) - dout(30) << "commit_thread barrier. flushing inodes " << inodes_flushing << dendl; - dev.barrier(); // prepare super (before any changes get made!) bufferptr superbp; prepare_super(super_epoch, superbp); + // --- to here. --- + // now wait. + + // blockdev barrier (prioritize our writes!) + dout(30) << "commit_thread barrier. flushing inodes " << inodes_flushing << dendl; + dev.barrier(); + // wait for it all to flush (drops global lock) commit_bc_wait(super_epoch-1); dout(30) << "commit_thread bc flushed" << dendl; @@ -566,10 +538,8 @@ int Ebofs::commit_thread_entry() // kick waiters dout(10) << "commit_thread queueing commit + kicking sync waiters" << dendl; - queue_finishers(commit_waiters[super_epoch-1]); commit_waiters.erase(super_epoch-1); - sync_cond.Signal(); dout(10) << "commit_thread commit finish" << dendl; @@ -1723,23 +1693,37 @@ void Ebofs::apply_write(Onode *on, off_t off, size_t len, const bufferlist& bl) unsigned len_in_bh = MIN( (off_t)(left), (off_t)(bh->end()*EBOFS_BLOCK_SIZE)-opos ); - if (bh->is_partial() || bh->is_rx() || bh->is_missing()) { - assert(bh->is_partial() || bh->is_rx() || bh->is_missing()); + if (bh->is_partial() || bh->is_rx() || bh->is_missing() || bh->is_corrupt()) { assert(bh->length() == 1); if (bh->is_missing()) { - // newly realloc; carry old checksum over since we're only partially overwriting - if (bh->start() == bstart) { + // newly realloc? carry old checksum over since we're only partially overwriting + if (bh->start() == bstart && alloc.contains(bstart)) { dout(10) << "apply_write carrying over starting csum " << hex << old_csum_first << dec << " for partial " << *bh << dendl; *on->get_extent_csum_ptr(bh->start(), 1) = old_csum_first; on->data_csum += old_csum_first; - } else if (bh->end()-1 == blast) { + } else if (bh->end()-1 == blast && alloc.contains(blast)) { dout(10) << "apply_write carrying over ending csum " << hex << old_csum_last << dec << " for partial " << *bh << dendl; *on->get_extent_csum_ptr(bh->end()-1, 1) = old_csum_last; on->data_csum += old_csum_last; - } else assert(0); + } + } + if (bh->is_corrupt()) { + dout(10) << "apply_write marking non-overwritten bytes bad on corrupt " << *bh << dendl; + interval_set bad; + off_t bs = bh->start() * EBOFS_BLOCK_SIZE; + if (off_in_bh) bad.insert(bs, bs+off_in_bh); + if (off_in_bh+len_in_bh < (unsigned)EBOFS_BLOCK_SIZE) + bad.insert(bs+off_in_bh+len_in_bh, bs+EBOFS_BLOCK_SIZE-off_in_bh-len_in_bh); + dout(10) << "apply_write marking non-overwritten bytes " << bad << " bad on corrupt " << *bh << dendl; + bh->oc->on->bad_byte_extents.union_of(bad); + csum_t csum = calc_csum(bh->data.c_str(), bh->data.length()); + dout(10) << "apply_write marking corrupt bh csum " << hex << csum << dec << " clean " << *bh << dendl; + *on->get_extent_csum_ptr(bh->start(), 1) = csum; + on->data_csum += csum; + bc.mark_clean(bh); } // add frag to partial @@ -1772,7 +1756,7 @@ void Ebofs::apply_write(Onode *on, off_t off, size_t len, const bufferlist& bl) bc.mark_partial(bh); bc.bh_queue_partial_write(on, bh); // queue the eventual write } - else if (bh->is_missing()) { + else if (bh->is_missing() || bh->is_corrupt()) { dout(10) << "apply_write missing -> partial " << *bh << dendl; assert(bh->length() == 1); bc.mark_partial(bh); @@ -2420,6 +2404,20 @@ unsigned Ebofs::_apply_transaction(Transaction& t) } break; + case Transaction::OP_ZERO: + { + pobject_t oid; + t.get_oid(oid); + off_t offset, len; + t.get_length(offset); + t.get_length(len); + if (_zero(oid, offset, len) < 0) { + dout(7) << "apply_transaction fail on _zero" << dendl; + r &= bit; + } + } + break; + case Transaction::OP_TRIMCACHE: { pobject_t oid; diff --git a/branches/ebofs/ebofs/Onode.h b/branches/ebofs/ebofs/Onode.h index fd78b4f788a85..a0acdbad1aa27 100644 --- a/branches/ebofs/ebofs/Onode.h +++ b/branches/ebofs/ebofs/Onode.h @@ -190,8 +190,7 @@ public: } } } - cout << " verify_extents got csum " - << hex << csum << " want " << data_csum << dec << std::endl; + cout << " verify_extents got csum " << hex << csum << " want " << data_csum << dec << std::endl; assert(s.size() == count); assert(count == alloc_blocks); -- 2.39.5