From 995485302e9607271deb861280148088e60f66ba Mon Sep 17 00:00:00 2001 From: sageweil Date: Wed, 19 Dec 2007 00:07:29 +0000 Subject: [PATCH] fixed bug with full partial overwrite; cleaned up map_read; fixed bug in attempt_read on partial bh git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@2226 29311d96-e01e-0410-9327-a35deaab8ce9 --- branches/ebofs/ebofs/BufferCache.cc | 34 ++++++++---------- branches/ebofs/ebofs/BufferCache.h | 3 +- branches/ebofs/ebofs/Ebofs.cc | 54 +++++++++++++---------------- 3 files changed, 40 insertions(+), 51 deletions(-) diff --git a/branches/ebofs/ebofs/BufferCache.cc b/branches/ebofs/ebofs/BufferCache.cc index cd0b980e5fb00..d34dd1e2489c2 100644 --- a/branches/ebofs/ebofs/BufferCache.cc +++ b/branches/ebofs/ebofs/BufferCache.cc @@ -113,18 +113,13 @@ void BufferHead::add_partial(off_t off, bufferlist& p) void BufferHead::apply_partial() { - assert(!partial.empty()); dout(10) << "apply_partial on " << partial.size() << " substrings" << dendl; - csum_t *expect = oc->on->get_extent_csum_ptr(start(), 1); - csum_t oldc = calc_csum(data.c_str(), EBOFS_BLOCK_SIZE); + assert(!partial.empty()); + csum_t *p = oc->on->get_extent_csum_ptr(start(), 1); do_apply_partial(data, partial); csum_t newc = calc_csum(data.c_str(), EBOFS_BLOCK_SIZE); - dout(10) << "apply_partial onode expected " << hex << *expect - << " bl was " << oldc - << " now " << newc << dec << dendl; - assert(*expect == oldc); - *expect = newc; - oc->on->data_csum += newc - oldc; + oc->on->data_csum += newc - *p; + *p = newc; } @@ -236,10 +231,13 @@ void ObjectCache::rx_finish(ioh_t ioh, block_t start, block_t length, bufferlist assert(exv[0].start != 0); block_t cur_block = exv[0].start; + off_t off_in_bl = (bh->start() - start) * EBOFS_BLOCK_SIZE; + assert(off_in_bl >= 0); + off_t len_in_bl = bh->length() * EBOFS_BLOCK_SIZE; + // verify csum - assert(bl.length() == (unsigned)EBOFS_BLOCK_SIZE); csum_t want = *bh->oc->on->get_extent_csum_ptr(bh->start(), 1); - csum_t got = 
calc_csum(bl.c_str(), bl.length()); + csum_t got = calc_csum(bl.c_str() + off_in_bl, len_in_bl); if (want != got) { derr(0) << "rx_finish bad csum on partial readback, want " << hex << want << " got " << got << dec << dendl; @@ -266,7 +264,9 @@ void ObjectCache::rx_finish(ioh_t ioh, block_t start, block_t length, bufferlist assert(bh->data.length() == 0); bufferptr bp = buffer::create_page_aligned(EBOFS_BLOCK_SIZE); bh->data.push_back( bp ); - bh->data.copy_in(0, EBOFS_BLOCK_SIZE, bl); + bufferlist sub; + sub.substr_of(bl, off_in_bl, len_in_bl); + bh->data.copy_in(0, EBOFS_BLOCK_SIZE, sub); bh->apply_partial(); // write "normally" @@ -479,8 +479,7 @@ int ObjectCache::map_read(block_t start, block_t len, map& hits, map& missing, map& rx, - map& partial, - map& corrupt) { + map& partial) { map::iterator p = find_bh(start, len); block_t cur = start; @@ -521,15 +520,12 @@ int ObjectCache::map_read(block_t start, block_t len, if (e->is_clean() || e->is_dirty() || - e->is_tx()) { + e->is_tx() || + e->is_corrupt()) { hits[cur] = e; // readable! dout(20) << "map_read hit " << *e << dendl; bc->touch(e); } - else if (e->is_corrupt()) { - corrupt[cur] = e; - dout(20) << "map_read corrupt " << *e << dendl; - } else if (e->is_rx()) { rx[cur] = e; // missing, not readable. dout(20) << "map_read rx " << *e << dendl; diff --git a/branches/ebofs/ebofs/BufferCache.h b/branches/ebofs/ebofs/BufferCache.h index c42bbc0fccbd8..ef9979b067a0a 100644 --- a/branches/ebofs/ebofs/BufferCache.h +++ b/branches/ebofs/ebofs/BufferCache.h @@ -400,8 +400,7 @@ class ObjectCache { map& hits, // hits map& missing, // read these from disk map& rx, // wait for these to finish reading from disk - map& partial, // (maybe) wait for these to read from disk - map& corrupt); // bad checksums + map& partial); // (maybe) wait for these to read from disk int try_map_read(block_t start, block_t len); // just tell us how many extents we're missing. 
int map_write(block_t start, block_t len, diff --git a/branches/ebofs/ebofs/Ebofs.cc b/branches/ebofs/ebofs/Ebofs.cc index 8f20f278319cf..5a90db911bb9c 100644 --- a/branches/ebofs/ebofs/Ebofs.cc +++ b/branches/ebofs/ebofs/Ebofs.cc @@ -1841,11 +1841,12 @@ int Ebofs::apply_write(Onode *on, off_t off, off_t len, const bufferlist& bl) blpos += len_in_bh; opos += len_in_bh; - if (bh->partial_is_complete(on->object_size - bh->start()*EBOFS_BLOCK_SIZE)) { + if (bh->is_partial() && + bh->partial_is_complete(on->object_size - bh->start()*EBOFS_BLOCK_SIZE)) { dout(10) << "apply_write completed partial " << *bh << dendl; + bc.bh_cancel_read(bh); // cancel old rx op, if we can. bh->data.clear(); - bh->data.push_back( buffer::create_page_aligned(EBOFS_BLOCK_SIZE*bh->length()) ); - bh->data.zero(); + bh->data.push_back(buffer::create_page_aligned(EBOFS_BLOCK_SIZE)); bh->apply_partial(); bc.mark_dirty(bh); bc.bh_write(on, bh); @@ -2142,8 +2143,7 @@ int Ebofs::attempt_read(Onode *on, off_t off, size_t len, bufferlist& bl, map missing; // read these map rx; // wait for these map partials; // ?? - map corrupt; - oc->map_read(bstart, blen, hits, missing, rx, partials, corrupt); + oc->map_read(bstart, blen, hits, missing, rx, partials); // missing buffers? if (!missing.empty()) { @@ -2159,8 +2159,17 @@ int Ebofs::attempt_read(Onode *on, off_t off, size_t len, bufferlist& bl, return 0; } + // wait on rx? + if (!rx.empty()) { + BufferHead *wait_on = rx.begin()->second; + Context *c = new C_Cond(will_wait_on, will_wait_on_bool); + dout(20) << "attempt_read waiting for read to finish on " << *wait_on << " c " << c << dendl; + block_t b = MAX(wait_on->start(), bstart); + wait_on->waitfor_read[b].push_back(c); + return 0; + } + // are partials sufficient? 
- bool partials_ok = true; for (map::iterator i = partials.begin(); i != partials.end(); i++) { @@ -2170,33 +2179,20 @@ int Ebofs::attempt_read(Onode *on, off_t off, size_t len, bufferlist& bl, off_t start = MAX( off, bhstart ); off_t end = MIN( off+(off_t)len, bhend ); - if (!i->second->have_partial_range(start-bhstart, end-bhend)) { - if (partials_ok) { - // wait on this one - Context *c = new C_Cond(will_wait_on, will_wait_on_bool); - dout(10) << "attempt_read insufficient partial buffer " << *(i->second) << " c " << c << dendl; - i->second->waitfor_read[i->second->start()].push_back(c); - } - partials_ok = false; + if (!i->second->have_partial_range(start-bhstart, end-bhstart)) { + // wait on this one + Context *c = new C_Cond(will_wait_on, will_wait_on_bool); + dout(10) << "attempt_read insufficient partial buffer " << *(i->second) << " c " << c << dendl; + i->second->waitfor_read[i->second->start()].push_back(c); + return 0; } - } - if (!partials_ok) return 0; - - // wait on rx? - if (!rx.empty()) { - BufferHead *wait_on = rx.begin()->second; - Context *c = new C_Cond(will_wait_on, will_wait_on_bool); - dout(20) << "attempt_read waiting for read to finish on " << *wait_on << " c " << c << dendl; - block_t b = MAX(wait_on->start(), bstart); - wait_on->waitfor_read[b].push_back(c); - return 0; + dout(10) << "attempt_read have partial range " << (start-bhstart) << "~" << (end-bhstart) << " on " << *bh << dendl; } // yay, we have it all! - // concurrently walk thru hits, partials. + // concurrently walk thru hits (which now include corrupt bhs) and partials. 
map::iterator h = hits.begin(); map::iterator p = partials.begin(); - map::iterator c = corrupt.begin(); bl.clear(); off_t pos = off; @@ -2209,9 +2205,6 @@ int Ebofs::attempt_read(Onode *on, off_t off, size_t len, bufferlist& bl, } else if (p != partials.end() && p->first == curblock) { bh = p->second; p++; - } else if (c != corrupt.end() && c->first == curblock) { - bh = c->second; - c++; } else assert(0); off_t bhstart = (off_t)(bh->start()*EBOFS_BLOCK_SIZE); @@ -2230,6 +2223,7 @@ int Ebofs::attempt_read(Onode *on, off_t off, size_t len, bufferlist& bl, } else if (bh->is_partial()) { // copy from a partial block. yuck! bufferlist frag; + dout(10) << "attempt_read copying partial range " << (start-bhstart) << "~" << (end-bhstart) << " on " << *bh << dendl; bh->copy_partial_substr( start-bhstart, end-bhstart, frag ); bl.claim_append( frag ); pos += frag.length(); -- 2.39.5