From e8a919fce18b2d7a42206ce1270ae07342c17409 Mon Sep 17 00:00:00 2001 From: sageweil Date: Wed, 21 Nov 2007 23:31:20 +0000 Subject: [PATCH] csum stabilizing git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@2105 29311d96-e01e-0410-9327-a35deaab8ce9 --- branches/ebofs/ebofs/BufferCache.cc | 48 +++++++++++--- branches/ebofs/ebofs/BufferCache.h | 2 + branches/ebofs/ebofs/Ebofs.cc | 98 +++++++++++++++++++++-------- branches/ebofs/ebofs/Onode.h | 12 ++-- branches/ebofs/ebofs/csum.h | 12 ++-- branches/ebofs/ebofs/test.ebofs.cc | 13 ++-- branches/ebofs/include/buffer.h | 22 +++++++ 7 files changed, 156 insertions(+), 51 deletions(-) diff --git a/branches/ebofs/ebofs/BufferCache.cc b/branches/ebofs/ebofs/BufferCache.cc index 10da7510fb46c..57c02a667c295 100644 --- a/branches/ebofs/ebofs/BufferCache.cc +++ b/branches/ebofs/ebofs/BufferCache.cc @@ -538,9 +538,9 @@ int ObjectCache::map_read(block_t start, block_t len, * - break up bufferheads that don't fall completely within the range * - cancel rx ops we obsolete. * - resubmit rx ops if we split bufferheads - * - break over disk extent boundaries + * - cancel obsoleted tx ops * - * - leave potentially obsoleted tx ops alone (for now) + * - DO NOT break over disk extent boundaries */ int ObjectCache::map_write(block_t start, block_t len, map& hits, @@ -579,11 +579,13 @@ int ObjectCache::map_write(block_t start, block_t len, // max for this bh (bc of (re)alloc on disk) block_t max = left; + /* // based on disk extent boundary ... vector exv; on->map_extents(cur, max, exv, 0); if (exv.size() > 1) max = exv[0].length; + */ dout(10) << "map_write " << cur << "~" << max << dendl; @@ -593,8 +595,8 @@ int ObjectCache::map_write(block_t start, block_t len, n->set_start( cur ); n->set_length( max ); bc->add_bh(n); - if (exv[0].start == 0) - n->set_state(BufferHead::STATE_CLEAN); // hole + //if (exv[0].start == 0) + //n->set_state(BufferHead::STATE_CLEAN); // hole hits[cur] = n; left -= max; cur += max; @@ -668,7 +670,8 @@ int ObjectCache::map_write(block_t start, block_t len, } // try to cancel tx? - if (bh->is_tx() && bh->epoch_modified == super_epoch) bc->bh_cancel_write(bh, super_epoch); + if (bh->is_tx() && bh->epoch_modified == super_epoch) + bc->bh_cancel_write(bh, super_epoch); // put in our map hits[cur] = bh; @@ -687,8 +690,8 @@ int ObjectCache::map_write(block_t start, block_t len, BufferHead *n = new BufferHead(this); n->set_start( cur ); n->set_length( glen ); - if (exv[0].start == 0) - n->set_state(BufferHead::STATE_CLEAN); // hole + //if (exv[0].start == 0) + //n->set_state(BufferHead::STATE_CLEAN); // hole bc->add_bh(n); hits[cur] = n; @@ -934,6 +937,37 @@ void ObjectCache::try_merge_bh_right(map::iterator& p) } +void ObjectCache::scrub_csums() +{ + dout(10) << "scrub_csums on " << *this->on << dendl; + int bad = 0; + for (map::iterator p = data.begin(); + p != data.end(); + p++) { + BufferHead *bh = p->second; + if (bh->is_rx() || bh->is_missing()) continue; // nothing to scrub + if (bh->is_clean() && bh->data.length() == 0) continue; // hole. + if (bh->is_clean() || bh->is_tx()) { + for (unsigned i=0; ilength(); i++) { + vector exv; + on->map_extents(bh->start()+i, 1, exv, 0); + assert(exv.size() == 1); + if (exv[0].start == 0) continue; // hole. + csum_t want = *on->get_extent_csum_ptr(bh->start()+i); + csum_t b = calc_csum(&bh->data[i*EBOFS_BLOCK_SIZE], EBOFS_BLOCK_SIZE); + if (b != want) { + dout(0) << "scrub_csums bad data at " << (bh->start()+i) << " have " + << hex << b << " should be " << want << dec + << " in bh " << *bh + << dendl; + bad++; + } + } + } + } + assert(bad == 0); +} + /************** BufferCache ***************/ diff --git a/branches/ebofs/ebofs/BufferCache.h b/branches/ebofs/ebofs/BufferCache.h index 24074b0ac1790..fcddefbdad886 100644 --- a/branches/ebofs/ebofs/BufferCache.h +++ b/branches/ebofs/ebofs/BufferCache.h @@ -377,6 +377,8 @@ class ObjectCache { cout << "dump: " << i->first << ": " << *i->second << std::endl; } + void scrub_csums(); + }; diff --git a/branches/ebofs/ebofs/Ebofs.cc b/branches/ebofs/ebofs/Ebofs.cc index 008a3e57f8430..10aeb286ef80f 100644 --- a/branches/ebofs/ebofs/Ebofs.cc +++ b/branches/ebofs/ebofs/Ebofs.cc @@ -1573,6 +1573,7 @@ void Ebofs::alloc_write(Onode *on, void Ebofs::apply_write(Onode *on, off_t off, size_t len, const bufferlist& bl) { ObjectCache *oc = on->get_oc(&bc); + oc->scrub_csums(); // map into blocks off_t opos = off; // byte pos in object @@ -1592,6 +1593,30 @@ void Ebofs::apply_write(Onode *on, off_t off, size_t len, const bufferlist& bl) block_t blen = blast-bstart+1; block_t oldlastblock = on->last_block; + + // map b range onto buffer_heads + map hits; + oc->map_write(bstart, blen, hits, super_epoch); + + for (map::iterator i = hits.begin(); + i != hits.end(); + i++) { + BufferHead *bh = i->second; + + if (bh->start() < oldlastblock) { + vector exv; + on->map_extents(bh->start(), bh->length(), exv, 0); + assert(exv.size() >= 1); + if (exv[0].start) continue; // not a hole. + assert(bh->is_missing() || bh->is_clean()); + dout(10) << "apply_write marking old hole clean " << *bh << dendl; + } else { + assert(bh->is_missing()); + dout(10) << "apply_write treating appended bh as a hole " << *bh << dendl; + } + bc.mark_clean(bh); + } + // allocate write on disk. interval_set alloc; block_t old_bfirst = 0; // zero means not defined here (since we ultimately pass to bh_read) @@ -1602,16 +1627,10 @@ void Ebofs::apply_write(Onode *on, off_t off, size_t len, const bufferlist& bl) if (fake_writes) { on->uncommitted.clear(); // worst case! return; - } - - // map b range onto buffer_heads - map hits; - oc->map_write(bstart, blen, hits, super_epoch); + } + // get current versions - //version_t lowv, highv; - //oc->scan_versions(bstart, blen, lowv, highv); - //highv++; version_t highv = ++oc->write_count; // copy from bl into buffer cache @@ -1624,14 +1643,18 @@ void Ebofs::apply_write(Onode *on, off_t off, size_t len, const bufferlist& bl) BufferHead *bh = i->second; bh->set_version(highv); bh->epoch_modified = super_epoch; - - // newly allocated? - if (bh->start() >= oldlastblock) { - assert(bh->is_missing()); - bc.mark_clean(bh); // now a hole - dout(10) << "apply_write treating new (past old last_block) bh as a hole " << *bh << dendl; - } + // break over extent boundary? + vector exv; + on->map_extents(bh->start(), bh->length(), exv, 0); + dout(10) << "apply_write bh " << *bh << " maps to " << exv << dendl; + if (exv.size() > 1) { + dout(10) << "apply_write breaking interior bh " << *bh << " over extent boundary " + << exv[0] << " " << exv[1] << dendl; + BufferHead *right = bc.split(bh, bh->start() + exv[0].length); + hits[right->start()] = right; + } + // old write in progress? if (bh->is_tx()) { // copy the buffer to avoid munging up in-flight write dout(10) << "apply_write tx pending, copying buffer on " << *bh << dendl; @@ -1737,14 +1760,23 @@ void Ebofs::apply_write(Onode *on, off_t off, size_t len, const bufferlist& bl) // copy data into new buffers first (copy on write!) // FIXME: only do the modified pages? this might be a big bh! - bufferlist temp; - temp.claim(bh->data); - //bc.bufferpool.alloc(EBOFS_BLOCK_SIZE*bh->length(), bh->data); + bufferlist oldbl; + oldbl.claim(bh->data); bh->data.push_back( buffer::create_page_aligned(EBOFS_BLOCK_SIZE*bh->length()) ); - if (temp.length()) - bh->data.copy_in(0, bh->length()*EBOFS_BLOCK_SIZE, temp); - else - bh->data.zero(); // was a hole + if (oldbl.length()) { + // had data + if (off_in_bh) + bh->data.copy_in(0, off_in_bh, oldbl); + if (off_in_bh+len_in_bh < bh->data.length()) + bh->data.copy_in(off_in_bh+len_in_bh, bh->data.length()-off_in_bh-len_in_bh, + oldbl.c_str()+off_in_bh+len_in_bh); + } else { + // was a hole + if (off_in_bh) + bh->data.zero(0, off_in_bh); + if (off_in_bh+len_in_bh < bh->data.length()) + bh->data.zero(off_in_bh+len_in_bh, bh->data.length()-off_in_bh-len_in_bh); + } // new data bufferlist sub; @@ -1752,12 +1784,18 @@ void Ebofs::apply_write(Onode *on, off_t off, size_t len, const bufferlist& bl) bh->data.copy_in(off_in_bh, len_in_bh, sub); // update csum - csum_t *csum = on->get_extent_csum_ptr(opos/EBOFS_BLOCK_SIZE); - unsigned blocks = (off_in_bh+len_in_bh+4095)/EBOFS_BLOCK_SIZE - off_in_bh/EBOFS_BLOCK_SIZE; - for (unsigned i=0; iget_extent_csum_ptr(bh->start()+rbfirst); + dout(10) << "calc csum for " << rbfirst << "~" << bnum << dendl; + for (unsigned i=0; idata_csum -= csum[i]; - csum[i] = calc_csum(bh->data.c_str() + i*EBOFS_BLOCK_SIZE, EBOFS_BLOCK_SIZE); + dout(10) << "old csum for " << (i+rbfirst) << " is " << hex << csum[i] << dec << dendl; + csum[i] = calc_csum(&bh->data[i*EBOFS_BLOCK_SIZE], EBOFS_BLOCK_SIZE); + dout(10) << "new csum for " << (i+rbfirst) << " is " << hex << csum[i] << dec << dendl; on->data_csum += csum[i]; + dout(10) << "new data_csum is " << hex << on->data_csum << dec << dendl; } blpos += len_in_bh; @@ -1804,11 +1842,15 @@ void Ebofs::apply_write(Onode *on, off_t off, size_t len, const bufferlist& bl) bufferlist sub; sub.substr_of(bl, blpos, len_in_bh); bh->data.copy_in(0, len_in_bh, sub); + + // zero the past-eof tail, too, to be tidy. + if (len_in_bh < bh->data.length()) + bh->data.zero(len_in_bh, bh->data.length()-len_in_bh); } // fill in csums csum_t *csum = on->get_extent_csum_ptr(bh->start()); - unsigned blocks = len_in_bh / EBOFS_BLOCK_SIZE; + unsigned blocks = (len_in_bh + 4095)/ EBOFS_BLOCK_SIZE; for (unsigned i=0; idata_csum -= csum[i]; csum[i] = calc_csum(bh->data.c_str() + i*EBOFS_BLOCK_SIZE, EBOFS_BLOCK_SIZE); @@ -1837,6 +1879,8 @@ void Ebofs::apply_write(Onode *on, off_t off, size_t len, const bufferlist& bl) assert(left == 0); assert(opos == off+(off_t)len); //assert(blpos == bl.length()); + + oc->scrub_csums(); } diff --git a/branches/ebofs/ebofs/Onode.h b/branches/ebofs/ebofs/Onode.h index 7845ce3b997f2..6bc074c4dbbda 100644 --- a/branches/ebofs/ebofs/Onode.h +++ b/branches/ebofs/ebofs/Onode.h @@ -58,7 +58,10 @@ struct ExtentCsum { } }; inline ostream& operator<<(ostream& out, ExtentCsum &ec) { - return out << ec.ex << '=' << hex << ec.csum << dec; + out << ec.ex; + out << '='; + out << hex << ec.csum << dec; + return out; } class Onode : public LRUObject { @@ -169,12 +172,12 @@ public: csum_t csum = 0; set s; - cout << "verifying. data_csum=" << hex << data_csum << dec << std::endl; + //cout << "verify_extentsing. data_csum=" << hex << data_csum << dec << std::endl; for (map::iterator p = extent_map.begin(); p != extent_map.end(); p++) { - cout << " " << p->first << ": " << p->second << std::endl; + cout << " verify_extents " << p->first << ": " << p->second << std::endl; assert(pos == p->first); pos += p->second.ex.length; if (p->second.ex.start) { @@ -186,7 +189,8 @@ public: } } } - cout << " calculated csum=" << hex << csum << dec << std::endl; + cout << " verify_extents got csum " + << hex << csum << " want " << data_csum << dec << std::endl; assert(s.size() == count); assert(count == alloc_blocks); diff --git a/branches/ebofs/ebofs/csum.h b/branches/ebofs/ebofs/csum.h index a1f48dfda22d0..65f3e4c87c248 100644 --- a/branches/ebofs/ebofs/csum.h +++ b/branches/ebofs/ebofs/csum.h @@ -20,7 +20,7 @@ typedef __u64 csum_t; /* * physically and logically aligned buffer. yay. */ -inline __u64 calc_csum(char *start, int len) { +inline __u64 calc_csum(const char *start, int len) { // must be 64-bit aligned assert(((unsigned long)start & 7) == 0); assert((len & 7) == 0); @@ -38,8 +38,8 @@ inline __u64 calc_csum(char *start, int len) { /* * arbitrarily aligned buffer. buffer alignment must match logical alignment. */ -inline __u64 calc_csum_unaligned(char *start, int len) { - char *end = start + len; +inline __u64 calc_csum_unaligned(const char *start, int len) { + const char *end = start + len; __u64 csum = 0; // front @@ -51,7 +51,7 @@ inline __u64 calc_csum_unaligned(char *start, int len) { return csum; // middle, aligned - char *fastend = end - 7; + const char *fastend = end - 7; while (start < fastend) { csum += *(__u64*)start; start += sizeof(__u64); @@ -69,8 +69,8 @@ inline __u64 calc_csum_unaligned(char *start, int len) { /* * arbitrarily aligned buffer, with arbitrary logical alignment */ -inline __u64 calc_csum_realign(char *start, int len, int off) { - char *end = start + len; +inline __u64 calc_csum_realign(const char *start, int len, int off) { + const char *end = start + len; __u64 csum = 0; if (((unsigned long)start & 7) == (off & 7)) diff --git a/branches/ebofs/ebofs/test.ebofs.cc b/branches/ebofs/ebofs/test.ebofs.cc index 8e49f6b9dd3e0..2b96168b51007 100644 --- a/branches/ebofs/ebofs/test.ebofs.cc +++ b/branches/ebofs/ebofs/test.ebofs.cc @@ -22,7 +22,7 @@ bool stop = false; char fingerprint_byte_at(int pos, int seed) { - __u64 big = ((pos & ~7) / 133) ^ big; + __u64 big = ((pos & ~7) / 133) ^ seed; return ((char*)&big)[pos & 7]; } @@ -32,7 +32,7 @@ class Tester : public Thread { Ebofs &fs; int t; - char b[1024*1024]; + //char b[1024*1024]; public: Tester(Ebofs &e) : fs(e), t(nt) { nt++; } @@ -58,8 +58,7 @@ public: int l = MIN(len,bl.length()); if (l) { cout << t << " got " << l << std::endl; - bl.copy(0, l, b); - char *p = b; + char *p = bl.c_str(); while (l--) { char want = fingerprint_byte_at(off, oid.ino); if (*p != 0 && *p != want) { @@ -76,11 +75,11 @@ public: case 1: { cout << t << " write " << hex << oid << dec << " at " << off << " len " << len << std::endl; - for (int j=0;jmakesib(_len); @@ -642,6 +646,24 @@ public: it++) it->zero(); } + void zero(unsigned o, unsigned l) { + assert(o+l <= _len); + unsigned p = 0; + for (std::list::iterator it = _buffers.begin(); + it != _buffers.end(); + it++) { + if (p + it->length() > o) { + if (p >= o && p+it->length() >= o+l) + it->zero(); // all + else if (p >= o) + it->zero(0, o+l-p); // head + else + it->zero(o-p, it->length()-(o-p)); // tail + } + p += it->length(); + if (o+l >= p) break; // done + } + } // sort-of-like-assignment-op void claim(list& bl) { -- 2.39.5