git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
fixed bug with full partial overwrite; cleaned up map_read; fixed bug in attempt_read...
author sageweil <sageweil@29311d96-e01e-0410-9327-a35deaab8ce9>
Wed, 19 Dec 2007 00:07:29 +0000 (00:07 +0000)
committer sageweil <sageweil@29311d96-e01e-0410-9327-a35deaab8ce9>
Wed, 19 Dec 2007 00:07:29 +0000 (00:07 +0000)
git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@2226 29311d96-e01e-0410-9327-a35deaab8ce9

branches/ebofs/ebofs/BufferCache.cc
branches/ebofs/ebofs/BufferCache.h
branches/ebofs/ebofs/Ebofs.cc

index cd0b980e5fb001886f9995df5f9fb4ed447bd4e2..d34dd1e2489c21996de6d05353e76d3bc7ab7b53 100644 (file)
@@ -113,18 +113,13 @@ void BufferHead::add_partial(off_t off, bufferlist& p)
 
 void BufferHead::apply_partial() 
 {
-  assert(!partial.empty());
   dout(10) << "apply_partial on " << partial.size() << " substrings" << dendl;
-  csum_t *expect = oc->on->get_extent_csum_ptr(start(), 1);
-  csum_t oldc = calc_csum(data.c_str(), EBOFS_BLOCK_SIZE);
+  assert(!partial.empty());
+  csum_t *p = oc->on->get_extent_csum_ptr(start(), 1);
   do_apply_partial(data, partial);
   csum_t newc = calc_csum(data.c_str(), EBOFS_BLOCK_SIZE);
-  dout(10) << "apply_partial onode expected " << hex << *expect
-          << " bl was " << oldc
-          << " now " << newc << dec << dendl;
-  assert(*expect == oldc);
-  *expect = newc;
-  oc->on->data_csum += newc - oldc;
+  oc->on->data_csum += newc - *p;
+  *p = newc;
 }
 
 
@@ -236,10 +231,13 @@ void ObjectCache::rx_finish(ioh_t ioh, block_t start, block_t length, bufferlist
       assert(exv[0].start != 0);
       block_t cur_block = exv[0].start;
       
+      off_t off_in_bl = (bh->start() - start) * EBOFS_BLOCK_SIZE;
+      assert(off_in_bl >= 0);
+      off_t len_in_bl = bh->length() * EBOFS_BLOCK_SIZE;
+
       // verify csum
-      assert(bl.length() == (unsigned)EBOFS_BLOCK_SIZE);
       csum_t want = *bh->oc->on->get_extent_csum_ptr(bh->start(), 1);
-      csum_t got = calc_csum(bl.c_str(), bl.length());
+      csum_t got = calc_csum(bl.c_str() + off_in_bl, len_in_bl);
       if (want != got) {
        derr(0) << "rx_finish  bad csum on partial readback, want " << hex << want
                << " got " << got << dec << dendl;
@@ -266,7 +264,9 @@ void ObjectCache::rx_finish(ioh_t ioh, block_t start, block_t length, bufferlist
       assert(bh->data.length() == 0);
       bufferptr bp = buffer::create_page_aligned(EBOFS_BLOCK_SIZE);
       bh->data.push_back( bp );
-      bh->data.copy_in(0, EBOFS_BLOCK_SIZE, bl);
+      bufferlist sub;
+      sub.substr_of(bl, off_in_bl, len_in_bl);
+      bh->data.copy_in(0, EBOFS_BLOCK_SIZE, sub);
       bh->apply_partial();
       
       // write "normally"
@@ -479,8 +479,7 @@ int ObjectCache::map_read(block_t start, block_t len,
                           map<block_t, BufferHead*>& hits,
                           map<block_t, BufferHead*>& missing,
                           map<block_t, BufferHead*>& rx,
-                          map<block_t, BufferHead*>& partial,
-                         map<block_t, BufferHead*>& corrupt) {
+                          map<block_t, BufferHead*>& partial) {
   
   map<block_t, BufferHead*>::iterator p = find_bh(start, len);
   block_t cur = start;
@@ -521,15 +520,12 @@ int ObjectCache::map_read(block_t start, block_t len,
       
       if (e->is_clean() ||
           e->is_dirty() ||
-          e->is_tx()) {
+          e->is_tx() ||
+         e->is_corrupt()) {
         hits[cur] = e;     // readable!
         dout(20) << "map_read hit " << *e << dendl;
         bc->touch(e);
       } 
-      else if (e->is_corrupt()) {
-       corrupt[cur] = e;
-       dout(20) << "map_read corrupt " << *e << dendl;
-      }
       else if (e->is_rx()) {
         rx[cur] = e;       // missing, not readable.
         dout(20) << "map_read rx " << *e << dendl;
index c42bbc0fccbd82fb8e69e6e53f4f065060af79ce..ef9979b067a0a32e52b28851510519c544af37f4 100644 (file)
@@ -400,8 +400,7 @@ class ObjectCache {
                map<block_t, BufferHead*>& hits,     // hits
                map<block_t, BufferHead*>& missing,  // read these from disk
                map<block_t, BufferHead*>& rx,       // wait for these to finish reading from disk
-               map<block_t, BufferHead*>& partial,  // (maybe) wait for these to read from disk
-               map<block_t, BufferHead*>& corrupt); // bad checksums
+               map<block_t, BufferHead*>& partial); // (maybe) wait for these to read from disk
   int try_map_read(block_t start, block_t len);  // just tell us how many extents we're missing.
 
   int map_write(block_t start, block_t len,
index 8f20f278319cf48985a424a174490738da3abd07..5a90db911bb9c0b99d3e90639c8da4f72a850f78 100644 (file)
@@ -1841,11 +1841,12 @@ int Ebofs::apply_write(Onode *on, off_t off, off_t len, const bufferlist& bl)
         blpos += len_in_bh;
         opos += len_in_bh;
 
-        if (bh->partial_is_complete(on->object_size - bh->start()*EBOFS_BLOCK_SIZE)) {
+        if (bh->is_partial() &&
+           bh->partial_is_complete(on->object_size - bh->start()*EBOFS_BLOCK_SIZE)) {
           dout(10) << "apply_write  completed partial " << *bh << dendl;
+         bc.bh_cancel_read(bh);           // cancel old rx op, if we can.
          bh->data.clear();
-         bh->data.push_back( buffer::create_page_aligned(EBOFS_BLOCK_SIZE*bh->length()) );
-          bh->data.zero();
+         bh->data.push_back(buffer::create_page_aligned(EBOFS_BLOCK_SIZE));
           bh->apply_partial();
           bc.mark_dirty(bh);
           bc.bh_write(on, bh);
@@ -2142,8 +2143,7 @@ int Ebofs::attempt_read(Onode *on, off_t off, size_t len, bufferlist& bl,
   map<block_t, BufferHead*> missing;  // read these
   map<block_t, BufferHead*> rx;       // wait for these
   map<block_t, BufferHead*> partials;  // ??
-  map<block_t, BufferHead*> corrupt;
-  oc->map_read(bstart, blen, hits, missing, rx, partials, corrupt);
+  oc->map_read(bstart, blen, hits, missing, rx, partials);
 
   // missing buffers?
   if (!missing.empty()) {
@@ -2159,8 +2159,17 @@ int Ebofs::attempt_read(Onode *on, off_t off, size_t len, bufferlist& bl,
     return 0;
   }
   
+  // wait on rx?
+  if (!rx.empty()) {
+    BufferHead *wait_on = rx.begin()->second;
+    Context *c = new C_Cond(will_wait_on, will_wait_on_bool);
+    dout(20) << "attempt_read waiting for read to finish on " << *wait_on << " c " << c << dendl;
+    block_t b = MAX(wait_on->start(), bstart);
+    wait_on->waitfor_read[b].push_back(c);
+    return 0;
+  }
+
   // are partials sufficient?
-  bool partials_ok = true;
   for (map<block_t,BufferHead*>::iterator i = partials.begin();
        i != partials.end();
        i++) {
@@ -2170,33 +2179,20 @@ int Ebofs::attempt_read(Onode *on, off_t off, size_t len, bufferlist& bl,
     off_t start = MAX( off, bhstart );
     off_t end = MIN( off+(off_t)len, bhend );
     
-    if (!i->second->have_partial_range(start-bhstart, end-bhend)) {
-      if (partials_ok) {
-        // wait on this one
-        Context *c = new C_Cond(will_wait_on, will_wait_on_bool);
-        dout(10) << "attempt_read insufficient partial buffer " << *(i->second) << " c " << c << dendl;
-        i->second->waitfor_read[i->second->start()].push_back(c);
-      }
-      partials_ok = false;
+    if (!i->second->have_partial_range(start-bhstart, end-bhstart)) {
+      // wait on this one
+      Context *c = new C_Cond(will_wait_on, will_wait_on_bool);
+      dout(10) << "attempt_read insufficient partial buffer " << *(i->second) << " c " << c << dendl;
+      i->second->waitfor_read[i->second->start()].push_back(c);
+      return 0;
     }
-  }
-  if (!partials_ok) return 0;
-
-  // wait on rx?
-  if (!rx.empty()) {
-    BufferHead *wait_on = rx.begin()->second;
-    Context *c = new C_Cond(will_wait_on, will_wait_on_bool);
-    dout(20) << "attempt_read waiting for read to finish on " << *wait_on << " c " << c << dendl;
-    block_t b = MAX(wait_on->start(), bstart);
-    wait_on->waitfor_read[b].push_back(c);
-    return 0;
+    dout(10) << "attempt_read have partial range " << (start-bhstart) << "~" << (end-bhstart) << " on " << *bh << dendl;
   }
 
   // yay, we have it all!
-  // concurrently walk thru hits, partials.
+  // concurrently walk thru hits, partials, corrupt.
   map<block_t,BufferHead*>::iterator h = hits.begin();
   map<block_t,BufferHead*>::iterator p = partials.begin();
-  map<block_t,BufferHead*>::iterator c = corrupt.begin();
 
   bl.clear();
   off_t pos = off;
@@ -2209,9 +2205,6 @@ int Ebofs::attempt_read(Onode *on, off_t off, size_t len, bufferlist& bl,
     } else if (p != partials.end() && p->first == curblock) {
       bh = p->second;
       p++;
-    } else if (c != corrupt.end() && c->first == curblock) {
-      bh = c->second;
-      c++;
     } else assert(0);
     
     off_t bhstart = (off_t)(bh->start()*EBOFS_BLOCK_SIZE);
@@ -2230,6 +2223,7 @@ int Ebofs::attempt_read(Onode *on, off_t off, size_t len, bufferlist& bl,
     } else if (bh->is_partial()) {
       // copy from a partial block.  yuck!
       bufferlist frag;
+      dout(10) << "attempt_read copying partial range " << (start-bhstart) << "~" << (end-bhstart) << " on " << *bh << dendl;
       bh->copy_partial_substr( start-bhstart, end-bhstart, frag );
       bl.claim_append( frag );
       pos += frag.length();