git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osdc: restart read on truncate/discard 37732/head
author: Patrick Donnelly <pdonnell@redhat.com>
Tue, 20 Oct 2020 19:38:15 +0000 (12:38 -0700)
committer: Patrick Donnelly <pdonnell@redhat.com>
Wed, 21 Oct 2020 01:48:02 +0000 (18:48 -0700)
The problem can be reliably reproduced by reading a large file (~500MB) on a
ceph-fuse mount and then evicting the client while the read is in progress.

With this change applied, the logs now show the pending reads being restarted:

2020-10-21T01:46:29.679+0000 7f00beffd700 10 objectcacher.object(10000000000.00000030/head) truncate object[10000000000.00000030/head oset 0x7f00a0006b08 wr 0/0] to 0
2020-10-21T01:46:29.679+0000 7f00beffd700 10 objectcacher.object(10000000000.00000030/head) restarting reads post-truncate
...
2020-10-21T01:46:29.762+0000 7f00bffff700 20 objectcacher.object(10000000000.00000030/head) map_read error bh[ 0x7f0070004e30 1048576~3145728 0x7f008404a720 (0) v 0 error=-108] waiters = {}
2020-10-21T01:46:29.762+0000 7f00bffff700 10 objectcacher readx hit bh bh[ 0x7f0070004e30 1048576~3145728 0x7f008404a720 (0) v 0 error=-108] waiters = {}
2020-10-21T01:46:29.762+0000 7f00bffff700 10 objectcacher readx has all buffers
2020-10-21T01:46:29.762+0000 7f00bffff700 20 objectcacher readx done 0x7f007c00a9b0 -108
...
2020-10-21T01:46:29.763+0000 7f0077fff700  3 client.4497 ll_read 0x7f0080023720 202637312~131072 = -108

Fixes: https://tracker.ceph.com/issues/46434
Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
src/osdc/ObjectCacher.cc

index 84b88f2852f06ba6f1999f7a20d4b1aba7e85c92..7522c8258c83fe7dfa8bc9c160b69d0071826fa4 100644 (file)
@@ -566,6 +566,7 @@ void ObjectCacher::Object::truncate(loff_t s)
   ceph_assert(ceph_mutex_is_locked(oc->lock));
   ldout(oc->cct, 10) << "truncate " << *this << " to " << s << dendl;
 
+  std::list<Context*> waiting_for_read;
   while (!data.empty()) {
     BufferHead *bh = data.rbegin()->second;
     if (bh->end() <= s)
@@ -580,11 +581,18 @@ void ObjectCacher::Object::truncate(loff_t s)
 
     // remove bh entirely
     ceph_assert(bh->start() >= s);
-    ceph_assert(bh->waitfor_read.empty());
+    for ([[maybe_unused]] auto& [off, ctxs] : bh->waitfor_read) {
+      waiting_for_read.splice(waiting_for_read.end(), ctxs);
+    }
+    bh->waitfor_read.clear();
     replace_journal_tid(bh, 0);
     oc->bh_remove(this, bh);
     delete bh;
   }
+  if (!waiting_for_read.empty()) {
+    ldout(oc->cct, 10) <<  "restarting reads post-truncate" << dendl;
+  }
+  finish_contexts(oc->cct, waiting_for_read, 0);
 }
 
 void ObjectCacher::Object::discard(loff_t off, loff_t len,
@@ -603,6 +611,7 @@ void ObjectCacher::Object::discard(loff_t off, loff_t len,
     complete = false;
   }
 
+  std::list<Context*> waiting_for_read;
   auto p = data_lower_bound(off);
   while (p != data.end()) {
     BufferHead *bh = p->second;
@@ -640,12 +649,19 @@ void ObjectCacher::Object::discard(loff_t off, loff_t len,
       // we should mark all Rx bh to zero
       continue;
     } else {
-      ceph_assert(bh->waitfor_read.empty());
+      for ([[maybe_unused]] auto& [off, ctxs] : bh->waitfor_read) {
+        waiting_for_read.splice(waiting_for_read.end(), ctxs);
+      }
+      bh->waitfor_read.clear();
     }
 
     oc->bh_remove(this, bh);
     delete bh;
   }
+  if (!waiting_for_read.empty()) {
+    ldout(oc->cct, 10) <<  "restarting reads post-discard" << dendl;
+  }
+  finish_contexts(oc->cct, waiting_for_read, 0); /* restart reads */
 }