git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
Enforce cache size on read requests
author Jason Dillaman <dillaman@redhat.com>
Sun, 7 Sep 2014 02:59:40 +0000 (22:59 -0400)
committer Sage Weil <sage@redhat.com>
Fri, 3 Oct 2014 00:50:13 +0000 (17:50 -0700)
In-flight cache reads were not previously counted against
new cache read requests, which could result in very large
cache usage.  This effect is most noticeable when writing
small chunks to a cloned image since each write requires
a full object read from the parent.

Signed-off-by: Jason Dillaman <dillaman@redhat.com>
(cherry picked from commit 4fc9fffc494abedac0a9b1ce44706343f18466f1)

src/osdc/ObjectCacher.cc
src/osdc/ObjectCacher.h

index e1499b4148541329c439b309b2fa04ddd438698d..e804674ffe28572db0164fd9c01d2cdb800a9fff 100644 (file)
@@ -764,6 +764,9 @@ void ObjectCacher::bh_read_finish(int64_t poolid, sobject_t oid, ceph_tid_t tid,
       loff_t oldpos = opos;
       opos = bh->end();
 
+      ls.splice(ls.end(), waitfor_read);
+      waitfor_read.clear();
+
       if (r == -ENOENT) {
        if (trust_enoent) {
          ldout(cct, 10) << "bh_read_finish removing " << *bh << dendl;
@@ -1111,13 +1114,26 @@ int ObjectCacher::_readx(OSDRead *rd, ObjectSet *oset, Context *onfinish,
       for (map<loff_t, BufferHead*>::iterator bh_it = missing.begin();
            bh_it != missing.end();
            ++bh_it) {
-        bh_read(bh_it->second);
-        if (success && onfinish) {
-          ldout(cct, 10) << "readx missed, waiting on " << *bh_it->second 
-                   << " off " << bh_it->first << dendl;
-         bh_it->second->waitfor_read[bh_it->first].push_back( new C_RetryRead(this, rd, oset, onfinish) );
+        loff_t clean = get_stat_clean() + get_stat_rx() +
+                       bh_it->second->length();
+        if (get_stat_rx() > 0 && static_cast<uint64_t>(clean) > max_size) {
+          // cache is full -- wait for rx's to complete
+          ldout(cct, 10) << "readx missed, waiting on cache to free "
+                         << (clean - max_size) << " bytes" << dendl;
+          if (success) {
+            waitfor_read.push_back(new C_RetryRead(this, rd, oset, onfinish));
+          }
+          bh_remove(o, bh_it->second);
+          delete bh_it->second;
+        } else {
+          bh_read(bh_it->second);
+          if (success && onfinish) {
+            ldout(cct, 10) << "readx missed, waiting on " << *bh_it->second 
+                     << " off " << bh_it->first << dendl;
+           bh_it->second->waitfor_read[bh_it->first].push_back( new C_RetryRead(this, rd, oset, onfinish) );
+          }
+          bytes_not_in_cache += bh_it->second->length();
         }
-        bytes_not_in_cache += bh_it->second->length();
        success = false;
       }
 
index d2aebe984cdf4865590eabf8143e7c7487c45a94..5b93998456ab0e0407ab30854b5ebb053f6baac2 100644 (file)
@@ -340,6 +340,7 @@ class ObjectCacher {
   void *flush_set_callback_arg;
 
   vector<ceph::unordered_map<sobject_t, Object*> > objects; // indexed by pool_id
+  list<Context*> waitfor_read;
 
   ceph_tid_t last_read_tid;