git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
Enforce cache size on read requests 2422/head
Author: Jason Dillaman <dillaman@redhat.com>
Date: Sun, 7 Sep 2014 02:59:40 +0000 (22:59 -0400)
Committer: Jason Dillaman <dillaman@redhat.com>
Date: Sun, 7 Sep 2014 03:04:50 +0000 (23:04 -0400)
In-flight cache reads were not previously counted against
new cache read requests, which could result in very large
cache usage.  This effect is most noticeable when writing
small chunks to a cloned image since each write requires
a full object read from the parent.

Signed-off-by: Jason Dillaman <dillaman@redhat.com>
src/osdc/ObjectCacher.cc
src/osdc/ObjectCacher.h

index e7dfe18bb0608781bbf156d850e20a3a8b143d4b..1f58674ec7fadb27ca6d8cb7955e29165f6a3283 100644 (file)
@@ -763,6 +763,9 @@ void ObjectCacher::bh_read_finish(int64_t poolid, sobject_t oid, ceph_tid_t tid,
       loff_t oldpos = opos;
       opos = bh->end();
 
+      ls.splice(ls.end(), waitfor_read);
+      waitfor_read.clear();
+
       if (r == -ENOENT) {
        if (trust_enoent) {
          ldout(cct, 10) << "bh_read_finish removing " << *bh << dendl;
@@ -1110,13 +1113,26 @@ int ObjectCacher::_readx(OSDRead *rd, ObjectSet *oset, Context *onfinish,
       for (map<loff_t, BufferHead*>::iterator bh_it = missing.begin();
            bh_it != missing.end();
            ++bh_it) {
-        bh_read(bh_it->second);
-        if (success && onfinish) {
-          ldout(cct, 10) << "readx missed, waiting on " << *bh_it->second 
-                   << " off " << bh_it->first << dendl;
-         bh_it->second->waitfor_read[bh_it->first].push_back( new C_RetryRead(this, rd, oset, onfinish) );
+        loff_t clean = get_stat_clean() + get_stat_rx() +
+                       bh_it->second->length();
+        if (get_stat_rx() > 0 && static_cast<uint64_t>(clean) > max_size) {
+          // cache is full -- wait for rx's to complete
+          ldout(cct, 10) << "readx missed, waiting on cache to free "
+                         << (clean - max_size) << " bytes" << dendl;
+          if (success) {
+            waitfor_read.push_back(new C_RetryRead(this, rd, oset, onfinish));
+          }
+          bh_remove(o, bh_it->second);
+          delete bh_it->second;
+        } else {
+          bh_read(bh_it->second);
+          if (success && onfinish) {
+            ldout(cct, 10) << "readx missed, waiting on " << *bh_it->second 
+                     << " off " << bh_it->first << dendl;
+           bh_it->second->waitfor_read[bh_it->first].push_back( new C_RetryRead(this, rd, oset, onfinish) );
+          }
+          bytes_not_in_cache += bh_it->second->length();
         }
-        bytes_not_in_cache += bh_it->second->length();
        success = false;
       }
 
index ca16138fa2d00d764b88899a6bfd5ce010c1cfc1..9685ee3838bedf0aa111ce426436f1927eb8b9cd 100644 (file)
@@ -340,6 +340,7 @@ class ObjectCacher {
   void *flush_set_callback_arg;
 
   vector<ceph::unordered_map<sobject_t, Object*> > objects; // indexed by pool_id
+  list<Context*> waitfor_read;
 
   ceph_tid_t last_read_tid;