git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
Added Files:
author carlosm <carlosm@29311d96-e01e-0410-9327-a35deaab8ce9>
Tue, 28 Jun 2005 11:26:53 +0000 (11:26 +0000)
committer carlosm <carlosm@29311d96-e01e-0410-9327-a35deaab8ce9>
Tue, 28 Jun 2005 11:26:53 +0000 (11:26 +0000)
Buffercache.cc Buffercache.h

Still missing:
- all forms of flushes
- buffer trimming strategy

git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@354 29311d96-e01e-0410-9327-a35deaab8ce9

ceph/client/Buffercache.cc [new file with mode: 0644]
ceph/client/Buffercache.h [new file with mode: 0644]

diff --git a/ceph/client/Buffercache.cc b/ceph/client/Buffercache.cc
new file mode 100644 (file)
index 0000000..2138687
--- /dev/null
@@ -0,0 +1,250 @@
+#include "Buffercache.h"
+
+// -- Filecache methods
+
+map<off_t, Bufferhead*>::iterator Filecache::overlap(size_t len, off_t off)
+{
+  // returns iterator to buffer overlapping specified extent or end() if no overlap exists
+  map<off_t, Bufferhead*>::iterator it = buffer_map.lower_bound(off);
+  if (it == buffer_map.end() || (*it)->first < off + len) {
+    return it;
+  } else if (it == buffer_map.begin()) {
+    return buffer_map.end();
+  } else {
+    --it;
+    if ((*it)->first + (*it)->second->bl.length() > off) {
+      return it;
+    } else {
+      return buffer_map.end();
+    }
+  }
+}
+
+void Filecache::map_existing(size_t len, 
+                             off_t start_off,
+                             map<off_t, Bufferhead>& hits, 
+                             map<off_t, Bufferhead>& inflight, 
+                             map<off_t, size_t>& holes)
+{
+  off_t need_off = start_off;
+  for (map<off_t, Bufferhead*>::iterator existing = overlap(len, start_off);
+       existing != buffer_map.end() && (*existing)->first < start_off + len;
+       existing++) {
+    off_t actual_off = (*existing)->first;
+    Bufferhead *bh = (*existing)->second;
+    lru.lru_touch(bh);
+    if (actual_off > need_off) {
+      holes[need_off] = (size_t) (actual_off - need_off);
+    }
+    if (bh->state == BUFHD_STATE_INFLIGHT) {
+      inflight[actual_off] = bh;
+    } else {
+      hits[actual_off] = bh;
+    }
+    need_off = actual_off + bh->length();
+  }
+  if (next_off < off + len) {
+    holes[next_off] = (size_t) (off + len - next_off);
+  }
+  // FIXME: consolidate holes
+}
+
+list<Bufferhead*> Filecache::simplify()
+{
+  list<Bufferhead*> removed;
+  map<off_t, Bufferhead*>::iterator start, next;
+  start = buffer_map.begin();
+  while (start != buffer_map.end()) {
+       next = start + 1;
+       while (next != buffer_map.end()) {
+         if ((*start)->second->state != BUFHD_STATE_INFLIGHT &&
+                 (*start)->second->state == (*next)->second->state &&
+                 (*start)->second->offset + (*start)->second->len == (*next)->second->offset) {
+               (*start)->second->claim_append((*next)->second);
+               buffer_map.erase((*next)->first);
+               removed.push_back((*next)->second);
+               next++;
+         } else {
+               break;
+         }
+       }
+       start = next;
+  }
+  return removed;
+}
+
+void Filecache::copy_out(size_t size, off_t offset, char *dst) 
+{
+  assert(offset >= 0);
+  assert(offset + size <= length());
+  
+  map<off_t, Bufferhead*>::iterator curbuf = overlap(size, offset);
+  offset -= (*curbuf)->first;
+  assert(offset >= 0);
+  
+  while (size > 0) {
+    Bufferhead *bh = (*curbuf)->second;
+    if (offset + size <= bh->len) {
+      bh->bl.copy(offset, size, dst);
+      break;
+    }
+    
+    int howmuch = bh->len - offset;
+    bh->bl.copy(offset, howmuch, dst);
+    
+    dst += howmuch;
+    size -= howmuch;
+    offset = 0;
+    curbuf++;
+    assert(curbuf != buffer_map.end());
+  }
+}
+
+// -- Buffercache methods
+
+// Register a Bufferhead with the cache.
+//
+// Creates the per-inode Filecache on first use, stores 'bh' under its offset,
+// puts it at the top of the LRU and adds its length to clean_size.
+// Inserting a second buffer at an offset that is already present is a
+// programming error and trips an assert.
+void Buffercache::insert(Bufferhead *bh) {
+  Filecache *fc;
+  map<inodeno_t, Filecache*>::iterator found = bcache_map.find(bh->ino);
+  if (found != bcache_map.end()) {
+    fc = found->second;
+  } else {
+    fc = new Filecache();
+    bcache_map[bh->ino] = fc;
+  }
+  assert(fc->buffer_map.count(bh->offset) == 0); // fail loudly if offset already exists!
+  fc->buffer_map[bh->offset] = bh;
+  lru.lru_insert_top(bh);
+  clean_size += bh->len;
+}
+
+void Buffercache::dirty(inodeno_t ino, size_t size, off_t offset, char *src) 
+{
+  Filecache *fc = bcache_map[ino];
+  assert(offset >= 0);
+  assert(offset + size <= fc->length());
+  
+  map<off_t, Bufferhead*>::iterator curbuf = fc->overlap(size, offset);
+  offset -= (*curbuf)->first;
+  assert(offset >= 0);
+  
+  while (size > 0) {
+    Bufferhead *bh = (*curbuf)->second;
+    if (offset + size <= bh->len) {
+      bh->bl.copy_in(offset, size, src); // last bit
+      bh->dirty();
+      break;
+    }
+    
+    int howmuch = bh->len - offset;
+    bh->bl.copy_in(offset, howmuch, src);
+    bh->dirty();    
+    src += howmuch;
+    size -= howmuch;
+    offset = 0;
+    curbuf++;
+    assert(curbuf != buffer_map.end());
+  }
+}
+
+
+size_t Buffercache::touch_continuous(map<off_t, Bufferhead*>& hits, size_t size, off_t offset)
+{
+  off_t next_off;
+  for (map<off_t, Bufferhead*>::iterator curbuf = hits->begin(); 
+       curbuf != hits->end(); 
+       curbuf++) {
+    if (curbuf != hits.begin() &&
+        (*curbuf)->first != next_off) {
+      break;
+    }
+    lru.lru_touch((*curbuf)->second);
+    next_off += (*curbuf)->second->len;
+  }
+  return (next_off - offset) >= size ? size : (next_off - offset);
+}
+
+// Merge adjacent same-state buffers of inode 'ino' and dispose of the
+// Bufferheads that became redundant. Does nothing for an inode that has no
+// Filecache.
+void Buffercache::simplify(inodeno_t ino)
+{
+  // find() instead of operator[]: the latter would insert (and then
+  // dereference) a NULL Filecache for an unknown inode.
+  map<inodeno_t, Filecache*>::iterator found = bcache_map.find(ino);
+  if (found == bcache_map.end()) {
+    return;
+  }
+  list<Bufferhead*> removed = found->second->simplify();
+  // NOTE(review): a removed buffer that was dirty may still be referenced
+  // from dirty_map (Bufferhead::dirty() stores 'this' there) - confirm and
+  // clean up once flushing is implemented.
+  for (list<Bufferhead*>::iterator it = removed.begin();
+       it != removed.end();
+       ++it) {
+    lru.lru_remove(*it);
+    delete *it;
+  }
+}
+
+Bufferhead *Buffercache::alloc_buffers(ino, offset, size)
+{
+  Bufferhead *bh = new Bufferhead(ino, offset, size, this, BUFHD_STATE_CLEAN);
+  clean_size += size;
+  while (size > 0) {
+    if (size <= BUFC_ALLOC_MAXSIZE) {
+         bh->bl.push_back(new buffer(size));
+         break;
+       }
+       bh->bl.push_back(new buffer(BUFC_ALLOC_MAXSIZE));
+       size -= BUFC_ALLOC_MAXSIZE;
+  }
+  return bh;
+}
+
+
+void Buffercache::map_or_alloc(inodeno_t ino, size_t len, off_t off, 
+                               map<off_t, Bufferhead*> *buffers, 
+                               map<off_t, Bufferhead*> *inflight)
+{
+  Filecache *fc = bcache_map[ino];
+  map<off_t, size_t> holes;
+  fc->map_existing(len, off, buffers, inflight, &holes);
+  // stuff buffers into holes
+  for (map<off_t, size_t>::iterator hole = holes.begin();
+       hole != holes.end();
+       hole++) {
+       assert(buffers->count((*hole)->first) == 0);
+    Bufferhead *bh = alloc_buffers(ino, (*hole)->first, (*hole)->second);
+    buffers[(*hole)->first] = bh;
+    insert(bh); //FIXME: for prefetching we will need more flexible allocation
+  }
+  // split buffers
+  // FIXME: not implemented yet
+}
+
+// Remove a clean, expirable Bufferhead from the cache and delete it.
+//
+// Drops it from its inode's buffer_map and from the LRU and subtracts its
+// length from clean_size. 'bh' must not be used after this call.
+void Buffercache::free_buffers(Bufferhead *bh) 
+{
+  assert(bh->state == BUFHD_STATE_CLEAN);
+  assert(bh->lru_is_expirable());
+  // find() instead of operator[] so an unknown inode cannot insert (and
+  // dereference) a NULL Filecache.
+  map<inodeno_t, Filecache*>::iterator owner = bcache_map.find(bh->ino);
+  assert(owner != bcache_map.end());
+  if (owner != bcache_map.end()) {
+    owner->second->buffer_map.erase(bh->offset);
+  }
+  lru.lru_remove(bh);    
+  clean_size -= bh->len;
+  delete bh;
+}
+
+// Drop every cached buffer of inode 'ino' and the inode's Filecache itself.
+// Does nothing if the inode is not cached. Every buffer must satisfy the
+// preconditions of free_buffers() (clean and expirable).
+void Buffercache::release_file(inodeno_t ino) 
+{
+  map<inodeno_t, Filecache*>::iterator found = bcache_map.find(ino);
+  if (found == bcache_map.end()) {
+    return;
+  }
+  Filecache *fc = found->second;
+  map<off_t, Bufferhead*>::iterator it = fc->buffer_map.begin();
+  while (it != fc->buffer_map.end()) {
+    Bufferhead *bh = it->second;
+    // free_buffers() erases this entry from fc->buffer_map, so step past it
+    // first; erasing invalidates only the iterator to the erased element.
+    ++it;
+    free_buffers(bh);
+  }
+  bcache_map.erase(found);
+  delete fc;
+}
+
+// Expire buffers from the LRU until at least 'min_size' bytes were freed or
+// the LRU has nothing left to expire.
+//
+// Returns the number of bytes actually freed, which may be less than
+// 'min_size'.
+size_t Buffercache::reclaim(size_t min_size)
+{
+  size_t freed_size = 0;
+  while (freed_size < min_size) {
+    Bufferhead *bh = (Bufferhead*)lru.lru_expire();
+    if (!bh) {
+      break; // nothing more that can be expired!
+    }
+    assert(bh->state == BUFHD_STATE_CLEAN);
+    freed_size += bh->bl.length();  // read before free_buffers() deletes bh
+    free_buffers(bh);
+  }
+  return freed_size;
+}
+
diff --git a/ceph/client/Buffercache.h b/ceph/client/Buffercache.h
new file mode 100644 (file)
index 0000000..62b4ae3
--- /dev/null
@@ -0,0 +1,183 @@
+#ifndef __Buffercache_H
+#define __Buffercache_H
+
+#include "include/buffer.h"
+#include "include/bufferlist.h"
+#include "include/lru.h"
+
+// FIXME: buffer constants
+#define BUFC_ALLOC_MAXSIZE 262144
+
+// Bufferhead states
+#define BUFHD_STATE_CLEAN        1
+#define BUFHD_STATE_DIRTY        2
+#define BUFHD_STATE_INFLIGHT  3
+
+class Buffercache;
+
+class Bufferhead : public LRUObject {
+ public: // FIXME: make more private and write some accessors
+  off_t offset;
+  size_t len;
+  inodeno_t ino;
+  time_t last_written;
+  int state; 
+  bufferlist bl;
+  // read_waiters: threads waiting for reads from the buffer
+  // write_waiters: threads waiting for writes into the buffer
+  list<Cond*> read_waiters, write_waiters;
+  Buffercache *bc;
+  
+  // cons/destructors
+  Bufferhead(inodeno_t ino, off_t off, size_t len, Buffercache *bc, int state=BUFHD_STATE_CLEAN) {
+    this->ino = ino;
+    this->offset = off;
+       this->len = len;
+       this->state = state;
+    this->bc = bc;
+    last_written = time();
+    // buffers are allocated later
+  }
+  
+  ~Bufferhead() {
+    list<bufferptr> bl = bh->bl.buffers();
+    for (list<bufferptr>::iterator it == bl.begin();
+         it != bl.end();
+         it++) {
+      delete *it;
+    }
+  }
+  
+  //Bufferhead(inodeno_t ino, off_t off, size_t len, int state);
+  
+  // ~Bufferhead(); FIXME: need to mesh with allocator scheme
+
+  void add_read_waiter(Cond *cond) {
+    read_waiters->push_back(cond); 
+       lru_pin(); 
+  }
+  
+  void add_write_waiter(Cond *cond) { 
+    write_waiters->push_back(cond); 
+       lru_pin(); 
+  }
+  
+  void wakeup_read_waiters() { 
+    for (list<Cond*>::iterator it = read_waiters.begin();
+                it != read_waiters.end();
+                it++) {
+         (*it)->Signal();
+       }
+    read_waiters.clear(); 
+       if (write_waiters.empty()) lru_unpin(); 
+  }
+  
+  void wakeup_write_waiters() {
+    for (list<Cond*>::iterator it = write_waiters.begin();
+                it != write_waiters.end();
+                it++) {
+         (*it)->Signal();
+       }
+    write_waiters.clear(); 
+       if (read_waiters.empty()) lru_unpin(); 
+  }
+  
+  void miss_start() {
+       assert(state == BUFHD_STATE_CLEAN);
+       state = BUFHD_STATE_INFLIGHT;
+  }
+  
+  void miss_finish() {
+       assert(state == BUFHD_STATE_INFLIGHT);
+       state = BUFHD_STATE_CLEAN;
+       wakeup_read_waiters();
+       wakeup_write_waiters();
+  }
+  
+  void dirty() {
+    if (state == BUFHD_STATE_CLEAN) {
+      state = BUFHD_STATE_DIRTY;
+      bc->dirty_size += bh->len;
+      bc->clean_size -= bh->len;
+      bc->dirty_map[last_written] = this;
+    }    
+  }
+  
+  void flush_start() {
+       assert(state == BUFHD_STATE_DIRTY);
+       state = BUFHD_STATE_INFLIGHT;
+    bc->dirty_size -= len;
+    bc->flush_size += len;
+  }
+  
+  void flush_finish() {
+       assert(state == BUFHD_STATE_INFLIGHT);
+       state = BUFHD_STATE_CLEAN;
+    bc->flush_size -= len;
+    bc->clean_size += len;
+       wakeup_write_waiters(); // readers never wait on flushes
+  }
+  
+  void claim_append(Bufferhead *other) {
+       bl.claim_append(other->bl);
+       len += other->len;
+    if (other->last_written < last_written) last_written = other->last_written;
+       other->bl.clear();
+       other->len = 0;
+  }
+};
+
+class Filecache {
+ public: 
+  map<off_t, Bufferhead*> buffer_map;
+
+  size_t length() {
+    size_t len = 0;
+    for (map<off_t, Bufferhead*>::iterator it = buffer_map.begin();
+         it != buffer_map.end();
+         it++) {
+      len += (*it)->second->len;
+    }
+    return len;
+  }
+
+  map<off_t, Bufferhead*>::iterator overlap(size_t len, off_t off);
+  void copy_out(size_t size, off_t offset, char *dst);    
+  void map_existing(size_t len, off_t start_off, 
+                    map<off_t, Bufferhead*>& hits, inflight,
+                    map<off_t, size_t>& holes);
+  void simplify();
+
+};
+
+class Buffercache { 
+ public:
+  map<inodeno_t, Filecache*> bcache_map;
+  LRU lru;
+  size_t dirty_size = 0, flushing_size = 0, clean_size = 0;
+  map<time_t, Bufferhead*> dirty_map;
+
+  // FIXME: constructor & destructor need to mesh with allocator scheme
+  ~Buffercache() {
+    // FIXME: make sure all buffers are cleaned  and then free them
+    for (map<inodeno_t, Filecache*>::iterator it = bcache_map.begin();
+         it != bcache_map.end();
+         it++) {
+      delete (*it)->second; 
+    }
+  }
+  
+  void insert(Bufferhead *bh);
+  void dirty(inodeno_t ino, size_t size, off_t offset, char *src);
+  void simplify(inodeno_t ino);
+  Bufferhead *alloc_buffers(inodeno_t ino, size_t size, off_t offset, int state);
+  void map_or_alloc(inodeno_t ino, size_t len, off_t off, 
+                    map<off_t, Bufferhead*>& buffers, inflight);
+  void free_buffers(Bufferhead *bh);
+  void release_file(inodeno_t ino);       
+  size_t reclaim(size_t min_size);
+};
+
+     
+#endif
+